1549538c70
schemas / vulnerabilities (pull_request) Successful in 2m11s
schemas / check-release (pull_request) Successful in 3m8s
schemas / check (pull_request) Successful in 3m30s
pre-commit / pre-commit (pull_request) Successful in 7m13s
schemas / build (pull_request) Successful in 6m30s
schemas / deploy-prod (pull_request) Has been skipped
Following the schema cache PR, warm pods serve from cache (~24/25 hits on a long-running pod). New pods, however, start cold: the first LatestSchema query per (orgId, ref) still runs the wgc router compose subprocess, which costs 100-300m CPU per call. That cold-start cost is what kept tripping the HPA into TooManyReplicas: HPA scales up → new pod added → new pod runs wgc on first query → metrics spike → HPA scales up further → cycle repeats. Even after the caching PR landed, we observed pods cycling 2→4→2→4 in production, with fresh pods showing 2 'Fetching latest schema' (cold) entries and 0 cache hits within their first minute. Add Cache.AllOrgRefs(), exposing every tracked (orgId, ref) pair, and Resolver.WarmCache(ctx), which iterates them after the event-sourced caches have been populated. For each ref it fetches the subgraphs, runs sdlmerge, runs CosmoGenerator.Generate, and stores both results in the cache. Per-ref errors are logged and the failing ref is skipped, so a single bad ref does not block warming the rest. Service startup calls WarmCache right after the Resolver is wired, before the HTTP server starts accepting traffic, so the first LatestSchema query a pod receives is already a cache hit.
122 lines
3.8 KiB
Go
122 lines
3.8 KiB
Go
package graph
|
|
|
|
import (
|
|
"context"
|
|
"fmt"
|
|
"log/slog"
|
|
|
|
"gitlab.com/unboundsoftware/eventsourced/eventsourced"
|
|
|
|
"gitea.unbound.se/unboundsoftware/schemas/cache"
|
|
"gitea.unbound.se/unboundsoftware/schemas/graph/model"
|
|
"gitea.unbound.se/unboundsoftware/schemas/middleware"
|
|
"gitea.unbound.se/unboundsoftware/schemas/sdlmerge"
|
|
)
|
|
|
|
//go:generate go run github.com/99designs/gqlgen
|
|
//go:generate gofumpt -w .
|
|
//go:generate goimports -w -local gitea.unbound.se/unboundsoftware/schemas .
|
|
|
|
// This file will not be regenerated automatically.
|
|
//
|
|
// It serves as dependency injection for your app, add any dependencies you require here.
|
|
|
|
type Publisher interface {
|
|
Publish(ctx context.Context, event eventsourced.Event) error
|
|
}
|
|
|
|
type Resolver struct {
|
|
EventStore eventsourced.EventStore
|
|
Publisher Publisher
|
|
Logger *slog.Logger
|
|
Cache *cache.Cache
|
|
PubSub *PubSub
|
|
CosmoGenerator *CosmoGenerator
|
|
Debouncer *Debouncer
|
|
}
|
|
|
|
func (r *Resolver) apiKeyCanAccessRef(ctx context.Context, ref string, publish bool) (string, error) {
|
|
key, err := middleware.ApiKeyFromContext(ctx)
|
|
if err != nil {
|
|
return "", err
|
|
}
|
|
apiKey := r.Cache.ApiKeyByKey(key)
|
|
if publish && !apiKey.Publish {
|
|
return "", fmt.Errorf("provided API-key doesn't have publish privilege")
|
|
}
|
|
if !publish && !apiKey.Read {
|
|
return "", fmt.Errorf("provided API-key doesn't have read privilege")
|
|
}
|
|
for _, rr := range apiKey.Refs {
|
|
if rr == ref {
|
|
return apiKey.Name, nil
|
|
}
|
|
}
|
|
return "", fmt.Errorf("provided API-key doesn't have the required privilege on the requested Schema Ref")
|
|
}
|
|
|
|
func (r *Resolver) handler(ctx context.Context, aggregate eventsourced.Aggregate) (eventsourced.CommandHandler, error) {
|
|
return eventsourced.NewHandler(ctx, aggregate, r.EventStore, eventsourced.WithEventPublisher(r.Publisher))
|
|
}
|
|
|
|
// apiKeyId returns the identifier for an API key: the organization id and
// the key name joined by a single hyphen.
func apiKeyId(orgId, name string) string {
	// fmt.Sprint inserts no separators between string operands.
	return fmt.Sprint(orgId, "-", name)
}
|
|
|
|
// WarmCache precomputes the merged SDL and SchemaUpdate (cosmo router
|
|
// config) for every (orgId, ref) tracked in the cache. Intended to run
|
|
// once at startup, after the event-sourced caches have been populated
|
|
// but before the pod accepts traffic, so the first request per ref does
|
|
// not pay the cold-start cost of running sdlmerge + wgc compose.
|
|
//
|
|
// Errors per ref are logged and skipped rather than aborting the whole
|
|
// warmup: a single bad ref must not block the pod from serving the
|
|
// remaining refs.
|
|
func (r *Resolver) WarmCache(ctx context.Context) {
|
|
refs := r.Cache.AllOrgRefs()
|
|
r.Logger.Info("Warming schema cache on startup", "refCount", len(refs))
|
|
|
|
for _, or := range refs {
|
|
services, lastUpdate := r.Cache.Services(or.OrgId, or.Ref, "")
|
|
if len(services) == 0 {
|
|
continue
|
|
}
|
|
|
|
subGraphs := make([]*model.SubGraph, len(services))
|
|
serviceSDLs := make([]string, len(services))
|
|
for i, id := range services {
|
|
sg, err := r.fetchSubGraph(ctx, id)
|
|
if err != nil {
|
|
r.Logger.Error("warmup: fetch subgraph", "error", err, "orgId", or.OrgId, "ref", or.Ref, "id", id)
|
|
subGraphs = nil
|
|
break
|
|
}
|
|
subGraphs[i] = r.toGqlSubGraph(sg)
|
|
serviceSDLs[i] = sg.Sdl
|
|
}
|
|
if subGraphs == nil {
|
|
continue
|
|
}
|
|
|
|
if sdl, err := sdlmerge.MergeSDLs(serviceSDLs...); err != nil {
|
|
r.Logger.Error("warmup: merge SDLs", "error", err, "orgId", or.OrgId, "ref", or.Ref)
|
|
} else {
|
|
r.Cache.SetMergedSDL(or.OrgId, or.Ref, lastUpdate, sdl, subGraphs)
|
|
}
|
|
|
|
cosmoConfig, err := r.CosmoGenerator.Generate(ctx, subGraphs)
|
|
if err != nil {
|
|
r.Logger.Error("warmup: generate cosmo config", "error", err, "orgId", or.OrgId, "ref", or.Ref)
|
|
continue
|
|
}
|
|
r.Cache.SetSchemaUpdate(or.OrgId, or.Ref, &model.SchemaUpdate{
|
|
Ref: or.Ref,
|
|
ID: lastUpdate,
|
|
SubGraphs: subGraphs,
|
|
CosmoRouterConfig: &cosmoConfig,
|
|
})
|
|
}
|
|
|
|
r.Logger.Info("Schema cache warmup complete", "refCount", len(refs))
|
|
}
|