fix: prevent OOM on rapid schema publishing
schemas / vulnerabilities (pull_request) Successful in 6m43s
schemas / check-release (pull_request) Successful in 11m27s
schemas / check (pull_request) Successful in 14m51s
pre-commit / pre-commit (pull_request) Successful in 19m39s
schemas / build (pull_request) Successful in 8m26s
schemas / deploy-prod (pull_request) Has been skipped

Add a concurrency-limited CosmoGenerator (semaphore limit=1, 60s timeout)
to prevent unbounded concurrent spawning of wgc processes. Add a debouncer
(500ms) to coalesce rapid schema updates per org+ref. Fix the double
subgraph fetch in the Supergraph resolver and the goroutine leak in the
SchemaUpdates subscription.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-02-23 08:05:47 +01:00
parent a9885f8b65
commit 28aa32ad8c
8 changed files with 283 additions and 60 deletions
+21 -49
View File
@@ -174,8 +174,9 @@ func (r *mutationResolver) UpdateSubGraph(ctx context.Context, input model.Input
return nil, err
}
// Publish schema update to subscribers
go func() {
// Debounce schema update publishing so rapid successive updates for the
// same org+ref only trigger one config generation.
r.Debouncer.Debounce(orgId+":"+input.Ref, func() {
services, lastUpdate := r.Cache.Services(orgId, input.Ref, "")
r.Logger.Info("Publishing schema update after subgraph change",
"ref", input.Ref,
@@ -191,19 +192,11 @@ func (r *mutationResolver) UpdateSubGraph(ctx context.Context, input model.Input
r.Logger.Error("fetch subgraph for update notification", "error", err)
continue
}
subGraphs[i] = &model.SubGraph{
ID: sg.ID.String(),
Service: sg.Service,
URL: sg.Url,
WsURL: sg.WSUrl,
Sdl: sg.Sdl,
ChangedBy: sg.ChangedBy,
ChangedAt: sg.ChangedAt,
}
subGraphs[i] = r.toGqlSubGraph(sg)
}
// Generate Cosmo router config
cosmoConfig, err := GenerateCosmoRouterConfig(subGraphs)
// Generate Cosmo router config (concurrency-limited)
cosmoConfig, err := r.CosmoGenerator.Generate(context.Background(), subGraphs)
if err != nil {
r.Logger.Error("generate cosmo config for update", "error", err)
cosmoConfig = "" // Send empty if generation fails
@@ -225,7 +218,7 @@ func (r *mutationResolver) UpdateSubGraph(ctx context.Context, input model.Input
)
r.PubSub.Publish(input.Ref, update)
}()
})
return r.toGqlSubGraph(subGraph), nil
}
@@ -292,30 +285,16 @@ func (r *queryResolver) Supergraph(ctx context.Context, ref string, isAfter *str
}, nil
}
subGraphs := make([]*model.SubGraph, len(services))
serviceSDLs := make([]string, len(services))
for i, id := range services {
sg, err := r.fetchSubGraph(ctx, id)
if err != nil {
return nil, err
}
subGraphs[i] = &model.SubGraph{
ID: sg.ID.String(),
Service: sg.Service,
URL: sg.Url,
WsURL: sg.WSUrl,
Sdl: sg.Sdl,
ChangedBy: sg.ChangedBy,
ChangedAt: sg.ChangedAt,
}
subGraphs[i] = r.toGqlSubGraph(sg)
serviceSDLs[i] = sg.Sdl
}
var serviceSDLs []string
for _, id := range services {
sg, err := r.fetchSubGraph(ctx, id)
if err != nil {
return nil, err
}
serviceSDLs = append(serviceSDLs, sg.Sdl)
}
sdl, err := sdlmerge.MergeSDLs(serviceSDLs...)
if err != nil {
return nil, err
@@ -388,8 +367,8 @@ func (r *queryResolver) LatestSchema(ctx context.Context, ref string) (*model.Sc
}
}
// Generate Cosmo router config
cosmoConfig, err := GenerateCosmoRouterConfig(subGraphs)
// Generate Cosmo router config (concurrency-limited)
cosmoConfig, err := r.CosmoGenerator.Generate(ctx, subGraphs)
if err != nil {
r.Logger.Error("generate cosmo config", "error", err)
cosmoConfig = "" // Return empty if generation fails
@@ -432,9 +411,6 @@ func (r *subscriptionResolver) SchemaUpdates(ctx context.Context, ref string) (<
// Send initial state immediately
go func() {
// Use background context for async operation
bgCtx := context.Background()
services, lastUpdate := r.Cache.Services(orgId, ref, "")
r.Logger.Info("Preparing initial schema update",
"ref", ref,
@@ -445,24 +421,16 @@ func (r *subscriptionResolver) SchemaUpdates(ctx context.Context, ref string) (<
subGraphs := make([]*model.SubGraph, len(services))
for i, id := range services {
sg, err := r.fetchSubGraph(bgCtx, id)
sg, err := r.fetchSubGraph(ctx, id)
if err != nil {
r.Logger.Error("fetch subgraph for initial update", "error", err, "id", id)
continue
}
subGraphs[i] = &model.SubGraph{
ID: sg.ID.String(),
Service: sg.Service,
URL: sg.Url,
WsURL: sg.WSUrl,
Sdl: sg.Sdl,
ChangedBy: sg.ChangedBy,
ChangedAt: sg.ChangedAt,
}
subGraphs[i] = r.toGqlSubGraph(sg)
}
// Generate Cosmo router config
cosmoConfig, err := GenerateCosmoRouterConfig(subGraphs)
// Generate Cosmo router config (concurrency-limited)
cosmoConfig, err := r.CosmoGenerator.Generate(ctx, subGraphs)
if err != nil {
r.Logger.Error("generate cosmo config", "error", err)
cosmoConfig = "" // Send empty if generation fails
@@ -483,7 +451,11 @@ func (r *subscriptionResolver) SchemaUpdates(ctx context.Context, ref string) (<
"cosmoConfigLength", len(cosmoConfig),
)
ch <- update
select {
case ch <- update:
case <-ctx.Done():
return
}
}()
// Clean up subscription when context is done