fix: prevent OOM on rapid schema publishing
schemas / vulnerabilities (pull_request) Successful in 6m43s
schemas / check-release (pull_request) Successful in 11m27s
schemas / check (pull_request) Successful in 14m51s
pre-commit / pre-commit (pull_request) Successful in 19m39s
schemas / build (pull_request) Successful in 8m26s
schemas / deploy-prod (pull_request) Has been skipped

Add concurrency-limited CosmoGenerator (semaphore limit=1, 60s timeout)
to prevent unbounded concurrent wgc process spawning. Add debouncer
(500ms) to coalesce rapid schema updates per org+ref. Fix double
subgraph fetch in Supergraph resolver and goroutine leak in
SchemaUpdates subscription.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-02-23 08:05:47 +01:00
parent a9885f8b65
commit 28aa32ad8c
8 changed files with 283 additions and 60 deletions
+112
View File
@@ -1,11 +1,15 @@
package graph
import (
"context"
"encoding/json"
"fmt"
"os"
"strings"
"sync"
"sync/atomic"
"testing"
"time"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
@@ -459,6 +463,114 @@ func TestGenerateCosmoRouterConfig_MockError(t *testing.T) {
assert.Equal(t, 1, mockExecutor.CallCount, "Should have attempted to call executor")
}
// SlowMockExecutor wraps MockCommandExecutor with an artificial delay and
// bookkeeping about how many Execute calls overlap, so tests can observe
// whether callers serialize access to the executor.
type SlowMockExecutor struct {
	MockCommandExecutor

	// delay is how long Execute sleeps before delegating to the embedded mock.
	delay time.Duration

	// concurrent counts the Execute calls currently in flight.
	concurrent atomic.Int32
	// maxSeen is the highest value concurrent has reached so far.
	maxSeen atomic.Int32

	// mu serializes the delegated calls into the embedded mock.
	mu sync.Mutex
}
// Execute records the call as in flight, raises the observed concurrency
// high-water mark if needed, sleeps for the configured delay, and then
// forwards the call to the embedded MockCommandExecutor under the mutex.
func (m *SlowMockExecutor) Execute(name string, args ...string) ([]byte, error) {
	inFlight := m.concurrent.Add(1)
	defer m.concurrent.Add(-1)

	// Publish inFlight as the new peak unless another call has already
	// recorded a value at least as large; retry when the CAS loses a race.
	for peak := m.maxSeen.Load(); inFlight > peak; peak = m.maxSeen.Load() {
		if m.maxSeen.CompareAndSwap(peak, inFlight) {
			break
		}
	}

	time.Sleep(m.delay)

	m.mu.Lock()
	defer m.mu.Unlock()
	return m.MockCommandExecutor.Execute(name, args...)
}
// TestCosmoGenerator_ConcurrencyLimit fires several Generate calls at once
// against a slow executor and asserts that the executor never observed more
// than one call in flight.
func TestCosmoGenerator_ConcurrencyLimit(t *testing.T) {
	const callers = 5

	slowExec := &SlowMockExecutor{delay: 100 * time.Millisecond}
	generator := NewCosmoGenerator(slowExec, 5*time.Second)

	input := []*model.SubGraph{{
		Service: "svc",
		URL:     stringPtr("http://localhost:4001/query"),
		Sdl:     "type Query { hello: String }",
	}}

	var pending sync.WaitGroup
	pending.Add(callers)
	for i := 0; i < callers; i++ {
		go func() {
			defer pending.Done()
			// Results are deliberately discarded: only the overlap recorded
			// by the executor matters for this test.
			_, _ = generator.Generate(context.Background(), input)
		}()
	}
	pending.Wait()

	assert.Equal(t, int32(1), slowExec.maxSeen.Load(),
		"at most 1 wgc process should run concurrently")
}
// TestCosmoGenerator_Timeout checks that a Generate call fails with an
// "acquire cosmo generator" error when an earlier call is still busy in the
// executor for longer than the generator's configured timeout.
func TestCosmoGenerator_Timeout(t *testing.T) {
	// Executor that takes longer than the timeout.
	executor := &SlowMockExecutor{delay: 500 * time.Millisecond}
	gen := NewCosmoGenerator(executor, 50*time.Millisecond)

	subGraphs := []*model.SubGraph{
		{
			Service: "svc",
			URL:     stringPtr("http://localhost:4001/query"),
			Sdl:     "type Query { hello: String }",
		},
	}

	// First call: occupies the semaphore for 500ms.
	done := make(chan struct{})
	go func() {
		defer close(done)
		_, _ = gen.Generate(context.Background(), subGraphs)
	}()
	// Join the background call before the test finishes so the goroutine
	// (and its use of the mock executor) cannot leak into later tests.
	t.Cleanup(func() { <-done })

	// Wait until the first call is actually inside the executor instead of
	// guessing with a fixed sleep; a fixed sleep is racy on a loaded machine.
	require.Eventually(t, func() bool {
		return executor.concurrent.Load() == 1
	}, 2*time.Second, time.Millisecond, "first Generate call never reached the executor")

	// Second call: should time out waiting for the semaphore.
	_, err := gen.Generate(context.Background(), subGraphs)
	require.Error(t, err)
	assert.Contains(t, err.Error(), "acquire cosmo generator")
}
// TestCosmoGenerator_ContextCancellation checks that Generate fails with an
// "acquire cosmo generator" error when it is called with an already-cancelled
// context while an earlier call is still busy in the executor.
func TestCosmoGenerator_ContextCancellation(t *testing.T) {
	executor := &SlowMockExecutor{delay: 500 * time.Millisecond}
	gen := NewCosmoGenerator(executor, 5*time.Second)

	subGraphs := []*model.SubGraph{
		{
			Service: "svc",
			URL:     stringPtr("http://localhost:4001/query"),
			Sdl:     "type Query { hello: String }",
		},
	}

	// First call: occupies the semaphore.
	done := make(chan struct{})
	go func() {
		defer close(done)
		_, _ = gen.Generate(context.Background(), subGraphs)
	}()
	// Join the background call before the test finishes so the goroutine
	// (and its use of the mock executor) cannot leak into later tests.
	t.Cleanup(func() { <-done })

	// Wait until the first call is actually inside the executor. With a
	// fixed sleep the second call could start before the semaphore is held,
	// and the outcome would then depend on scheduling.
	require.Eventually(t, func() bool {
		return executor.concurrent.Load() == 1
	}, 2*time.Second, time.Millisecond, "first Generate call never reached the executor")

	// Second call with an already-cancelled context.
	ctx, cancel := context.WithCancel(context.Background())
	cancel()

	_, err := gen.Generate(ctx, subGraphs)
	require.Error(t, err)
	assert.Contains(t, err.Error(), "acquire cosmo generator")
}
// Helper function for tests
func stringPtr(s string) *string {
return &s