fix: prevent OOM on rapid schema publishing
schemas / vulnerabilities (pull_request) Successful in 6m43s
schemas / check-release (pull_request) Successful in 11m27s
schemas / check (pull_request) Successful in 14m51s
pre-commit / pre-commit (pull_request) Successful in 19m39s
schemas / build (pull_request) Successful in 8m26s
schemas / deploy-prod (pull_request) Has been skipped
schemas / vulnerabilities (pull_request) Successful in 6m43s
schemas / check-release (pull_request) Successful in 11m27s
schemas / check (pull_request) Successful in 14m51s
pre-commit / pre-commit (pull_request) Successful in 19m39s
schemas / build (pull_request) Successful in 8m26s
schemas / deploy-prod (pull_request) Has been skipped
Add concurrency-limited CosmoGenerator (semaphore limit=1, 60s timeout) to prevent unbounded concurrent wgc process spawning. Add debouncer (500ms) to coalesce rapid schema updates per org+ref. Fix double subgraph fetch in Supergraph resolver and goroutine leak in SchemaUpdates subscription. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -1,11 +1,15 @@
|
||||
package graph
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"os"
|
||||
"strings"
|
||||
"sync"
|
||||
"sync/atomic"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
@@ -459,6 +463,114 @@ func TestGenerateCosmoRouterConfig_MockError(t *testing.T) {
|
||||
assert.Equal(t, 1, mockExecutor.CallCount, "Should have attempted to call executor")
|
||||
}
|
||||
|
||||
// SlowMockExecutor simulates a slow wgc command for concurrency testing.
|
||||
type SlowMockExecutor struct {
|
||||
MockCommandExecutor
|
||||
delay time.Duration
|
||||
mu sync.Mutex
|
||||
concurrent atomic.Int32
|
||||
maxSeen atomic.Int32
|
||||
}
|
||||
|
||||
func (m *SlowMockExecutor) Execute(name string, args ...string) ([]byte, error) {
|
||||
cur := m.concurrent.Add(1)
|
||||
// Track the maximum concurrent executions observed.
|
||||
for {
|
||||
old := m.maxSeen.Load()
|
||||
if cur <= old || m.maxSeen.CompareAndSwap(old, cur) {
|
||||
break
|
||||
}
|
||||
}
|
||||
defer m.concurrent.Add(-1)
|
||||
|
||||
time.Sleep(m.delay)
|
||||
|
||||
m.mu.Lock()
|
||||
defer m.mu.Unlock()
|
||||
return m.MockCommandExecutor.Execute(name, args...)
|
||||
}
|
||||
|
||||
func TestCosmoGenerator_ConcurrencyLimit(t *testing.T) {
|
||||
executor := &SlowMockExecutor{delay: 100 * time.Millisecond}
|
||||
gen := NewCosmoGenerator(executor, 5*time.Second)
|
||||
|
||||
subGraphs := []*model.SubGraph{
|
||||
{
|
||||
Service: "svc",
|
||||
URL: stringPtr("http://localhost:4001/query"),
|
||||
Sdl: "type Query { hello: String }",
|
||||
},
|
||||
}
|
||||
|
||||
var wg sync.WaitGroup
|
||||
for range 5 {
|
||||
wg.Add(1)
|
||||
go func() {
|
||||
defer wg.Done()
|
||||
_, _ = gen.Generate(context.Background(), subGraphs)
|
||||
}()
|
||||
}
|
||||
wg.Wait()
|
||||
|
||||
assert.Equal(t, int32(1), executor.maxSeen.Load(),
|
||||
"at most 1 wgc process should run concurrently")
|
||||
}
|
||||
|
||||
func TestCosmoGenerator_Timeout(t *testing.T) {
|
||||
// Executor that takes longer than the timeout.
|
||||
executor := &SlowMockExecutor{delay: 500 * time.Millisecond}
|
||||
gen := NewCosmoGenerator(executor, 50*time.Millisecond)
|
||||
|
||||
subGraphs := []*model.SubGraph{
|
||||
{
|
||||
Service: "svc",
|
||||
URL: stringPtr("http://localhost:4001/query"),
|
||||
Sdl: "type Query { hello: String }",
|
||||
},
|
||||
}
|
||||
|
||||
// First call: occupies the semaphore for 500ms.
|
||||
go func() {
|
||||
_, _ = gen.Generate(context.Background(), subGraphs)
|
||||
}()
|
||||
|
||||
// Give the first goroutine time to acquire the semaphore.
|
||||
time.Sleep(20 * time.Millisecond)
|
||||
|
||||
// Second call: should timeout waiting for the semaphore.
|
||||
_, err := gen.Generate(context.Background(), subGraphs)
|
||||
require.Error(t, err)
|
||||
assert.Contains(t, err.Error(), "acquire cosmo generator")
|
||||
}
|
||||
|
||||
func TestCosmoGenerator_ContextCancellation(t *testing.T) {
|
||||
executor := &SlowMockExecutor{delay: 500 * time.Millisecond}
|
||||
gen := NewCosmoGenerator(executor, 5*time.Second)
|
||||
|
||||
subGraphs := []*model.SubGraph{
|
||||
{
|
||||
Service: "svc",
|
||||
URL: stringPtr("http://localhost:4001/query"),
|
||||
Sdl: "type Query { hello: String }",
|
||||
},
|
||||
}
|
||||
|
||||
// First call: occupies the semaphore.
|
||||
go func() {
|
||||
_, _ = gen.Generate(context.Background(), subGraphs)
|
||||
}()
|
||||
|
||||
time.Sleep(20 * time.Millisecond)
|
||||
|
||||
// Second call with an already-cancelled context.
|
||||
ctx, cancel := context.WithCancel(context.Background())
|
||||
cancel()
|
||||
|
||||
_, err := gen.Generate(ctx, subGraphs)
|
||||
require.Error(t, err)
|
||||
assert.Contains(t, err.Error(), "acquire cosmo generator")
|
||||
}
|
||||
|
||||
// Helper function for tests
|
||||
func stringPtr(s string) *string {
|
||||
return &s
|
||||
|
||||
Reference in New Issue
Block a user