From eefab471d69c0765410b26b64f261f4f528b10c9 Mon Sep 17 00:00:00 2001
From: Joakim Olsson <joakim@unbound.se>
Date: Thu, 21 May 2026 18:55:51 +0200
Subject: [PATCH] fix(k8s): add scaleUp/scaleDown stabilization to schemas HPA
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Even with the schema cache and startup warmup, residual CPU bursts
(metrics-server occasionally samples a request mid-flight) were enough
to trip a brief scale-up, after which the default 5min scaleDown
stabilization pinned the deployment at maxReplicas long after the
spike had subsided.

Tune both directions:

- scaleUp.stabilizationWindowSeconds: 120 — elevated CPU must
  persist for two consecutive minutes before any pod is added, so
  brief metric anomalies no longer move replicas.
- scaleUp policy: add at most 1 pod per 60s. Smooths reaction.
- scaleDown.stabilizationWindowSeconds: 120 (default 300) — once
  the workload calms, return to minReplicas faster.
- scaleDown policy: remove at most 1 pod per 60s. Avoids dropping
  many replicas at once when the load subsides.
---
 k8s/autoscale.yaml | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)

diff --git a/k8s/autoscale.yaml b/k8s/autoscale.yaml
index 9509b9f..d23436a 100644
--- a/k8s/autoscale.yaml
+++ b/k8s/autoscale.yaml
@@ -18,3 +18,23 @@ spec:
       target:
         type: Utilization
         averageUtilization: 60
+  behavior:
+    scaleUp:
+      # Wait 2min of sustained high CPU before scaling up. Schemas is
+      # event-driven and the per-request work is bursty even with the
+      # cache + warmup, so single spikes shouldn't pull replicas up.
+      stabilizationWindowSeconds: 120
+      policies:
+      - type: Pods  # rate limit: add at most 1 pod per 60s
+        value: 1
+        periodSeconds: 60
+    scaleDown:
+      # Default 300s window kept pods pinned at maxReplicas long after
+      # the triggering spike had subsided. 120s is long enough to avoid
+      # flapping but lets the deployment return to minReplicas quickly
+      # once the workload calms.
+      stabilizationWindowSeconds: 120
+      policies:
+      - type: Pods  # rate limit: remove at most 1 pod per 60s
+        value: 1
+        periodSeconds: 60
-- 
2.52.0