---
# HorizontalPodAutoscaler for the "schemas" Deployment.
# Keeps the replica count between 2 and 4, driven by average CPU utilization.
apiVersion: autoscaling/v2
kind: HorizontalPodAutoscaler
metadata:
  name: schemas
  labels:
    app.kubernetes.io/name: schemas
spec:
  # Workload whose replica count this autoscaler manages.
  scaleTargetRef:
    apiVersion: apps/v1
    kind: Deployment
    name: schemas
  minReplicas: 2
  maxReplicas: 4
  metrics:
    # Single signal: CPU, with a target average utilization of 60.
    - type: Resource
      resource:
        name: cpu
        target:
          type: Utilization
          averageUtilization: 60
  behavior:
    scaleUp:
      # Require 2 minutes of sustained high CPU before adding replicas.
      # Schemas is event-driven and its per-request work stays bursty even
      # with the cache + warmup in place, so an isolated spike should not
      # pull the replica count up.
      stabilizationWindowSeconds: 120
      policies:
        # Grow by at most one pod per 60-second period.
        - type: Pods
          value: 1
          periodSeconds: 60
    scaleDown:
      # The default 300s window left pods pinned at maxReplicas long after
      # the spike that triggered the scale-up had passed. 120s is still long
      # enough to avoid flapping, while letting the deployment fall back to
      # minReplicas quickly once the workload calms.
      stabilizationWindowSeconds: 120
      policies:
        # Shrink by at most one pod per 60-second period.
        - type: Pods
          value: 1
          periodSeconds: 60