# schemas/k8s/autoscale.yaml

---
# HorizontalPodAutoscaler for the "schemas" Deployment. Keeps the replica
# count between 2 and 4, driven by average CPU utilization.
apiVersion: autoscaling/v2
kind: HorizontalPodAutoscaler
metadata:
  name: schemas
  labels:
    app.kubernetes.io/name: schemas
spec:
  # Workload whose replica count this autoscaler manages.
  scaleTargetRef:
    apiVersion: apps/v1
    kind: Deployment
    name: schemas
  minReplicas: 2
  maxReplicas: 4
  metrics:
    # Single scaling signal: average CPU utilization, with a 60% target.
    - type: Resource
      resource:
        name: cpu
        target:
          type: Utilization
          averageUtilization: 60
  behavior:
    scaleUp:
      # Only scale up after CPU has stayed high for a full 2 minutes.
      # Schemas is event-driven and its per-request work is bursty even
      # with the cache + warmup, so an isolated spike should not add
      # replicas.
      stabilizationWindowSeconds: 120
      policies:
        # Add at most one pod per 60-second period.
        - periodSeconds: 60
          type: Pods
          value: 1
    scaleDown:
      # With the default 300s window, pods stayed pinned at maxReplicas
      # long after the spike that triggered them had passed. A 120s
      # window still guards against flapping while letting the
      # deployment drop back to minReplicas soon after load settles.
      stabilizationWindowSeconds: 120
      policies:
        # Remove at most one pod per 60-second period.
        - periodSeconds: 60
          type: Pods
          value: 1