From a9dea19531300ac051feb71e6c9dd036b63905d0 Mon Sep 17 00:00:00 2001
From: Joakim Olsson
Date: Fri, 21 Nov 2025 10:24:34 +0100
Subject: [PATCH] feat(health): add health checking endpoints and logic

Introduce health checking functionality with liveness and readiness
endpoints to monitor the application's status. Implement a health
checker that verifies database connectivity and provides a simple
liveness check. Update service routing to use the new health checker
functionality. Add corresponding unit tests for validation.
---
Review notes (placed after the three-dash separator, so they are not
part of the commit message):

* /health stays routed but its response changes: the removed healthFunc
  wrote the plain-text body "OK", while LivenessHandler writes a JSON
  document with "status":"UP" and Content-Type application/json.
  Anything matching on the old body needs updating.
* /health/live never touches the database. /health/ready pings it with
  a 5 second timeout and answers 503 with "database":"DOWN" when the
  ping fails.
* The readiness probe in k8s/deploy.yaml uses timeoutSeconds: 5, the
  same budget as the handler's ping timeout. On a hanging database the
  probe may therefore time out before the handler can send its 503.
  NOTE(review): confirm this is intended.
* Line breaks and indentation in this copy were reconstructed (tabs for
  Go, nested spaces for YAML, YAML depth assumed). Verify against the
  repository before applying.

 .gitignore             |  1 +
 cmd/service/service.go | 11 ++++---
 go.mod                 |  1 +
 go.sum                 |  1 +
 health/health.go       | 73 ++++++++++++++++++++++++++++++++++++++++
 health/health_test.go  | 75 ++++++++++++++++++++++++++++++++++++++++++
 k8s/deploy.yaml        | 11 ++++++-
 7 files changed, 167 insertions(+), 6 deletions(-)
 create mode 100644 health/health.go
 create mode 100644 health/health_test.go

diff --git a/.gitignore b/.gitignore
index e452663..b20db27 100644
--- a/.gitignore
+++ b/.gitignore
@@ -6,5 +6,6 @@ coverage.html
 /exported
 /release
 /schemactl
+/service
 CHANGES.md
 VERSION
diff --git a/cmd/service/service.go b/cmd/service/service.go
index 0c7f39c..98b909a 100644
--- a/cmd/service/service.go
+++ b/cmd/service/service.go
@@ -30,6 +30,7 @@ import (
 	"gitlab.com/unboundsoftware/schemas/domain"
 	"gitlab.com/unboundsoftware/schemas/graph"
 	"gitlab.com/unboundsoftware/schemas/graph/generated"
+	"gitlab.com/unboundsoftware/schemas/health"
 	"gitlab.com/unboundsoftware/schemas/logging"
 	"gitlab.com/unboundsoftware/schemas/middleware"
 	"gitlab.com/unboundsoftware/schemas/monitoring"
@@ -241,8 +242,12 @@ func start(closeEvents chan error, logger *slog.Logger, connectToAmqpFunc func(u
 		Cache: lru.New[string](100),
 	})
 
+	healthChecker := health.New(db.DB, logger)
+
 	mux.Handle("/", monitoring.Handler(playground.Handler("GraphQL playground", "/query")))
-	mux.Handle("/health", http.HandlerFunc(healthFunc))
+	mux.Handle("/health", http.HandlerFunc(healthChecker.LivenessHandler))
+	mux.Handle("/health/live", http.HandlerFunc(healthChecker.LivenessHandler))
+	mux.Handle("/health/ready", http.HandlerFunc(healthChecker.ReadinessHandler))
 	mux.Handle("/query", cors.AllowAll().Handler(
 		monitoring.Handler(
 			mw.Middleware().CheckJWT(
@@ -301,10 +306,6 @@ func loadSubGraphs(ctx context.Context, eventStore eventsourced.EventStore, serv
 	return nil
 }
 
-func healthFunc(w http.ResponseWriter, _ *http.Request) {
-	_, _ = w.Write([]byte("OK"))
-}
-
 func ConnectAMQP(url string) (Connection, error) {
 	return goamqp.NewFromURL(serviceName, url)
 }
diff --git a/go.mod b/go.mod
index 7599dfd..6be4e2f 100644
--- a/go.mod
+++ b/go.mod
@@ -4,6 +4,7 @@ go 1.25
 
 require (
 	github.com/99designs/gqlgen v0.17.83
+	github.com/DATA-DOG/go-sqlmock v1.5.2
 	github.com/Khan/genqlient v0.8.1
 	github.com/alecthomas/kong v1.13.0
 	github.com/apex/log v1.9.0
diff --git a/go.sum b/go.sum
index 3abb5fe..e13c599 100644
--- a/go.sum
+++ b/go.sum
@@ -83,6 +83,7 @@ github.com/jmespath/go-jmespath v0.0.0-20180206201540-c2b33e8439af/go.mod h1:Nht
 github.com/jmoiron/sqlx v1.4.0 h1:1PLqN7S1UYp5t4SrVVnt4nUVNemrDAtxlulVe+Qgm3o=
 github.com/jmoiron/sqlx v1.4.0/go.mod h1:ZrZ7UsYB/weZdl2Bxg6jCRO9c3YHl8r3ahlKmRT4JLY=
 github.com/jpillora/backoff v0.0.0-20180909062703-3050d21c67d7/go.mod h1:2iMrUgbbvHEiQClaW2NsSzMyGHqN+rDFqY705q49KG0=
+github.com/kisielk/sqlstruct v0.0.0-20201105191214-5f3e10d3ab46/go.mod h1:yyMNCyc/Ib3bDTKd379tNMpB/7/H5TjM2Y9QJ5THLbE=
 github.com/kr/logfmt v0.0.0-20140226030751-b84e30acd515/go.mod h1:+0opPa2QZZtGFBFZlji/RkVcI2GknAs/DXo4wKdlNEc=
 github.com/kr/pretty v0.2.0/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI=
 github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE=
diff --git a/health/health.go b/health/health.go
new file mode 100644
index 0000000..3daa948
--- /dev/null
+++ b/health/health.go
@@ -0,0 +1,73 @@
+package health
+
+import (
+	"context"
+	"database/sql"
+	"encoding/json"
+	"log/slog"
+	"net/http"
+	"time"
+)
+
+type Checker struct {
+	db     *sql.DB
+	logger *slog.Logger
+}
+
+func New(db *sql.DB, logger *slog.Logger) *Checker {
+	return &Checker{
+		db:     db,
+		logger: logger,
+	}
+}
+
+type HealthStatus struct {
+	Status string            `json:"status"`
+	Checks map[string]string `json:"checks,omitempty"`
+}
+
+// LivenessHandler checks if the application is running
+// This is a simple check that always returns OK if the handler is reached
+func (h *Checker) LivenessHandler(w http.ResponseWriter, r *http.Request) {
+	w.Header().Set("Content-Type", "application/json")
+	w.WriteHeader(http.StatusOK)
+	_ = json.NewEncoder(w).Encode(HealthStatus{
+		Status: "UP",
+	})
+}
+
+// ReadinessHandler checks if the application is ready to accept traffic
+// This checks database connectivity and other critical dependencies
+func (h *Checker) ReadinessHandler(w http.ResponseWriter, r *http.Request) {
+	ctx, cancel := context.WithTimeout(r.Context(), 5*time.Second)
+	defer cancel()
+
+	checks := make(map[string]string)
+	allHealthy := true
+
+	// Check database connectivity
+	if err := h.db.PingContext(ctx); err != nil {
+		h.logger.With("error", err).Warn("database health check failed")
+		checks["database"] = "DOWN"
+		allHealthy = false
+	} else {
+		checks["database"] = "UP"
+	}
+
+	status := HealthStatus{
+		Status: "UP",
+		Checks: checks,
+	}
+
+	if !allHealthy {
+		status.Status = "DOWN"
+		w.Header().Set("Content-Type", "application/json")
+		w.WriteHeader(http.StatusServiceUnavailable)
+		_ = json.NewEncoder(w).Encode(status)
+		return
+	}
+
+	w.Header().Set("Content-Type", "application/json")
+	w.WriteHeader(http.StatusOK)
+	_ = json.NewEncoder(w).Encode(status)
+}
diff --git a/health/health_test.go b/health/health_test.go
new file mode 100644
index 0000000..04b97ad
--- /dev/null
+++ b/health/health_test.go
@@ -0,0 +1,75 @@
+package health
+
+import (
+	"database/sql"
+	"log/slog"
+	"net/http"
+	"net/http/httptest"
+	"os"
+	"testing"
+
+	"github.com/DATA-DOG/go-sqlmock"
+	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
+)
+
+func TestLivenessHandler(t *testing.T) {
+	logger := slog.New(slog.NewTextHandler(os.Stdout, nil))
+	db, _, err := sqlmock.New()
+	require.NoError(t, err)
+	defer db.Close()
+
+	checker := New(db, logger)
+
+	req := httptest.NewRequest(http.MethodGet, "/health/live", nil)
+	rec := httptest.NewRecorder()
+
+	checker.LivenessHandler(rec, req)
+
+	assert.Equal(t, http.StatusOK, rec.Code)
+	assert.Contains(t, rec.Body.String(), `"status":"UP"`)
+}
+
+func TestReadinessHandler_Healthy(t *testing.T) {
+	logger := slog.New(slog.NewTextHandler(os.Stdout, nil))
+	db, mock, err := sqlmock.New(sqlmock.MonitorPingsOption(true))
+	require.NoError(t, err)
+	defer db.Close()
+
+	// Expect a ping and return success
+	mock.ExpectPing().WillReturnError(nil)
+
+	checker := New(db, logger)
+
+	req := httptest.NewRequest(http.MethodGet, "/health/ready", nil)
+	rec := httptest.NewRecorder()
+
+	checker.ReadinessHandler(rec, req)
+
+	assert.Equal(t, http.StatusOK, rec.Code)
+	assert.Contains(t, rec.Body.String(), `"status":"UP"`)
+	assert.Contains(t, rec.Body.String(), `"database":"UP"`)
+	assert.NoError(t, mock.ExpectationsWereMet())
+}
+
+func TestReadinessHandler_DatabaseDown(t *testing.T) {
+	logger := slog.New(slog.NewTextHandler(os.Stdout, nil))
+	db, mock, err := sqlmock.New(sqlmock.MonitorPingsOption(true))
+	require.NoError(t, err)
+	defer db.Close()
+
+	// Expect a ping and return error
+	mock.ExpectPing().WillReturnError(sql.ErrConnDone)
+
+	checker := New(db, logger)
+
+	req := httptest.NewRequest(http.MethodGet, "/health/ready", nil)
+	rec := httptest.NewRecorder()
+
+	checker.ReadinessHandler(rec, req)
+
+	assert.Equal(t, http.StatusServiceUnavailable, rec.Code)
+	assert.Contains(t, rec.Body.String(), `"status":"DOWN"`)
+	assert.Contains(t, rec.Body.String(), `"database":"DOWN"`)
+	assert.NoError(t, mock.ExpectationsWereMet())
+}
diff --git a/k8s/deploy.yaml b/k8s/deploy.yaml
index d8369b8..62b196f 100644
--- a/k8s/deploy.yaml
+++ b/k8s/deploy.yaml
@@ -44,13 +44,22 @@ spec:
           requests:
             cpu: "20m"
             memory: "20Mi"
+        livenessProbe:
+          httpGet:
+            path: /health/live
+            port: 8080
+          initialDelaySeconds: 10
+          periodSeconds: 10
+          timeoutSeconds: 5
+          failureThreshold: 3
         readinessProbe:
           httpGet:
-            path: /health
+            path: /health/ready
             port: 8080
           initialDelaySeconds: 5
           periodSeconds: 5
           timeoutSeconds: 5
+          failureThreshold: 3
         imagePullPolicy: IfNotPresent
         image: registry.gitlab.com/unboundsoftware/schemas:${COMMIT}
         ports: