fix(health): env-driven infrastructure probe targets
ci / typecheck (map[dir:apps/booking name:booking]) (push) Successful in 20s
ci / typecheck (map[dir:apps/website name:website]) (push) Successful in 22s
ci / typecheck (map[dir:apps/portal name:portal]) (push) Successful in 28s
ci / typecheck (map[dir:services/platform-api name:platform-api]) (push) Successful in 22s
ci / test (push) Successful in 30s
ci / typecheck (map[dir:apps/operator name:operator]) (push) Successful in 23s
ci / build (map[dir:apps/booking name:booking]) (push) Successful in 10s
ci / build (map[dir:apps/operator name:operator]) (push) Successful in 31s
ci / build (map[dir:services/platform-api name:platform-api]) (push) Successful in 15s
ci / build (map[dir:apps/portal name:portal]) (push) Successful in 38s
ci / deploy (push) Successful in 42s
ci / typecheck (map[dir:apps/booking name:booking]) (push) Successful in 20s
ci / typecheck (map[dir:apps/website name:website]) (push) Successful in 22s
ci / typecheck (map[dir:apps/portal name:portal]) (push) Successful in 28s
ci / typecheck (map[dir:services/platform-api name:platform-api]) (push) Successful in 22s
ci / test (push) Successful in 30s
ci / typecheck (map[dir:apps/operator name:operator]) (push) Successful in 23s
ci / build (map[dir:apps/booking name:booking]) (push) Successful in 10s
ci / build (map[dir:apps/operator name:operator]) (push) Successful in 31s
ci / build (map[dir:services/platform-api name:platform-api]) (push) Successful in 15s
ci / build (map[dir:apps/portal name:portal]) (push) Successful in 38s
ci / deploy (push) Successful in 42s
The operator infrastructure page probed docker-compose hostnames (stalwart/postgres/redis/traefik…) which don't resolve in k3s — 7 of 9 services showed down. Probe targets now come from HEALTH_* env vars with the compose names as dev defaults; platform-api-config.yaml sets the in-cluster/host addresses. 'disabled' omits a service from the report — used for OCIS/Collabora until the files tier is deployed.
This commit is contained in:
@@ -39,3 +39,13 @@ data:
|
|||||||
BILLING_STRIPE_ENABLED: "false"
|
BILLING_STRIPE_ENABLED: "false"
|
||||||
BOOKING_PUBLIC_URL: "https://booking.dezky.eu"
|
BOOKING_PUBLIC_URL: "https://booking.dezky.eu"
|
||||||
MEET_PUBLIC_URL: "https://meet.dezky.eu"
|
MEET_PUBLIC_URL: "https://meet.dezky.eu"
|
||||||
|
# Infrastructure health-probe targets (operator → /health/platform). The
|
||||||
|
# code defaults are docker-compose hostnames; these are the k3s addresses.
|
||||||
|
# "disabled" omits a service from the report until that tier is deployed.
|
||||||
|
HEALTH_STALWART_HOSTPORT: "10.42.0.1:8080"
|
||||||
|
HEALTH_AUTHENTIK_URL: "http://authentik-server.dezky-auth.svc.cluster.local/-/health/ready/"
|
||||||
|
HEALTH_POSTGRES_HOSTPORT: "postgres.dezky-data.svc.cluster.local:5432"
|
||||||
|
HEALTH_REDIS_HOSTPORT: "redis.dezky-data.svc.cluster.local:6379"
|
||||||
|
HEALTH_TRAEFIK_HOSTPORT: "traefik.kube-system.svc.cluster.local:80"
|
||||||
|
HEALTH_OCIS_URL: "disabled"
|
||||||
|
HEALTH_COLLABORA_URL: "disabled"
|
||||||
|
|||||||
@@ -1,7 +1,9 @@
|
|||||||
// Live health probes for the services we expect to find in the Dezky stack.
|
// Live health probes for the services we expect to find in the Dezky stack.
|
||||||
// Hostnames + ports are the compose service names from
|
// Targets are env-driven (HEALTH_*): the defaults are the docker-compose
|
||||||
// infrastructure/docker-compose/docker-compose.yml. When we move to k3s,
|
// service names so dev needs no env, and production sets in-cluster DNS /
|
||||||
// swap these for in-cluster service DNS (e.g. authentik.dezky.svc...).
|
// host addresses in platform-api-config.yaml. Setting a probe's env to
|
||||||
|
// "disabled" omits that service from the report entirely — used for tiers
|
||||||
|
// that aren't deployed yet (OCIS/Collabora in prod).
|
||||||
|
|
||||||
import { Injectable } from '@nestjs/common'
|
import { Injectable } from '@nestjs/common'
|
||||||
import { InjectConnection } from '@nestjs/mongoose'
|
import { InjectConnection } from '@nestjs/mongoose'
|
||||||
@@ -40,17 +42,17 @@ export class HealthService {
|
|||||||
|
|
||||||
async probeAll(): Promise<ProbeResult[]> {
|
async probeAll(): Promise<ProbeResult[]> {
|
||||||
const probes: ProbeSpec[] = [
|
const probes: ProbeSpec[] = [
|
||||||
{ id: 'mail', name: 'Stalwart', role: 'Mail · IMAP/JMAP/SMTP', run: () => tcpProbe('stalwart', 8080, PROBE_TIMEOUT_MS) },
|
tcpSpec('mail', 'Stalwart', 'Mail · IMAP/JMAP/SMTP', 'HEALTH_STALWART_HOSTPORT', 'stalwart:8080'),
|
||||||
{ id: 'files', name: 'OCIS', role: 'Files · OwnCloud Infinite',run: () => httpProbe('http://ocis:9200/health', PROBE_TIMEOUT_MS) },
|
httpSpec('files', 'OCIS', 'Files · OwnCloud Infinite', 'HEALTH_OCIS_URL', 'http://ocis:9200/health'),
|
||||||
{ id: 'office', name: 'Collabora', role: 'Office editing · WOPI', run: () => httpProbe('http://collabora:9980/hosting/discovery', PROBE_TIMEOUT_MS) },
|
httpSpec('office','Collabora', 'Office editing · WOPI', 'HEALTH_COLLABORA_URL', 'http://collabora:9980/hosting/discovery'),
|
||||||
{ id: 'auth', name: 'Authentik', role: 'Identity · SSO · MFA', run: () => httpProbe('http://authentik-server:9000/-/health/ready/', PROBE_TIMEOUT_MS) },
|
httpSpec('auth', 'Authentik', 'Identity · SSO · MFA', 'HEALTH_AUTHENTIK_URL', 'http://authentik-server:9000/-/health/ready/'),
|
||||||
{ id: 'pg', name: 'PostgreSQL', role: 'Authentik + OCIS database',run: () => tcpProbe('postgres', 5432, PROBE_TIMEOUT_MS) },
|
tcpSpec('pg', 'PostgreSQL', 'Authentik + OCIS database', 'HEALTH_POSTGRES_HOSTPORT', 'postgres:5432'),
|
||||||
{ id: 'mongo', name: 'MongoDB', role: 'Platform application data',run: () => this.mongoPing(PROBE_TIMEOUT_MS) },
|
{ id: 'mongo', name: 'MongoDB', role: 'Platform application data',run: () => this.mongoPing(PROBE_TIMEOUT_MS) },
|
||||||
{ id: 'redis', name: 'Redis', role: 'Cache + session store', run: () => tcpProbe('redis', 6379, PROBE_TIMEOUT_MS) },
|
tcpSpec('redis', 'Redis', 'Cache + session store', 'HEALTH_REDIS_HOSTPORT', 'redis:6379'),
|
||||||
{ id: 'proxy', name: 'Traefik', role: 'Reverse proxy · TLS', run: () => tcpProbe('traefik', 80, PROBE_TIMEOUT_MS) },
|
tcpSpec('proxy', 'Traefik', 'Reverse proxy · TLS', 'HEALTH_TRAEFIK_HOSTPORT', 'traefik:80'),
|
||||||
// platform-api itself: this code is running, so it's trivially ok.
|
// platform-api itself: this code is running, so it's trivially ok.
|
||||||
{ id: 'api', name: 'Platform API', role: 'Control plane', run: async () => { /* always ok */ } },
|
{ id: 'api', name: 'Platform API', role: 'Control plane', run: async () => { /* always ok */ } },
|
||||||
]
|
].filter((p): p is ProbeSpec => p !== null)
|
||||||
|
|
||||||
const checkedAt = new Date().toISOString()
|
const checkedAt = new Date().toISOString()
|
||||||
const results = await Promise.all(probes.map((p) => run(p, checkedAt)))
|
const results = await Promise.all(probes.map((p) => run(p, checkedAt)))
|
||||||
@@ -100,6 +102,21 @@ async function run(spec: ProbeSpec, checkedAt: string): Promise<ProbeResult> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ── Probe spec builders ────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
/**
 * Builds the spec for a TCP-connect probe whose target is read from the
 * environment.
 *
 * @param id     Identifier copied onto the returned spec.
 * @param name   Display name copied onto the returned spec.
 * @param role   Role description copied onto the returned spec.
 * @param envKey Name of the environment variable holding the "host:port"
 *               target, or the literal "disabled".
 * @param dflt   Target used when the variable is unset or empty.
 * @returns The probe spec, or null when the target is "disabled" so the
 *          caller can leave the service out of the report.
 */
function tcpSpec(id: string, name: string, role: string, envKey: string, dflt: string): ProbeSpec | null {
  // `||` rather than `??` on purpose: an empty value falls back to the
  // default exactly like an unset one.
  const target = process.env[envKey] || dflt
  if (target === 'disabled') return null

  // Split on the LAST colon so an IPv6 literal ("[::1]:6379") keeps the
  // colons that belong to its address.
  const sep = target.lastIndexOf(':')
  const rawHost = sep === -1 ? '' : target.slice(0, sep)
  const host = rawHost.startsWith('[') && rawHost.endsWith(']') ? rawHost.slice(1, -1) : rawHost
  const port = sep === -1 ? Number.NaN : Number(target.slice(sep + 1))

  if (host === '' || !Number.isInteger(port) || port < 1 || port > 65535) {
    // A malformed target keeps the service in the report, but its probe
    // rejects with a message naming the variable to fix instead of handing
    // an undefined host / NaN port to the socket layer.
    return {
      id,
      name,
      role,
      run: () => Promise.reject(new Error(`${envKey}: expected "host:port", got "${target}"`)),
    }
  }

  return { id, name, role, run: () => tcpProbe(host, port, PROBE_TIMEOUT_MS) }
}
|
||||||
|
|
||||||
|
/**
 * Builds the spec for an HTTP probe whose URL is read from the environment.
 *
 * @param id     Identifier copied onto the returned spec.
 * @param name   Display name copied onto the returned spec.
 * @param role   Role description copied onto the returned spec.
 * @param envKey Name of the environment variable holding the URL, or the
 *               literal "disabled".
 * @param dflt   URL used when the variable is unset or empty.
 * @returns The probe spec, or null when the URL is "disabled" so the caller
 *          can leave the service out of the report.
 */
function httpSpec(id: string, name: string, role: string, envKey: string, dflt: string): ProbeSpec | null {
  const configured = process.env[envKey]
  // Unset and empty values both select the default.
  const endpoint = configured ? configured : dflt

  if (endpoint === 'disabled') {
    return null
  }

  // The request is deferred until the spec is run.
  const probe = () => httpProbe(endpoint, PROBE_TIMEOUT_MS)
  return { id, name, role, run: probe }
}
|
||||||
|
|
||||||
// ── Primitives ─────────────────────────────────────────────────────────────
|
// ── Primitives ─────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
function tcpProbe(host: string, port: number, timeoutMs: number): Promise<void> {
|
function tcpProbe(host: string, port: number, timeoutMs: number): Promise<void> {
|
||||||
|
|||||||
Reference in New Issue
Block a user