learning_ai_common_plat/services/monitoring/health-check.ts
saravanakumardb1 c97e697097 feat(services): add monitoring (Loki + Grafana config, health-check)
- Copied as-is from learning_voice_ai_agent/services/monitoring
- Grafana dashboards + provisioning for Loki datasource
- health-check.ts for service health polling
- Updated pnpm-workspace.yaml to include services/*
2026-02-12 11:39:24 -08:00

120 lines
4.7 KiB
TypeScript

/**
* Monitoring & Health Check — aggregates health from all services.
*
* Standalone script that polls each service's /health endpoint and
* reports combined status. Can be run as a cron job, GitHub Action,
* or standalone HTTP endpoint.
*
* Usage:
* npx tsx services/monitoring/health-check.ts # one-shot check
* npx tsx services/monitoring/health-check.ts --serve # HTTP server on :4004
*
* Environment:
* BACKEND_URL (default: http://localhost:8000)
* GROWTH_SERVICE_URL (default: http://localhost:4001)
* BILLING_SERVICE_URL (default: http://localhost:4002)
* PLATFORM_SERVICE_URL (default: http://localhost:4003)
* ADMIN_DASHBOARD_URL (default: http://localhost:3001)
* USER_DASHBOARD_URL (default: http://localhost:3002)
* MONITOR_PORT (default: 4004; listen port used with --serve)
*/
export {};
/** Outcome of probing one service's health endpoint. */
interface ServiceCheck {
  /** Human-readable service label. */
  name: string;
  /** Base URL of the service (the health path is not included). */
  url: string;
  /**
   * "healthy" for a successful (2xx) response, "unhealthy" for any other
   * HTTP response, "unreachable" when the request itself failed.
   */
  status:
    | "healthy"
    | "unhealthy"
    | "unreachable";
  /** Rounded milliseconds from sending the request to its response or failure. */
  responseTimeMs: number;
  /** Parsed JSON body of a healthy response, when one could be read. */
  details?: Record<string, unknown>;
  /** Failure description: an `HTTP <status>` label or the stringified exception. */
  error?: string;
}
/** Aggregated health snapshot across every monitored service. */
interface HealthReport {
  /**
   * "down" when every service is unreachable, "degraded" when at least one
   * is unhealthy or unreachable, otherwise "healthy".
   */
  overall:
    | "healthy"
    | "degraded"
    | "down";
  /** ISO-8601 time at which the report was assembled. */
  timestamp: string;
  /** Individual probe results, one per monitored service. */
  services: ServiceCheck[];
  /** Per-status counts plus the total number of services checked. */
  summary: {
    healthy: number;
    unhealthy: number;
    unreachable: number;
    total: number;
  };
}
/**
 * Services to probe. Each row is: display name, URL override from the
 * environment, fallback base URL, and the health endpoint path.
 * An unset or empty override falls back to the localhost default.
 */
const SERVICES: { name: string; url: string; path: string }[] = (
  [
    ["Backend API", process.env.BACKEND_URL, "http://localhost:8000", "/health"],
    ["Growth Service", process.env.GROWTH_SERVICE_URL, "http://localhost:4001", "/health"],
    ["Billing Service", process.env.BILLING_SERVICE_URL, "http://localhost:4002", "/health"],
    ["Platform Service", process.env.PLATFORM_SERVICE_URL, "http://localhost:4003", "/health"],
    ["Admin Dashboard", process.env.ADMIN_DASHBOARD_URL, "http://localhost:3001", "/api/health"],
    ["User Dashboard", process.env.USER_DASHBOARD_URL, "http://localhost:3002", "/api/health"],
  ] as const
).map(([name, configuredUrl, defaultUrl, path]) => ({
  name,
  url: configuredUrl || defaultUrl,
  path,
}));
/**
 * Probes one service's health endpoint and classifies the outcome.
 *
 * The request is aborted after 5 seconds. This function never rejects:
 * every failure is folded into the returned record.
 *
 * @param svc - Service label, base URL and health endpoint path.
 * @returns "healthy" for a 2xx response (with the JSON object body in
 *   `details` when one is present), "unhealthy" for any other HTTP status,
 *   or "unreachable" when the request failed or timed out.
 */
async function checkService(svc: { name: string; url: string; path: string }): Promise<ServiceCheck> {
  // Drop trailing slashes so "http://host/" + "/health" does not become "http://host//health".
  const fullUrl = `${svc.url.replace(/\/+$/, "")}${svc.path}`;
  const start = performance.now();
  const elapsedMs = (): number => Math.round(performance.now() - start);
  try {
    const res = await fetch(fullUrl, { signal: AbortSignal.timeout(5_000) });
    // Measured up to response headers; reading the body is not included.
    const responseTimeMs = elapsedMs();
    if (!res.ok) {
      // Release the unread body so the underlying connection is not held open.
      // A cancel failure must not turn an "unhealthy" result into "unreachable".
      await res.body?.cancel().catch(() => undefined);
      return { name: svc.name, url: svc.url, status: "unhealthy", responseTimeMs, error: `HTTP ${res.status}` };
    }
    let details: Record<string, unknown> | undefined;
    try {
      const payload: unknown = await res.json();
      // Only a plain JSON object matches the declared `details` shape;
      // arrays, null and primitives are dropped.
      if (typeof payload === "object" && payload !== null && !Array.isArray(payload)) {
        details = payload as Record<string, unknown>;
      }
    } catch {
      /* body missing or not JSON — the service still counts as healthy */
    }
    return { name: svc.name, url: svc.url, status: "healthy", responseTimeMs, details };
  } catch (err) {
    return { name: svc.name, url: svc.url, status: "unreachable", responseTimeMs: elapsedMs(), error: String(err) };
  }
}
/**
 * Probes every entry in SERVICES concurrently and folds the results into
 * a single report.
 *
 * Overall status is "down" only when every service is unreachable,
 * "degraded" when at least one is unhealthy or unreachable, and
 * "healthy" otherwise.
 *
 * @returns The combined report, timestamped after all probes have finished.
 */
async function generateReport(): Promise<HealthReport> {
  const services = await Promise.all(SERVICES.map(checkService));

  // Tally each status in a single pass.
  const summary = { healthy: 0, unhealthy: 0, unreachable: 0, total: services.length };
  for (const { status } of services) {
    summary[status] += 1;
  }

  let overall: HealthReport["overall"];
  if (summary.unreachable === summary.total) {
    overall = "down";
  } else if (summary.unhealthy + summary.unreachable > 0) {
    overall = "degraded";
  } else {
    overall = "healthy";
  }

  return {
    overall,
    timestamp: new Date().toISOString(),
    services,
    summary,
  };
}
// ── CLI / HTTP server mode ──
// With `--serve`: start an HTTP server that runs a fresh health check on every
// request and answers 200 when everything is healthy, 503 otherwise.
// Without it: run one check, print a summary, and exit 0 (healthy) or 1.
const args = process.argv.slice(2);
if (args.includes("--serve")) {
  // Run as HTTP server for continuous monitoring
  const { createServer } = await import("http");
  const PORT = Number(process.env.MONITOR_PORT || 4004);
  const server = createServer(async (_req, res) => {
    try {
      const report = await generateReport();
      res.writeHead(report.overall === "healthy" ? 200 : 503, { "Content-Type": "application/json" });
      res.end(JSON.stringify(report, null, 2));
    } catch (err) {
      // A rejected async listener would otherwise surface as an unhandled
      // rejection and leave the client waiting for a response.
      if (!res.headersSent) {
        res.writeHead(500, { "Content-Type": "application/json" });
      }
      res.end(JSON.stringify({ error: String(err) }));
    }
  });
  server.listen(PORT, () => {
    console.log(`🩺 Monitoring dashboard running on http://localhost:${PORT}`);
    console.log(` Checking ${SERVICES.length} services every request`);
  });
} else {
  // One-shot check
  const report = await generateReport();
  const overallIcon = { healthy: "✅", degraded: "⚠️", down: "❌" };
  // Built once instead of on every loop iteration.
  const statusIcon = { healthy: "✅", unhealthy: "⚠️", unreachable: "❌" };
  console.log(`\n${overallIcon[report.overall]} Overall: ${report.overall.toUpperCase()}\n`);
  for (const svc of report.services) {
    // Separate the error from the timing so it does not print as "12msHTTP 500".
    const errorSuffix = svc.error ? ` — ${svc.error}` : "";
    console.log(` ${statusIcon[svc.status]} ${svc.name.padEnd(20)} ${svc.responseTimeMs}ms${errorSuffix}`);
  }
  console.log(`\nHealthy: ${report.summary.healthy}/${report.summary.total}`);
  // Non-zero exit lets cron jobs and CI detect a degraded or down system.
  process.exit(report.overall === "healthy" ? 0 : 1);
}