import net from 'node:net';

/**
 * Health state reported for a single service probe.
 * The probes in this module only ever produce 'healthy', 'degraded' or 'down'.
 */
export type ServiceStatus = 'healthy' | 'degraded' | 'down' | 'maintenance';

/** Probe strategies a service definition can select. */
export type CheckKind = 'http-json' | 'http-status' | 'tcp';

/** Where an inventory item is managed. */
export type InventorySource = 'docker' | 'vm';

/** Result of probing one service once. */
export interface ServiceCheck {
  id: string;
  name: string;
  group: string;
  /** URL (HTTP probes) or `host:port` (TCP probes) that was contacted. */
  target: string;
  status: ServiceStatus;
  /** Elapsed wall-clock time of the probe in milliseconds. */
  latency: number;
  version?: string;
  message?: string;
  /** ISO-8601 timestamp taken when the result object was built. */
  lastChecked: string;
}

/** Aggregated view over all service probes. */
export interface OpsStatus {
  overall: 'healthy' | 'degraded' | 'critical';
  timestamp: string;
  services: ServiceCheck[];
}

/** Fields shared by every entry in the service inventory. */
interface BaseDefinition {
  id: string;
  name: string;
  group: string;
  description: string;
  management: InventorySource;
  exposure: 'internal' | 'public';
  port?: number;
}

/** Service probed with an HTTP GET against `<base URL><path>`. */
interface HttpServiceDefinition extends BaseDefinition {
  kind: 'http-json' | 'http-status';
  /** Name of an environment variable that, when set and non-empty, overrides `default`. */
  env?: string;
  /** Base URL used when no override is configured. */
  default: string;
  path: string;
}

/** Service probed by opening a TCP connection to `host:port`. */
interface TcpServiceDefinition extends BaseDefinition {
  kind: 'tcp';
  host: string;
  port: number;
}

/** Discriminated on `kind`. */
export type ServiceDefinition = HttpServiceDefinition | TcpServiceDefinition;

/** A probe result enriched with static inventory metadata. */
export interface InventoryService extends ServiceCheck {
  description: string;
  management: InventorySource;
  exposure: 'internal' | 'public';
  port?: number;
  /** True when the service id has an entry in RESTARTABLE_SERVICE_CONTAINERS. */
  restartable: boolean;
}

/** Static description of a tool installed on the host. */
export interface HostTool {
  id: string;
  name: string;
  group: string;
  source: InventorySource;
  management: string;
  status: 'managed' | 'manual';
  description: string;
}

/** Inventory of every service that is probed, in display order. */
export const STACK_SERVICES: ServiceDefinition[] = [
  {
    id: 'admin-web',
    name: 'Admin Dashboard',
    group: 'Dashboards',
    description: 'Internal admin portal for platform review and ops workflows.',
    management: 'docker',
    exposure: 'internal',
    port: 3001,
    kind: 'http-status',
    default: 'http://admin-web:3001',
    path: '/api/health',
  },
  {
    id: 'tracker-web',
    name: 'Tracker Dashboard',
    group: 'Dashboards',
    description: 'Internal tracker UI for issue and delivery review.',
    management: 'docker',
    exposure: 'internal',
    port: 3003,
    kind: 'http-status',
    default: 'http://tracker-web:3003',
    path: '/api/health',
  },
  {
    id: 'lysnrai-dashboard',
    name: 'LysnrAI Dashboard',
    group: 'Product Web Apps',
    description: 'Voice AI dashboard hosted on the VM for internal product review.',
    management: 'docker',
    exposure: 'internal',
    port: 3002,
    kind: 'http-status',
    default: 'http://lysnrai-dashboard:3002',
    path: '/',
  },
  {
    id: 'chronomind-web',
    name: 'ChronoMind Web',
    group: 'Product Web Apps',
    description: 'ChronoMind web client hosted on the VM.',
    management: 'docker',
    exposure: 'internal',
    port: 3030,
    kind: 'http-status',
    default: 'http://chronomind-web:3030',
    path: '/',
  },
  {
    id: 'jarvisjr-web',
    name: 'JarvisJr Web',
    group: 'Product Web Apps',
    description: 'JarvisJr web client hosted on the VM.',
    management: 'docker',
    exposure: 'internal',
    port: 3035,
    kind: 'http-status',
    default: 'http://jarvisjr-web:3035',
    path: '/',
  },
  {
    id: 'flowmonk-web',
    name: 'FlowMonk Web',
    group: 'Product Web Apps',
    description: 'FlowMonk web client hosted on the VM.',
    management: 'docker',
    exposure: 'internal',
    port: 3040,
    kind: 'http-status',
    default: 'http://flowmonk-web:3040',
    path: '/',
  },
  {
    id: 'notelett-web',
    name: 'NoteLett Web',
    group: 'Product Web Apps',
    description: 'NoteLett web client hosted on the VM.',
    management: 'docker',
    exposure: 'internal',
    port: 3045,
    kind: 'http-status',
    default: 'http://notelett-web:3045',
    path: '/',
  },
  {
    id: 'mindlyst-web',
    name: 'MindLyst Web',
    group: 'Product Web Apps',
    description: 'MindLyst web client hosted on the VM.',
    management: 'docker',
    exposure: 'internal',
    port: 3050,
    kind: 'http-status',
    default: 'http://mindlyst-web:3050',
    path: '/',
  },
  {
    id: 'nomgap-web',
    name: 'NomGap Web',
    group: 'Product Web Apps',
    description: 'NomGap web client hosted on the VM.',
    management: 'docker',
    exposure: 'internal',
    port: 3055,
    kind: 'http-status',
    default: 'http://nomgap-web:3055',
    path: '/',
  },
  {
    id: 'actiontrail-web',
    name: 'ActionTrail Web',
    group: 'Product Web Apps',
    description: 'ActionTrail web client hosted on the VM.',
    management: 'docker',
    exposure: 'internal',
    port: 3060,
    kind: 'http-status',
    default: 'http://actiontrail-web:3060',
    path: '/',
  },
  {
    id: 'llmlab-dashboard',
    name: 'LLM Lab Dashboard',
    group: 'Internal Tooling',
    description: 'Internal LLM lab dashboard hosted on the VM for operator and developer use.',
    management: 'docker',
    exposure: 'internal',
    port: 3075,
    kind: 'http-status',
    default: 'http://llmlab-dashboard:3075',
    path: '/',
  },
  {
    id: 'efforise-web',
    name: 'Efforise Web',
    group: 'Product Web Apps',
    description: 'Efforise web client hosted on the VM.',
    management: 'docker',
    exposure: 'internal',
    port: 3080,
    kind: 'http-status',
    default: 'http://efforise-web:3080',
    path: '/',
  },
  {
    id: 'platform',
    name: 'Platform Service',
    group: 'Core Services',
    description: 'Core API and auth platform service.',
    management: 'docker',
    exposure: 'internal',
    port: 4003,
    env: 'PLATFORM_SERVICE_URL',
    kind: 'http-json',
    default: 'http://platform-service:4003',
    path: '/health',
  },
  {
    id: 'extraction',
    name: 'Extraction Service',
    group: 'Core Services',
    description: 'Structured extraction service with product-aware throttling.',
    management: 'docker',
    exposure: 'internal',
    port: 4005,
    env: 'EXTRACTION_SERVICE_URL',
    kind: 'http-json',
    default: 'http://extraction-service:4005',
    path: '/health',
  },
  {
    id: 'mcp',
    name: 'MCP Server',
    group: 'Core Services',
    description: 'Internal MCP integration surface.',
    management: 'docker',
    exposure: 'internal',
    port: 4007,
    env: 'MCP_SERVER_URL',
    kind: 'http-json',
    default: 'http://mcp-server:4007',
    path: '/health',
  },
  {
    id: 'grafana',
    name: 'Grafana',
    group: 'Observability',
    description: 'Metrics and logs visualization.',
    management: 'docker',
    exposure: 'internal',
    port: 3000,
    kind: 'http-json',
    default: 'http://grafana:3000',
    path: '/api/health',
  },
  {
    id: 'loki',
    name: 'Loki',
    group: 'Observability',
    description: 'Centralized log aggregation.',
    management: 'docker',
    exposure: 'internal',
    port: 3100,
    kind: 'http-status',
    default: 'http://loki:3100',
    path: '/ready',
  },
  {
    id: 'prometheus',
    name: 'Prometheus',
    group: 'Observability',
    description: 'Internal metrics scraping and query engine.',
    management: 'docker',
    exposure: 'internal',
    port: 9090,
    kind: 'http-status',
    default: 'http://prometheus:9090',
    path: '/-/healthy',
  },
  {
    id: 'node-exporter',
    name: 'Node Exporter',
    group: 'Observability',
    description: 'Host-level VM metrics exporter.',
    management: 'docker',
    exposure: 'internal',
    port: 9100,
    kind: 'http-status',
    default: 'http://node-exporter:9100',
    path: '/metrics',
  },
  {
    id: 'cadvisor',
    name: 'cAdvisor',
    group: 'Observability',
    description: 'Container-level metrics exporter.',
    management: 'docker',
    exposure: 'internal',
    port: 8080,
    kind: 'http-status',
    default: 'http://cadvisor:8080',
    path: '/healthz',
  },
  {
    id: 'valkey',
    name: 'Valkey',
    group: 'Shared Infrastructure',
    description: 'Shared cache and rate-limit backing store.',
    management: 'docker',
    exposure: 'internal',
    kind: 'tcp',
    host: 'valkey',
    port: 6379,
  },
  {
    id: 'gitea-registry',
    name: 'Gitea Registry',
    group: 'Shared Infrastructure',
    description: 'Private npm package registry and source control service.',
    management: 'docker',
    exposure: 'internal',
    // NOTE(review): inventory port 3300 differs from the probed URL's port 3000 —
    // presumably a host-to-container port mapping; confirm.
    port: 3300,
    kind: 'http-json',
    default: 'http://gitea-npm-registry:3000',
    path: '/api/v1/version',
  },
  {
    id: 'mailpit',
    name: 'Mailpit',
    group: 'Shared Infrastructure',
    description: 'SMTP sink and email inspection UI.',
    management: 'docker',
    exposure: 'internal',
    port: 8025,
    kind: 'http-status',
    default: 'http://mailpit:8025',
    path: '/',
  },
  {
    id: 'azurite',
    name: 'Azurite',
    group: 'Shared Infrastructure',
    description: 'Local Azure Blob Storage emulator.',
    management: 'docker',
    exposure: 'internal',
    kind: 'tcp',
    host: 'azurite',
    port: 10000,
  },
  {
    id: 'cosmos-emulator',
    name: 'Cosmos Emulator',
    group: 'Shared Infrastructure',
    description: 'Local Azure Cosmos DB emulator.',
    management: 'docker',
    exposure: 'internal',
    port: 8080,
    kind: 'http-status',
    default: 'http://cosmos-emulator:8080',
    path: '/ready',
  },
  {
    id: 'gateway',
    name: 'Traefik Gateway',
    group: 'Ingress',
    description: 'Legacy internal gateway and routing layer.',
    management: 'docker',
    exposure: 'internal',
    port: 8080,
    kind: 'http-status',
    default: 'http://gateway:8080',
    path: '/',
  },
  {
    id: 'caddy',
    name: 'Caddy',
    group: 'Ingress',
    description: 'HTTPS ingress and reverse proxy for internal and backend domains.',
    management: 'docker',
    exposure: 'public',
    kind: 'tcp',
    host: 'caddy',
    port: 80,
  },
];

/** Static list of host-level tools; nothing in this module probes them. */
export const HOST_TOOLS: HostTool[] = [
  {
    id: 'docker-ce',
    name: 'Docker CE',
    group: 'Host Tooling',
    source: 'vm',
    management: 'VM bootstrap',
    status: 'managed',
    description: 'Container runtime for the internal stack.',
  },
  {
    id: 'docker-compose',
    name: 'Docker Compose',
    group: 'Host Tooling',
    source: 'vm',
    management: 'VM bootstrap',
    status: 'managed',
    description: 'Multi-service orchestration for the VM stack.',
  },
  {
    id: 'azure-cli',
    name: 'Azure CLI',
    group: 'Host Tooling',
    source: 'vm',
    management: 'Manual install',
    status: 'manual',
    description: 'Azure subscription and NSG management from the VM.',
  },
  {
    id: 'nodejs',
    name: 'Node.js 22',
    group: 'Host Tooling',
    source: 'vm',
    management: 'VM bootstrap',
    status: 'managed',
    description: 'Build/runtime toolchain for workspace services.',
  },
  {
    id: 'pnpm',
    name: 'pnpm',
    group: 'Host Tooling',
    source: 'vm',
    management: 'VM bootstrap',
    status: 'managed',
    description: 'Workspace package manager.',
  },
  {
    id: 'git',
    name: 'git',
    group: 'Host Tooling',
    source: 'vm',
    management: 'VM bootstrap',
    status: 'managed',
    description: 'Repo sync and deployment workflow tooling.',
  },
  {
    id: 'jq',
    name: 'jq',
    group: 'Host Tooling',
    source: 'vm',
    management: 'VM bootstrap',
    status: 'managed',
    description: 'CLI JSON inspection used in ops and setup scripts.',
  },
  {
    id: 'caddy-host-config',
    name: 'Caddy Config',
    group: 'Host Tooling',
    source: 'vm',
    management: 'VM file mount',
    status: 'managed',
    description: 'Host-mounted Caddy configuration at /opt/bytelyst/Caddyfile.',
  },
];

/**
 * Maps a service id to a container name. A service counts as restartable
 * exactly when its id appears here; 'gateway' and 'caddy' have no entry.
 */
export const RESTARTABLE_SERVICE_CONTAINERS: Record<string, string> = {
  'admin-web': 'learning_ai_common_plat-admin-web-1',
  'tracker-web': 'learning_ai_common_plat-tracker-web-1',
  'lysnrai-dashboard': 'learning_ai_common_plat-lysnrai-dashboard-1',
  'chronomind-web': 'learning_ai_common_plat-chronomind-web-1',
  'jarvisjr-web': 'learning_ai_common_plat-jarvisjr-web-1',
  'flowmonk-web': 'learning_ai_common_plat-flowmonk-web-1',
  'notelett-web': 'learning_ai_common_plat-notelett-web-1',
  'mindlyst-web': 'learning_ai_common_plat-mindlyst-web-1',
  'nomgap-web': 'learning_ai_common_plat-nomgap-web-1',
  'actiontrail-web': 'learning_ai_common_plat-actiontrail-web-1',
  'llmlab-dashboard': 'learning_ai_common_plat-llmlab-dashboard-1',
  'efforise-web': 'learning_ai_common_plat-efforise-web-1',
  platform: 'learning_ai_common_plat-platform-service-1',
  extraction: 'learning_ai_common_plat-extraction-service-1',
  mcp: 'learning_ai_common_plat-mcp-server-1',
  grafana: 'learning_ai_common_plat-grafana-1',
  loki: 'learning_ai_common_plat-loki-1',
  prometheus: 'learning_ai_common_plat-prometheus-1',
  'node-exporter': 'learning_ai_common_plat-node-exporter-1',
  cadvisor: 'learning_ai_common_plat-cadvisor-1',
  valkey: 'learning_ai_common_plat-valkey-1',
  'gitea-registry': 'gitea-npm-registry',
  mailpit: 'learning_ai_common_plat-mailpit-1',
  azurite: 'learning_ai_common_plat-azurite-1',
  'cosmos-emulator': 'learning_ai_common_plat-cosmos-emulator-1',
};

/** Upper bound, in milliseconds, for one HTTP request or one TCP connect attempt. */
const CHECK_TIMEOUT_MS = 3000;

/**
 * Probes an HTTP service with a GET request to `<base URL><path>`.
 *
 * - Any thrown error (including the timeout abort) yields status 'down'
 *   with the error text as `message`.
 * - A non-2xx response yields 'down' with `HTTP <status>` as `message`.
 * - For 'http-json' services the body is parsed; see the health rule below.
 * - For 'http-status' services a 2xx response is 'healthy'.
 *
 * @param service Definition of the HTTP service to probe.
 * @returns The probe result; this function resolves rather than rejects.
 */
async function checkHttpService(service: HttpServiceDefinition): Promise<ServiceCheck> {
  // `||` (not `??`) on purpose: an empty environment value also falls back to the default.
  const baseUrl = (service.env && process.env[service.env]) || service.default;
  const target = `${baseUrl}${service.path}`;
  const start = Date.now();

  // Single place that assembles the result so all outcomes share one shape.
  const report = (
    status: ServiceStatus,
    latency: number,
    extra: Pick<ServiceCheck, 'version' | 'message'> = {}
  ): ServiceCheck => ({
    id: service.id,
    name: service.name,
    group: service.group,
    target,
    status,
    latency,
    ...extra,
    lastChecked: new Date().toISOString(),
  });

  try {
    const res = await fetch(target, {
      method: 'GET',
      // NOTE(review): Content-Type on a body-less GET was probably meant to be
      // Accept; left unchanged so the outgoing request stays the same.
      headers: { 'Content-Type': 'application/json' },
      // Framework-specific fetch option — presumably opts out of response caching; confirm.
      next: { revalidate: 0 },
      signal: AbortSignal.timeout(CHECK_TIMEOUT_MS),
    });
    const latency = Date.now() - start;

    if (!res.ok) {
      // The body is not needed; cancel it so the connection is released promptly.
      void res.body?.cancel().catch(() => undefined);
      return report('down', latency, { message: `HTTP ${res.status}` });
    }

    if (service.kind === 'http-json') {
      // A body that is not valid JSON is treated as an empty payload.
      const payload = await res.json().catch(() => null);
      const rawStatus = payload?.status;
      // Healthy when the payload carries a known "ok" marker or any truthy version.
      const isOk = Boolean(
        rawStatus === 'ok' ||
          rawStatus === 'healthy' ||
          payload?.database === 'ok' ||
          payload?.commit === 'ok' ||
          payload?.version
      );
      return report(isOk ? 'healthy' : 'degraded', latency, {
        version: payload?.version,
        message: isOk ? undefined : JSON.stringify(payload),
      });
    }

    // Status-only probe: the body is never read, so cancel it.
    void res.body?.cancel().catch(() => undefined);
    return report('healthy', latency);
  } catch (err) {
    return report('down', Date.now() - start, {
      message: err instanceof Error ? err.message : String(err),
    });
  }
}

/**
 * Probes a TCP service by opening a connection to `host:port`.
 * A successful connect is 'healthy'; a socket timeout or error is 'down'.
 *
 * @param service Definition of the TCP service to probe.
 * @returns The probe result; the socket is destroyed before resolving.
 */
async function checkTcpService(service: TcpServiceDefinition): Promise<ServiceCheck> {
  const start = Date.now();
  const target = `${service.host}:${service.port}`;

  return new Promise<ServiceCheck>(resolve => {
    const socket = net.createConnection({ host: service.host, port: service.port });
    let settled = false;

    // Only the first of connect/timeout/error wins; later events are ignored.
    const finish = (status: ServiceStatus, message?: string) => {
      if (settled) return;
      settled = true;
      socket.destroy();
      resolve({
        id: service.id,
        name: service.name,
        group: service.group,
        target,
        status,
        latency: Date.now() - start,
        message,
        lastChecked: new Date().toISOString(),
      });
    };

    socket.setTimeout(CHECK_TIMEOUT_MS);
    socket.once('connect', () => finish('healthy'));
    socket.once('timeout', () => finish('down', 'Connection timed out'));
    socket.once('error', err => finish('down', err.message));
  });
}

/** Runs the probe that matches the definition's `kind`. */
function checkService(service: ServiceDefinition): Promise<ServiceCheck> {
  return service.kind === 'tcp' ? checkTcpService(service) : checkHttpService(service);
}

/**
 * Probes every entry of STACK_SERVICES concurrently.
 *
 * @returns One result per service, in STACK_SERVICES order.
 */
export async function collectOpsChecks(): Promise<ServiceCheck[]> {
  return Promise.all(STACK_SERVICES.map(service => checkService(service)));
}

/**
 * Probes all services and derives an overall state:
 * 'critical' if any service is down, otherwise 'degraded' if any is degraded,
 * otherwise 'healthy'.
 */
export async function collectOpsStatus(): Promise<OpsStatus> {
  const services = await collectOpsChecks();

  let overall: OpsStatus['overall'] = 'healthy';
  if (services.some(check => check.status === 'down')) {
    overall = 'critical';
  } else if (services.some(check => check.status === 'degraded')) {
    overall = 'degraded';
  }

  return {
    overall,
    timestamp: new Date().toISOString(),
    services,
  };
}

/**
 * Probes all services and merges each result with its static inventory
 * metadata (description, management, exposure, port, restartable flag).
 *
 * Each definition is probed and enriched in one step, so every returned
 * item is guaranteed to carry a complete probe result.
 *
 * @returns One item per service, in STACK_SERVICES order.
 */
export async function collectInventoryServices(): Promise<InventoryService[]> {
  return Promise.all(
    STACK_SERVICES.map(async service => ({
      ...(await checkService(service)),
      description: service.description,
      management: service.management,
      exposure: service.exposure,
      port: service.port,
      restartable: Boolean(RESTARTABLE_SERVICE_CONTAINERS[service.id]),
    }))
  );
}