// learning_ai_common_plat/dashboards/admin-web/src/lib/ops-stack.ts
//
// 643 lines
// 17 KiB
// TypeScript

import net from 'node:net';
/**
 * Health state reported for a single service probe.
 * The HTTP and TCP probes in this module produce 'healthy', 'degraded' or 'down';
 * 'maintenance' is presumably assigned elsewhere — TODO confirm.
 */
export type ServiceStatus = 'healthy' | 'degraded' | 'down' | 'maintenance';
/**
 * Probe strategies a service definition can select through its `kind` field:
 * HTTP with JSON payload inspection, HTTP status-only, or a raw TCP connect.
 */
export type CheckKind = 'http-json' | 'http-status' | 'tcp';
/** Where an inventory item lives or is managed: a Docker container or the VM host itself. */
export type InventorySource = 'docker' | 'vm';
/** Result of probing one service once. */
export interface ServiceCheck {
/** Identifier copied from the service definition. */
id: string;
/** Human-readable service name copied from the service definition. */
name: string;
/** Display group copied from the service definition. */
group: string;
/** Address that was probed: the full URL for HTTP probes, `host:port` for TCP probes. */
target: string;
/** Outcome of the probe. */
status: ServiceStatus;
/** Elapsed time of the probe in milliseconds (difference of two `Date.now()` readings). */
latency: number;
/** Version reported by a JSON health payload, when the payload carries one. */
version?: string;
/** Detail for non-healthy results, e.g. `HTTP <status>`, an error message, or the serialized payload. */
message?: string;
/** ISO-8601 timestamp (`Date#toISOString`) taken when the result was built. */
lastChecked: string;
}
/** Aggregated snapshot of every service probe plus a roll-up verdict. */
export interface OpsStatus {
/** 'critical' when any service is down, otherwise 'degraded' when any is degraded, otherwise 'healthy'. */
overall: 'healthy' | 'degraded' | 'critical';
/** ISO-8601 timestamp taken when the snapshot was assembled. */
timestamp: string;
/** Individual probe results, one per registered service. */
services: ServiceCheck[];
}
/** Fields shared by every service definition, regardless of how the service is probed. */
interface BaseDefinition {
/** Unique key; also used to look up the service in the restartable-container map. */
id: string;
/** Human-readable name shown for the service. */
name: string;
/** Display group the service is listed under. */
group: string;
/** Short description of what the service does. */
description: string;
/** Whether the service is managed as a Docker container or on the VM. */
management: InventorySource;
/** Whether the service is internal-only or publicly exposed. */
exposure: 'internal' | 'public';
/** Port reported in the inventory; HTTP probes do not use it (they use the configured URL). */
port?: number;
}
/** Definition of a service probed with an HTTP GET request. */
interface HttpServiceDefinition extends BaseDefinition {
/** 'http-json' additionally inspects the JSON body; 'http-status' only requires a successful status. */
kind: 'http-json' | 'http-status';
/** Name of an environment variable that, when set, overrides `default` as the base URL. */
env?: string;
/** Base URL used when `env` is absent or the variable is unset/empty. */
default: string;
/** Path appended to the base URL to form the probe target. */
path: string;
}
/** Definition of a service probed by opening a TCP connection. */
interface TcpServiceDefinition extends BaseDefinition {
/** Discriminant selecting the TCP probe. */
kind: 'tcp';
/** Host name to connect to. */
host: string;
/** Port to connect to (required here, unlike the optional base field). */
port: number;
}
/** Any registered service; discriminated by `kind` ('tcp' vs. the two HTTP kinds). */
export type ServiceDefinition = HttpServiceDefinition | TcpServiceDefinition;
/** A probe result enriched with the static metadata from the service's definition. */
export interface InventoryService extends ServiceCheck {
/** Description copied from the service definition. */
description: string;
/** Management source copied from the service definition. */
management: InventorySource;
/** Exposure level copied from the service definition. */
exposure: 'internal' | 'public';
/** Port copied from the service definition, when it declares one. */
port?: number;
/** True when the service id has a non-empty entry in the restartable-container map. */
restartable: boolean;
}
/** Inventory entry for a tool or configuration item present on the host, as opposed to a probed service. */
export interface HostTool {
/** Unique key of the tool. */
id: string;
/** Human-readable tool name. */
name: string;
/** Display group the tool is listed under. */
group: string;
/** Where the tool lives (Docker or the VM). */
source: InventorySource;
/** Free-form label describing how the tool is provisioned (e.g. 'VM bootstrap', 'Manual install'). */
management: string;
/** 'managed' or 'manual'; presumably whether provisioning is automated — TODO confirm. */
status: 'managed' | 'manual';
/** Short description of what the tool is used for. */
description: string;
}
/**
 * Static registry of every service the ops dashboard probes.
 * Entries with kind 'tcp' are checked by opening a socket to host:port; the
 * HTTP kinds are checked by a GET against `default` (or the `env` override)
 * plus `path`. The order of this array is the order of the probe results.
 */
export const STACK_SERVICES: ServiceDefinition[] = [
// --- Dashboards ---
{
id: 'admin-web',
name: 'Admin Dashboard',
group: 'Dashboards',
description: 'Internal admin portal for platform review and ops workflows.',
management: 'docker',
exposure: 'internal',
port: 3001,
kind: 'http-status',
default: 'http://admin-web:3001',
path: '/api/health',
},
{
id: 'tracker-web',
name: 'Tracker Dashboard',
group: 'Dashboards',
description: 'Internal tracker UI for issue and delivery review.',
management: 'docker',
exposure: 'internal',
port: 3003,
kind: 'http-status',
default: 'http://tracker-web:3003',
path: '/api/health',
},
// --- Product web apps and internal tooling (probed at '/') ---
{
id: 'lysnrai-dashboard',
name: 'LysnrAI Dashboard',
group: 'Product Web Apps',
description: 'Voice AI dashboard hosted on the VM for internal product review.',
management: 'docker',
exposure: 'internal',
port: 3002,
kind: 'http-status',
default: 'http://lysnrai-dashboard:3002',
path: '/',
},
{
id: 'chronomind-web',
name: 'ChronoMind Web',
group: 'Product Web Apps',
description: 'ChronoMind web client hosted on the VM.',
management: 'docker',
exposure: 'internal',
port: 3030,
kind: 'http-status',
default: 'http://chronomind-web:3030',
path: '/',
},
{
id: 'jarvisjr-web',
name: 'JarvisJr Web',
group: 'Product Web Apps',
description: 'JarvisJr web client hosted on the VM.',
management: 'docker',
exposure: 'internal',
port: 3035,
kind: 'http-status',
default: 'http://jarvisjr-web:3035',
path: '/',
},
{
id: 'flowmonk-web',
name: 'FlowMonk Web',
group: 'Product Web Apps',
description: 'FlowMonk web client hosted on the VM.',
management: 'docker',
exposure: 'internal',
port: 3040,
kind: 'http-status',
default: 'http://flowmonk-web:3040',
path: '/',
},
{
id: 'notelett-web',
name: 'NoteLett Web',
group: 'Product Web Apps',
description: 'NoteLett web client hosted on the VM.',
management: 'docker',
exposure: 'internal',
port: 3045,
kind: 'http-status',
default: 'http://notelett-web:3045',
path: '/',
},
{
id: 'mindlyst-web',
name: 'MindLyst Web',
group: 'Product Web Apps',
description: 'MindLyst web client hosted on the VM.',
management: 'docker',
exposure: 'internal',
port: 3050,
kind: 'http-status',
default: 'http://mindlyst-web:3050',
path: '/',
},
{
id: 'nomgap-web',
name: 'NomGap Web',
group: 'Product Web Apps',
description: 'NomGap web client hosted on the VM.',
management: 'docker',
exposure: 'internal',
port: 3055,
kind: 'http-status',
default: 'http://nomgap-web:3055',
path: '/',
},
{
id: 'actiontrail-web',
name: 'ActionTrail Web',
group: 'Product Web Apps',
description: 'ActionTrail web client hosted on the VM.',
management: 'docker',
exposure: 'internal',
port: 3060,
kind: 'http-status',
default: 'http://actiontrail-web:3060',
path: '/',
},
{
id: 'llmlab-dashboard',
name: 'LLM Lab Dashboard',
group: 'Internal Tooling',
description: 'Internal LLM lab dashboard hosted on the VM for operator and developer use.',
management: 'docker',
exposure: 'internal',
port: 3075,
kind: 'http-status',
default: 'http://llmlab-dashboard:3075',
path: '/',
},
{
id: 'efforise-web',
name: 'Efforise Web',
group: 'Product Web Apps',
description: 'Efforise web client hosted on the VM.',
management: 'docker',
exposure: 'internal',
port: 3080,
kind: 'http-status',
default: 'http://efforise-web:3080',
path: '/',
},
// --- Core services (base URL overridable through an environment variable) ---
{
id: 'platform',
name: 'Platform Service',
group: 'Core Services',
description: 'Core API and auth platform service.',
management: 'docker',
exposure: 'internal',
port: 4003,
env: 'PLATFORM_SERVICE_URL',
kind: 'http-json',
default: 'http://platform-service:4003',
path: '/health',
},
{
id: 'extraction',
name: 'Extraction Service',
group: 'Core Services',
description: 'Structured extraction service with product-aware throttling.',
management: 'docker',
exposure: 'internal',
port: 4005,
env: 'EXTRACTION_SERVICE_URL',
kind: 'http-json',
default: 'http://extraction-service:4005',
path: '/health',
},
{
id: 'mcp',
name: 'MCP Server',
group: 'Core Services',
description: 'Internal MCP integration surface.',
management: 'docker',
exposure: 'internal',
port: 4007,
env: 'MCP_SERVER_URL',
kind: 'http-json',
default: 'http://mcp-server:4007',
path: '/health',
},
// --- Observability ---
{
id: 'grafana',
name: 'Grafana',
group: 'Observability',
description: 'Metrics and logs visualization.',
management: 'docker',
exposure: 'internal',
port: 3000,
kind: 'http-json',
default: 'http://grafana:3000',
path: '/api/health',
},
{
id: 'loki',
name: 'Loki',
group: 'Observability',
description: 'Centralized log aggregation.',
management: 'docker',
exposure: 'internal',
port: 3100,
kind: 'http-status',
default: 'http://loki:3100',
path: '/ready',
},
{
id: 'prometheus',
name: 'Prometheus',
group: 'Observability',
description: 'Internal metrics scraping and query engine.',
management: 'docker',
exposure: 'internal',
port: 9090,
kind: 'http-status',
default: 'http://prometheus:9090',
path: '/-/healthy',
},
{
id: 'node-exporter',
name: 'Node Exporter',
group: 'Observability',
description: 'Host-level VM metrics exporter.',
management: 'docker',
exposure: 'internal',
port: 9100,
kind: 'http-status',
default: 'http://node-exporter:9100',
path: '/metrics',
},
{
id: 'cadvisor',
name: 'cAdvisor',
group: 'Observability',
description: 'Container-level metrics exporter.',
management: 'docker',
exposure: 'internal',
port: 8080,
kind: 'http-status',
default: 'http://cadvisor:8080',
path: '/healthz',
},
// --- Shared infrastructure ---
{
id: 'valkey',
name: 'Valkey',
group: 'Shared Infrastructure',
description: 'Shared cache and rate-limit backing store.',
management: 'docker',
exposure: 'internal',
kind: 'tcp',
host: 'valkey',
port: 6379,
},
{
id: 'gitea-registry',
name: 'Gitea Registry',
group: 'Shared Infrastructure',
description: 'Private npm package registry and source control service.',
management: 'docker',
exposure: 'internal',
// NOTE(review): `port` (3300) differs from the port in `default` (3000), unlike every
// other HTTP entry; presumably a host-published vs. in-container port — confirm.
port: 3300,
kind: 'http-json',
default: 'http://gitea-npm-registry:3000',
path: '/api/v1/version',
},
{
id: 'mailpit',
name: 'Mailpit',
group: 'Shared Infrastructure',
description: 'SMTP sink and email inspection UI.',
management: 'docker',
exposure: 'internal',
port: 8025,
kind: 'http-status',
default: 'http://mailpit:8025',
path: '/',
},
{
id: 'azurite',
name: 'Azurite',
group: 'Shared Infrastructure',
description: 'Local Azure Blob Storage emulator.',
management: 'docker',
exposure: 'internal',
kind: 'tcp',
host: 'azurite',
port: 10000,
},
{
id: 'cosmos-emulator',
name: 'Cosmos Emulator',
group: 'Shared Infrastructure',
description: 'Local Azure Cosmos DB emulator.',
management: 'docker',
exposure: 'internal',
port: 8080,
kind: 'http-status',
default: 'http://cosmos-emulator:8080',
path: '/ready',
},
// --- Ingress ---
{
id: 'gateway',
name: 'Traefik Gateway',
group: 'Ingress',
description: 'Legacy internal gateway and routing layer.',
management: 'docker',
exposure: 'internal',
port: 8080,
kind: 'http-status',
default: 'http://gateway:8080',
path: '/',
},
{
id: 'caddy',
name: 'Caddy',
group: 'Ingress',
description: 'HTTPS ingress and reverse proxy for internal and backend domains.',
management: 'docker',
exposure: 'public',
kind: 'tcp',
host: 'caddy',
port: 80,
},
];
/**
 * Static list of tools and configuration items on the VM host.
 * These entries are descriptive only; nothing in the visible code probes them.
 */
export const HOST_TOOLS: HostTool[] = [
{
id: 'docker-ce',
name: 'Docker CE',
group: 'Host Tooling',
source: 'vm',
management: 'VM bootstrap',
status: 'managed',
description: 'Container runtime for the internal stack.',
},
{
id: 'docker-compose',
name: 'Docker Compose',
group: 'Host Tooling',
source: 'vm',
management: 'VM bootstrap',
status: 'managed',
description: 'Multi-service orchestration for the VM stack.',
},
{
// The only entry marked as manually installed.
id: 'azure-cli',
name: 'Azure CLI',
group: 'Host Tooling',
source: 'vm',
management: 'Manual install',
status: 'manual',
description: 'Azure subscription and NSG management from the VM.',
},
{
id: 'nodejs',
name: 'Node.js 22',
group: 'Host Tooling',
source: 'vm',
management: 'VM bootstrap',
status: 'managed',
description: 'Build/runtime toolchain for workspace services.',
},
{
id: 'pnpm',
name: 'pnpm',
group: 'Host Tooling',
source: 'vm',
management: 'VM bootstrap',
status: 'managed',
description: 'Workspace package manager.',
},
{
id: 'git',
name: 'git',
group: 'Host Tooling',
source: 'vm',
management: 'VM bootstrap',
status: 'managed',
description: 'Repo sync and deployment workflow tooling.',
},
{
id: 'jq',
name: 'jq',
group: 'Host Tooling',
source: 'vm',
management: 'VM bootstrap',
status: 'managed',
description: 'CLI JSON inspection used in ops and setup scripts.',
},
{
id: 'caddy-host-config',
name: 'Caddy Config',
group: 'Host Tooling',
source: 'vm',
management: 'VM file mount',
status: 'managed',
description: 'Host-mounted Caddy configuration at /opt/bytelyst/Caddyfile.',
},
];
/**
 * Maps a service id to a container identifier (presumably the Docker container
 * name used by a restart action — confirm against the caller).
 * A service is reported as restartable in the inventory only when it has a
 * non-empty entry here; 'gateway' and 'caddy' are intentionally or otherwise absent.
 */
export const RESTARTABLE_SERVICE_CONTAINERS: Record<string, string> = {
'admin-web': 'learning_ai_common_plat-admin-web-1',
'tracker-web': 'learning_ai_common_plat-tracker-web-1',
'lysnrai-dashboard': 'learning_ai_common_plat-lysnrai-dashboard-1',
'chronomind-web': 'learning_ai_common_plat-chronomind-web-1',
'jarvisjr-web': 'learning_ai_common_plat-jarvisjr-web-1',
'flowmonk-web': 'learning_ai_common_plat-flowmonk-web-1',
'notelett-web': 'learning_ai_common_plat-notelett-web-1',
'mindlyst-web': 'learning_ai_common_plat-mindlyst-web-1',
'nomgap-web': 'learning_ai_common_plat-nomgap-web-1',
'actiontrail-web': 'learning_ai_common_plat-actiontrail-web-1',
'llmlab-dashboard': 'learning_ai_common_plat-llmlab-dashboard-1',
'efforise-web': 'learning_ai_common_plat-efforise-web-1',
platform: 'learning_ai_common_plat-platform-service-1',
extraction: 'learning_ai_common_plat-extraction-service-1',
mcp: 'learning_ai_common_plat-mcp-server-1',
grafana: 'learning_ai_common_plat-grafana-1',
loki: 'learning_ai_common_plat-loki-1',
prometheus: 'learning_ai_common_plat-prometheus-1',
'node-exporter': 'learning_ai_common_plat-node-exporter-1',
cadvisor: 'learning_ai_common_plat-cadvisor-1',
valkey: 'learning_ai_common_plat-valkey-1',
// Does not follow the `learning_ai_common_plat-<name>-1` pattern of the other entries.
'gitea-registry': 'gitea-npm-registry',
mailpit: 'learning_ai_common_plat-mailpit-1',
azurite: 'learning_ai_common_plat-azurite-1',
'cosmos-emulator': 'learning_ai_common_plat-cosmos-emulator-1',
};
/**
 * Probes an HTTP health endpoint and maps the outcome onto a `ServiceCheck`.
 *
 * The base URL comes from the environment variable named by `service.env` when
 * that variable is set and non-empty, otherwise from `service.default`;
 * `service.path` is appended to it.
 *
 * Outcome mapping:
 * - request failure or timeout (3 s) → 'down' with the error message;
 * - non-2xx response → 'down' with `HTTP <status>`;
 * - kind 'http-status' with a 2xx response → 'healthy';
 * - kind 'http-json' → 'healthy' when the payload reports status 'ok'/'healthy',
 *   database 'ok', commit 'ok', or any truthy version; otherwise 'degraded'
 *   with the serialized payload (or the parse error) as the message.
 *
 * @param service HTTP service definition to probe.
 * @returns The probe result; this function never rejects.
 */
async function checkHttpService(service: HttpServiceDefinition): Promise<ServiceCheck> {
  const configuredBase = (service.env && process.env[service.env]) || service.default;
  // Drop trailing slashes so "http://host/" + "/health" does not become "http://host//health".
  const target = `${configuredBase.replace(/\/+$/, '')}${service.path}`;
  const start = Date.now();

  // Single place where the result shape is assembled.
  const report = (
    status: ServiceCheck['status'],
    latency: number,
    extra: Pick<ServiceCheck, 'version' | 'message'> = {}
  ): ServiceCheck => ({
    id: service.id,
    name: service.name,
    group: service.group,
    target,
    status,
    latency,
    ...extra,
    lastChecked: new Date().toISOString(),
  });

  // Release the connection when the body is not going to be read; an unread
  // body otherwise keeps the underlying socket busy until it is collected.
  const discardBody = (res: Response): void => {
    try {
      void res.body?.cancel().catch(() => undefined);
    } catch {
      // Best effort only: failing to cancel must not change the probe result.
    }
  };

  const requestInit = {
    method: 'GET',
    headers: { 'Content-Type': 'application/json' },
    // NOTE(review): presumably the Next.js fetch extension that disables response caching — confirm.
    next: { revalidate: 0 },
    signal: AbortSignal.timeout(3000),
  };

  try {
    const res = await fetch(target, requestInit);
    // Latency covers time-to-headers only, not the body download.
    const latency = Date.now() - start;

    if (!res.ok) {
      discardBody(res);
      return report('down', latency, { message: `HTTP ${res.status}` });
    }

    if (service.kind !== 'http-json') {
      discardBody(res);
      return report('healthy', latency);
    }

    let payload: unknown;
    try {
      payload = await res.json();
    } catch (err) {
      // The endpoint answered 2xx but the body could not be read as JSON.
      const reason = err instanceof Error ? err.message : String(err);
      return report('degraded', latency, { message: `Unreadable JSON body: ${reason}` });
    }

    const fields: Record<string, unknown> =
      typeof payload === 'object' && payload !== null
        ? (payload as Record<string, unknown>)
        : {};
    const rawVersion = fields.version;
    const isOk =
      fields.status === 'ok' ||
      fields.status === 'healthy' ||
      fields.database === 'ok' ||
      fields.commit === 'ok' ||
      Boolean(rawVersion);

    // Only surface a version that can honestly be represented as a string.
    let version: string | undefined;
    if (typeof rawVersion === 'string') version = rawVersion;
    else if (typeof rawVersion === 'number') version = String(rawVersion);

    return report(isOk ? 'healthy' : 'degraded', latency, {
      version,
      message: isOk ? undefined : JSON.stringify(payload),
    });
  } catch (err) {
    return report('down', Date.now() - start, {
      message: err instanceof Error ? err.message : String(err),
    });
  }
}
/**
 * Probes a service by opening a TCP connection to `service.host:service.port`.
 *
 * A completed connection yields 'healthy'; a socket error or three seconds
 * without progress yields 'down' with an explanatory message. The socket is
 * always destroyed once the first outcome is known.
 *
 * @param service TCP service definition to probe.
 * @returns The probe result; the returned promise never rejects.
 */
async function checkTcpService(service: TcpServiceDefinition): Promise<ServiceCheck> {
  const startedAt = Date.now();
  const { host, port } = service;
  const target = `${host}:${port}`;

  return new Promise<ServiceCheck>(resolve => {
    const socket = net.createConnection({ host, port });
    let done = false;

    // Only the first event decides the outcome; later ones are ignored.
    function settle(status: ServiceStatus, message?: string): void {
      if (done) {
        return;
      }
      done = true;
      socket.destroy();
      const result: ServiceCheck = {
        id: service.id,
        name: service.name,
        group: service.group,
        target,
        status,
        latency: Date.now() - startedAt,
        message,
        lastChecked: new Date().toISOString(),
      };
      resolve(result);
    }

    socket.setTimeout(3000);
    socket.once('connect', () => {
      settle('healthy');
    });
    socket.once('timeout', () => {
      settle('down', 'Connection timed out');
    });
    socket.once('error', err => {
      settle('down', err.message);
    });
  });
}
/**
 * Probes every registered service concurrently.
 *
 * @returns One result per entry of `STACK_SERVICES`, in the same order.
 */
export async function collectOpsChecks(): Promise<ServiceCheck[]> {
  const pending: Promise<ServiceCheck>[] = [];
  for (const definition of STACK_SERVICES) {
    // All probes are started before any is awaited so they run in parallel.
    if (definition.kind === 'tcp') {
      pending.push(checkTcpService(definition));
    } else {
      pending.push(checkHttpService(definition));
    }
  }
  return Promise.all(pending);
}
/**
 * Runs all probes and rolls them up into a single verdict.
 *
 * @returns The individual results plus `overall`: 'critical' when any service
 * is down, otherwise 'degraded' when any is degraded, otherwise 'healthy'.
 */
export async function collectOpsStatus(): Promise<OpsStatus> {
  const services = await collectOpsChecks();

  const anyDown = services.some(entry => entry.status === 'down');
  const anyDegraded = services.some(entry => entry.status === 'degraded');

  // A single down service outranks any number of degraded ones.
  const overall: OpsStatus['overall'] = anyDown
    ? 'critical'
    : anyDegraded
      ? 'degraded'
      : 'healthy';

  const timestamp = new Date().toISOString();
  return { overall, timestamp, services };
}
/**
 * Runs all probes and merges each result with the static metadata of its
 * service definition.
 *
 * @returns One inventory entry per entry of `STACK_SERVICES`, in the same order.
 */
export async function collectInventoryServices(): Promise<InventoryService[]> {
  const checks = await collectOpsChecks();

  const checkById = new Map<string, ServiceCheck>();
  for (const check of checks) {
    checkById.set(check.id, check);
  }

  const inventory: InventoryService[] = [];
  for (const definition of STACK_SERVICES) {
    const { id, description, management, exposure, port } = definition;
    inventory.push({
      // Every definition was probed above, so a result is expected for each id.
      ...(checkById.get(id) as ServiceCheck),
      description,
      management,
      exposure,
      port,
      restartable: Boolean(RESTARTABLE_SERVICE_CONTAINERS[id]),
    });
  }
  return inventory;
}