From bd7ebeb248cf1864035775af79dc1bf903592c12 Mon Sep 17 00:00:00 2001 From: root Date: Tue, 31 Mar 2026 08:32:30 +0000 Subject: [PATCH] feat(admin): add VM inventory and Valkey inspector --- dashboards/admin-web/package.json | 1 + .../src/app/(dashboard)/ops/page.tsx | 604 +++++++++++++++--- .../src/app/api/ops/inventory/route.ts | 33 + .../admin-web/src/app/api/ops/status/route.ts | 270 +------- .../admin-web/src/app/api/ops/valkey/route.ts | 147 +++++ dashboards/admin-web/src/lib/ops-stack.ts | 492 ++++++++++++++ pnpm-lock.yaml | 3 + 7 files changed, 1200 insertions(+), 350 deletions(-) create mode 100644 dashboards/admin-web/src/app/api/ops/inventory/route.ts create mode 100644 dashboards/admin-web/src/app/api/ops/valkey/route.ts create mode 100644 dashboards/admin-web/src/lib/ops-stack.ts diff --git a/dashboards/admin-web/package.json b/dashboards/admin-web/package.json index eaeac27c..1fc5acdd 100644 --- a/dashboards/admin-web/package.json +++ b/dashboards/admin-web/package.json @@ -49,6 +49,7 @@ "react": "19.2.3", "react-dom": "19.2.3", "react-markdown": "^10.1.0", + "redis": "^4.7.0", "recharts": "^3.7.0", "remark-gfm": "^4.0.1", "tailwind-merge": "^3.4.0" diff --git a/dashboards/admin-web/src/app/(dashboard)/ops/page.tsx b/dashboards/admin-web/src/app/(dashboard)/ops/page.tsx index c582fad2..201a0c0d 100644 --- a/dashboards/admin-web/src/app/(dashboard)/ops/page.tsx +++ b/dashboards/admin-web/src/app/(dashboard)/ops/page.tsx @@ -1,13 +1,24 @@ 'use client'; import { useEffect, useState } from 'react'; -import { Activity, CheckCircle, ExternalLink, RefreshCw, ShieldAlert } from 'lucide-react'; +import { + Activity, + CheckCircle, + Database, + ExternalLink, + HardDrive, + RefreshCw, + Search, + ServerCog, + ShieldAlert, +} from 'lucide-react'; import { Card, CardContent, CardDescription, CardHeader, CardTitle } from '@/components/ui/card'; import { Badge } from '@/components/ui/badge'; import { Progress } from '@/components/ui/progress'; import { Button } from '@/components/ui/button'; import { Skeleton } from '@/components/ui/skeleton'; +import { Input } from '@/components/ui/input'; import { Table, TableBody, @@ -35,6 +46,59 @@ interface OpsStatus { services: ServiceCheck[]; } +interface InventoryService extends ServiceCheck { + description: string; + management: 'docker' | 'vm'; + exposure: 'internal' | 'public'; + port?: number; +} + +interface HostTool { + id: string; + name: string; + group: string; + source: 'docker' | 'vm'; + management: string; + status: 'managed' | 'manual'; + description: string; +} + +interface InventoryData { + timestamp: string; + counts: { + services: number; + healthy: number; + degraded: number; + down: number; + hostTools: number; + }; + services: InventoryService[]; + hostTools: HostTool[]; +} + +interface ValkeyKey { + key: string; + type: string; + ttlSeconds: number; + size?: number; + preview?: string; +} + +interface ValkeyData { + timestamp: string; + pattern: string; + limit: number; + summary: { + ping: string; + dbsize: number; + matchedKeys: number; + version: string; + usedMemoryHuman: string; + usedMemoryPeakHuman: string; + }; + keys: ValkeyKey[]; +} + const OPS_LINKS = [ { label: 'Grafana', href: 'http://127.0.0.1:3000' }, { label: 'Prometheus', href: 'http://127.0.0.1:9090' }, @@ -46,20 +110,49 @@ const OPS_LINKS = [ export default function OpsPage() { const [data, setData] = useState(null); + const [inventory, setInventory] = useState(null); + const [valkey, setValkey] = useState(null); const [loading, setLoading] = useState(true); + const [valkeyLoading, setValkeyLoading] = useState(true); const [lastUpdated, setLastUpdated] = useState(null); const [_error, setError] = useState(null); const [countdown, setCountdown] = useState(10); + const [activeTab, setActiveTab] = useState<'overview' | 'inventory' | 'valkey'>('overview'); + const [valkeyPattern, setValkeyPattern] = useState('*'); + const [valkeyLimit, setValkeyLimit] = useState('25'); + + const fetchValkey = async (pattern = valkeyPattern, limit = valkeyLimit) => { + try { + setValkeyLoading(true); + const params = new URLSearchParams({ + pattern, + limit, + }); + const res = await fetch(`/api/ops/valkey?${params.toString()}`); + if (!res.ok) throw new Error('Failed to fetch Valkey state'); + setValkey(await res.json()); + } finally { + setValkeyLoading(false); + } + }; const fetchStatus = async () => { try { setLoading(true); - const res = await fetch('/api/ops/status'); - if (!res.ok) throw new Error('Failed to fetch status'); - const json = await res.json(); - setData(json); + const [statusRes, inventoryRes] = await Promise.all([ + fetch('/api/ops/status'), + fetch('/api/ops/inventory'), + ]); + + if (!statusRes.ok) throw new Error('Failed to fetch status'); + if (!inventoryRes.ok) throw new Error('Failed to fetch inventory'); + + const [statusJson, inventoryJson] = await Promise.all([statusRes.json(), inventoryRes.json()]); + setData(statusJson); + setInventory(inventoryJson); setLastUpdated(new Date()); setError(null); + await fetchValkey(); } catch (err) { setError(String(err)); } finally { @@ -73,7 +166,7 @@ export default function OpsPage() { const timer = setInterval(() => { setCountdown(prev => { if (prev <= 1) { - fetchStatus(); // trigger refresh + fetchStatus(); return 10; } return prev - 1; @@ -114,7 +207,6 @@ export default function OpsPage() { - {/* Global Status Banner */} {data && ( )} - - - Ops Links - Direct entry points for internal monitoring and health review. - - - {OPS_LINKS.map(link => ( - - {link.label} - - - ))} - - - - {/* Service Grid */} -
- {data?.services.map(svc => ( - - - {svc.name} - - - -
- - {svc.status} - -
- {svc.latency}ms -
-
- -
-
- Group - {svc.group} -
-
- {svc.target} -
-
- Uptime (30d) - 99.9% -
- -
- - {svc.message && ( -
- {svc.message} -
- )} - -
- v{svc.version || '?'} - {new Date(svc.lastChecked).toLocaleTimeString()} -
-
-
+
+ {[ + { id: 'overview', label: 'Overview', icon: Activity }, + { id: 'inventory', label: 'VM Inventory', icon: ServerCog }, + { id: 'valkey', label: 'Valkey Inspector', icon: Database }, + ].map(({ id, label, icon: Icon }) => ( + ))} - - {!data && - loading && - Array.from({ length: 5 }).map((_, i) => ( - - - - - - - - - ))}
- {/* Dependency Matrix (Static for now) */} + {activeTab === 'overview' && ( + <> + + + Ops Links + + Direct entry points for internal monitoring and health review. + + + + {OPS_LINKS.map(link => ( + + {link.label} + + + ))} + + + +
+ {data?.services.map(svc => ( + + + {svc.name} + + + +
+ + {svc.status} + +
+ {svc.latency}ms +
+
+ +
+
+ Group + {svc.group} +
+
+ {svc.target} +
+
+ Uptime (30d) + 99.9% +
+ +
+ + {svc.message && ( +
+ {svc.message} +
+ )} + +
+ v{svc.version || '?'} + {new Date(svc.lastChecked).toLocaleTimeString()} +
+
+
+ ))} + + {!data && + loading && + Array.from({ length: 5 }).map((_, i) => ( + + + + + + + + + ))} +
+ + )} + + {activeTab === 'inventory' && ( + <> +
+ + + + Managed Services + + + +
{inventory?.counts.services ?? 0}
+
+
+ + + Healthy + + +
+ {inventory?.counts.healthy ?? 0} +
+
+
+ + + Down + + +
{inventory?.counts.down ?? 0}
+
+
+ + + + Host Tools + + + +
{inventory?.counts.hostTools ?? 0}
+
+
+
+ + + + Service Inventory + Live Docker-managed stack reachable from the admin container. + + + + + + Service + Group + Status + Exposure + Port + Target + + + + {inventory?.services.map(service => ( + + +
{service.name}
+
{service.description}
+
+ {service.group} + + + {service.status} + + + + {service.exposure} + + {service.port ?? 'n/a'} + + {service.target} + +
+ ))} +
+
+
+
+ + + + VM Tooling + Host-level tools and mounted config that support the stack. + + + + + + Tool + Group + Management + Status + Description + + + + {inventory?.hostTools.map(tool => ( + + {tool.name} + {tool.group} + {tool.management} + + + {tool.status} + + + {tool.description} + + ))} + +
+
+
+ + )} + + {activeTab === 'valkey' && ( + <> +
+ + + Ping + + +
{valkey?.summary.ping ?? '--'}
+
+
+ + + + DB Size + + + +
{valkey?.summary.dbsize ?? 0}
+
+
+ + + + Used Memory + + + +
{valkey?.summary.usedMemoryHuman ?? '--'}
+
+
+ + + + Peak Memory + + + +
+ {valkey?.summary.usedMemoryPeakHuman ?? '--'} +
+
+
+
+ + + + Key Explorer + + Read-only Valkey inspection for keys, TTLs, and small previews. + + + +
+
+ setValkeyPattern(event.target.value)} + placeholder="Pattern, e.g. extraction:*" + /> +
+
+ setValkeyLimit(event.target.value)} + placeholder="25" + /> +
+ +
+ +
+
+
Version
+
{valkey?.summary.version ?? '--'}
+
+
+
Pattern
+
{valkey?.pattern ?? '*'}
+
+
+
Matched
+
{valkey?.summary.matchedKeys ?? 0}
+
+
+ + + + + Key + Type + TTL + Size + Preview + + + + {valkeyLoading && ( + + +
+ + Loading Valkey state... +
+
+
+ )} + {!valkeyLoading && + valkey?.keys.map(item => ( + + + {item.key} + + + {item.type} + + {item.ttlSeconds < 0 ? 'persistent' : `${item.ttlSeconds}s`} + {item.size ?? 'n/a'} + + {item.preview ?? 'No preview'} + + + ))} +
+
+
+
+ + )} + Infrastructure Dependencies @@ -258,9 +632,19 @@ export default function OpsPage() { - Stripe API - Payments - Global + Azure Blob Storage + Storage + Local Emulator + + + Operational + + + + + VM Disk + Runtime + Host + Azure VM Operational @@ -271,6 +655,42 @@ export default function OpsPage() { + + + + Internal Stack Coverage + What this admin ops surface covers today. + + +
+
+ + Health Review +
+

+ Live status for dashboards, core services, observability, ingress, and shared infrastructure. +

+
+
+
+ + Valkey Visibility +
+

+ Read-only key inspection with type, TTL, size, and preview for current internal data. +

+
+
+
+ + VM Tooling +
+

+ Inventory of Docker-managed services plus host tools used to run and operate the VM. +

+
+
+
); } diff --git a/dashboards/admin-web/src/app/api/ops/inventory/route.ts b/dashboards/admin-web/src/app/api/ops/inventory/route.ts new file mode 100644 index 00000000..71aabee5 --- /dev/null +++ b/dashboards/admin-web/src/app/api/ops/inventory/route.ts @@ -0,0 +1,33 @@ +import { NextRequest, NextResponse } from 'next/server'; +import { requireAdmin } from '@/lib/auth-server'; +import { HOST_TOOLS, collectInventoryServices } from '@/lib/ops-stack'; + +export const dynamic = 'force-dynamic'; + +export async function GET(req: NextRequest) { + try { + const admin = await requireAdmin(req); + if (!admin) return NextResponse.json({ error: 'Unauthorized' }, { status: 401 }); + + const services = await collectInventoryServices(); + const counts = { + services: services.length, + healthy: services.filter(service => service.status === 'healthy').length, + degraded: services.filter(service => service.status === 'degraded').length, + down: services.filter(service => service.status === 'down').length, + hostTools: HOST_TOOLS.length, + }; + + return NextResponse.json({ + timestamp: new Date().toISOString(), + counts, + services, + hostTools: HOST_TOOLS, + }); + } catch (error) { + return NextResponse.json( + { error: error instanceof Error ? error.message : 'Unauthorized' }, + { status: 401 } + ); + } +} diff --git a/dashboards/admin-web/src/app/api/ops/status/route.ts b/dashboards/admin-web/src/app/api/ops/status/route.ts index cba72f55..43986681 100644 --- a/dashboards/admin-web/src/app/api/ops/status/route.ts +++ b/dashboards/admin-web/src/app/api/ops/status/route.ts @@ -1,265 +1,19 @@ -import net from 'node:net'; -import { NextResponse } from 'next/server'; +import { NextRequest, NextResponse } from 'next/server'; +import { requireAdmin } from '@/lib/auth-server'; +import { collectOpsStatus } from '@/lib/ops-stack'; export const dynamic = 'force-dynamic'; -type ServiceStatus = 'healthy' | 'degraded' | 'down' | 'maintenance'; -type CheckKind = 'http-json' | 'http-status' | 'tcp'; - -interface ServiceCheck { - id: string; - name: string; - group: string; - target: string; - status: ServiceStatus; - latency: number; - version?: string; - message?: string; - lastChecked: string; -} - -interface OpsStatus { - overall: 'healthy' | 'degraded' | 'critical'; - timestamp: string; - services: ServiceCheck[]; -} - -interface HttpServiceDefinition { - id: string; - name: string; - group: string; - kind: 'http-json' | 'http-status'; - env?: string; - default: string; - path: string; -} - -interface TcpServiceDefinition { - id: string; - name: string; - group: string; - kind: 'tcp'; - host: string; - port: number; -} - -type ServiceDefinition = HttpServiceDefinition | TcpServiceDefinition; - -const SERVICES: ServiceDefinition[] = [ - { - id: 'admin-web', - name: 'Admin Dashboard', - group: 'Dashboards', - kind: 'http-status', - default: 'http://admin-web:3001', - path: '/api/health', - }, - { - id: 'tracker-web', - name: 'Tracker Dashboard', - group: 'Dashboards', - kind: 'http-status', - default: 'http://tracker-web:3003', - path: '/api/health', - }, - { - id: 'platform', - name: 'Platform Service', - group: 'Core Services', - env: 'PLATFORM_SERVICE_URL', - kind: 'http-json', - default: 'http://platform-service:4003', - path: '/health', - }, - { - id: 'extraction', - name: 'Extraction Service', - group: 'Core Services', - env: 'EXTRACTION_SERVICE_URL', - kind: 'http-json', - default: 'http://extraction-service:4005', - path: '/health', - }, - { - id: 'mcp', - name: 'MCP Server', - group: 'Core Services', - env: 'MCP_SERVER_URL', - kind: 'http-json', - default: 'http://mcp-server:4007', - path: '/health', - }, - { - id: 'grafana', - name: 'Grafana', - group: 'Observability', - kind: 'http-json', - default: 'http://grafana:3000', - path: '/api/health', - }, - { - id: 'loki', - name: 'Loki', - group: 'Observability', - kind: 'http-status', - default: 'http://loki:3100', - path: '/ready', - }, - { - id: 'prometheus', - name: 'Prometheus', - group: 'Observability', - kind: 'http-status', - default: 'http://prometheus:9090', - path: '/-/healthy', - }, - { - id: 'node-exporter', - name: 'Node Exporter', - group: 'Observability', - kind: 'http-status', - default: 'http://node-exporter:9100', - path: '/metrics', - }, - { - id: 'cadvisor', - name: 'cAdvisor', - group: 'Observability', - kind: 'http-status', - default: 'http://cadvisor:8080', - path: '/healthz', - }, - { - id: 'valkey', - name: 'Valkey', - group: 'Shared Infrastructure', - kind: 'tcp', - host: 'valkey', - port: 6379, - }, -]; - -async function checkHttpService(service: HttpServiceDefinition): Promise { - const baseUrl = (service.env && process.env[service.env]) || service.default; - const target = `${baseUrl}${service.path}`; - const start = Date.now(); - +export async function GET(req: NextRequest) { try { - const res = await fetch(target, { - method: 'GET', - headers: { 'Content-Type': 'application/json' }, - next: { revalidate: 0 }, - signal: AbortSignal.timeout(3000), - }); + const admin = await requireAdmin(req); + if (!admin) return NextResponse.json({ error: 'Unauthorized' }, { status: 401 }); - const latency = Date.now() - start; - - if (!res.ok) { - return { - id: service.id, - name: service.name, - group: service.group, - target, - status: 'down', - latency, - message: `HTTP ${res.status}`, - lastChecked: new Date().toISOString(), - }; - } - - if (service.kind === 'http-json') { - const payload = await res.json().catch(() => null); - const rawStatus = payload?.status; - const isOk = - rawStatus === 'ok' || - rawStatus === 'healthy' || - payload?.database === 'ok' || - payload?.commit === 'ok'; - - return { - id: service.id, - name: service.name, - group: service.group, - target, - status: isOk ? 'healthy' : 'degraded', - latency, - version: payload?.version, - message: isOk ? undefined : JSON.stringify(payload), - lastChecked: new Date().toISOString(), - }; - } - - return { - id: service.id, - name: service.name, - group: service.group, - target, - status: 'healthy', - latency, - lastChecked: new Date().toISOString(), - }; - } catch (err) { - return { - id: service.id, - name: service.name, - group: service.group, - target, - status: 'down', - latency: Date.now() - start, - message: err instanceof Error ? err.message : String(err), - lastChecked: new Date().toISOString(), - }; + return NextResponse.json(await collectOpsStatus()); + } catch (error) { + return NextResponse.json( + { error: error instanceof Error ? error.message : 'Unauthorized' }, + { status: 401 } + ); } } - -async function checkTcpService(service: TcpServiceDefinition): Promise { - const start = Date.now(); - const target = `${service.host}:${service.port}`; - - return new Promise(resolve => { - const socket = net.createConnection({ host: service.host, port: service.port }); - let settled = false; - - const finish = (status: ServiceStatus, message?: string) => { - if (settled) return; - settled = true; - socket.destroy(); - resolve({ - id: service.id, - name: service.name, - group: service.group, - target, - status, - latency: Date.now() - start, - message, - lastChecked: new Date().toISOString(), - }); - }; - - socket.setTimeout(3000); - socket.once('connect', () => finish('healthy')); - socket.once('timeout', () => finish('down', 'Connection timed out')); - socket.once('error', err => finish('down', err.message)); - }); -} - -export async function GET() { - const checks = await Promise.all( - SERVICES.map(service => - service.kind === 'tcp' ? checkTcpService(service) : checkHttpService(service) - ) - ); - - const downCount = checks.filter(c => c.status === 'down').length; - const degradedCount = checks.filter(c => c.status === 'degraded').length; - - let overall: OpsStatus['overall'] = 'healthy'; - if (downCount > 0) overall = 'critical'; - else if (degradedCount > 0) overall = 'degraded'; - - return NextResponse.json({ - overall, - timestamp: new Date().toISOString(), - services: checks, - } satisfies OpsStatus); -} diff --git a/dashboards/admin-web/src/app/api/ops/valkey/route.ts b/dashboards/admin-web/src/app/api/ops/valkey/route.ts new file mode 100644 index 00000000..fa00e4e7 --- /dev/null +++ b/dashboards/admin-web/src/app/api/ops/valkey/route.ts @@ -0,0 +1,147 @@ +import { NextRequest, NextResponse } from 'next/server'; +import { createClient } from 'redis'; +import { requireAdmin } from '@/lib/auth-server'; + +export const dynamic = 'force-dynamic'; + +interface ValkeyPreview { + key: string; + type: string; + ttlSeconds: number; + size?: number; + preview?: string; +} + +function sanitizePattern(input: string | null): string { + const value = (input || '*').trim(); + if (!value) return '*'; + if (value.length > 120 || /[\r\n]/.test(value)) { + throw new Error('Invalid key pattern'); + } + return value; +} + +function truncate(value: string, max = 160): string { + return value.length > max ? `${value.slice(0, max)}...` : value; +} + +function parseInfoValue(info: string, key: string): string | undefined { + const line = info + .split('\n') + .map(item => item.trim()) + .find(item => item.startsWith(`${key}:`)); + + return line?.split(':').slice(1).join(':'); +} + +async function getPreview(client: ReturnType, key: string): Promise { + const [type, ttlSeconds] = await Promise.all([client.type(key), client.ttl(key)]); + + if (type === 'string') { + const value = await client.get(key); + return { key, type, ttlSeconds, preview: truncate(value ?? '') }; + } + + if (type === 'hash') { + const [size, entries] = await Promise.all([client.hLen(key), client.hGetAll(key)]); + return { + key, + type, + ttlSeconds, + size, + preview: truncate(JSON.stringify(Object.fromEntries(Object.entries(entries).slice(0, 5)))), + }; + } + + if (type === 'list') { + const [size, entries] = await Promise.all([client.lLen(key), client.lRange(key, 0, 4)]); + return { + key, + type, + ttlSeconds, + size, + preview: truncate(JSON.stringify(entries)), + }; + } + + if (type === 'set') { + const [size, entries] = await Promise.all([client.sCard(key), client.sMembers(key)]); + return { + key, + type, + ttlSeconds, + size, + preview: truncate(JSON.stringify(entries.slice(0, 5))), + }; + } + + if (type === 'zset') { + const [size, entries] = await Promise.all([client.zCard(key), client.zRangeWithScores(key, 0, 4)]); + return { + key, + type, + ttlSeconds, + size, + preview: truncate(JSON.stringify(entries)), + }; + } + + return { key, type, ttlSeconds }; +} + +export async function GET(req: NextRequest) { + try { + const admin = await requireAdmin(req); + if (!admin) return NextResponse.json({ error: 'Unauthorized' }, { status: 401 }); + + const url = new URL(req.url); + const pattern = sanitizePattern(url.searchParams.get('pattern')); + const limit = Math.min(Math.max(Number(url.searchParams.get('limit') || '25'), 1), 100); + const client = createClient({ + url: process.env.VALKEY_URL || 'redis://valkey:6379', + }); + + try { + await client.connect(); + const [ping, dbsize, serverInfo, memoryInfo] = await Promise.all([ + client.ping(), + client.dbSize(), + client.info('server'), + client.info('memory'), + ]); + + const keys: string[] = []; + for await (const key of client.scanIterator({ + MATCH: pattern, + COUNT: Math.min(limit * 2, 200), + })) { + keys.push(key); + if (keys.length >= limit) break; + } + + const previews = await Promise.all(keys.map(key => getPreview(client, key))); + + return NextResponse.json({ + timestamp: new Date().toISOString(), + pattern, + limit, + summary: { + ping, + dbsize, + matchedKeys: previews.length, + version: parseInfoValue(serverInfo, 'redis_version') || 'unknown', + usedMemoryHuman: parseInfoValue(memoryInfo, 'used_memory_human') || 'unknown', + usedMemoryPeakHuman: parseInfoValue(memoryInfo, 'used_memory_peak_human') || 'unknown', + }, + keys: previews, + }); + } finally { + if (client.isOpen) { + await client.quit(); + } + } + } catch (error) { + const message = error instanceof Error ? error.message : 'Unable to inspect Valkey'; + return NextResponse.json({ error: message }, { status: message === 'Unauthorized' ? 401 : 500 }); + } +} diff --git a/dashboards/admin-web/src/lib/ops-stack.ts b/dashboards/admin-web/src/lib/ops-stack.ts new file mode 100644 index 00000000..4db35f81 --- /dev/null +++ b/dashboards/admin-web/src/lib/ops-stack.ts @@ -0,0 +1,492 @@ +import net from 'node:net'; + +export type ServiceStatus = 'healthy' | 'degraded' | 'down' | 'maintenance'; +export type CheckKind = 'http-json' | 'http-status' | 'tcp'; +export type InventorySource = 'docker' | 'vm'; + +export interface ServiceCheck { + id: string; + name: string; + group: string; + target: string; + status: ServiceStatus; + latency: number; + version?: string; + message?: string; + lastChecked: string; +} + +export interface OpsStatus { + overall: 'healthy' | 'degraded' | 'critical'; + timestamp: string; + services: ServiceCheck[]; +} + +interface BaseDefinition { + id: string; + name: string; + group: string; + description: string; + management: InventorySource; + exposure: 'internal' | 'public'; + port?: number; +} + +interface HttpServiceDefinition extends BaseDefinition { + kind: 'http-json' | 'http-status'; + env?: string; + default: string; + path: string; +} + +interface TcpServiceDefinition extends BaseDefinition { + kind: 'tcp'; + host: string; + port: number; +} + +export type ServiceDefinition = HttpServiceDefinition | TcpServiceDefinition; + +export interface InventoryService extends ServiceCheck { + description: string; + management: InventorySource; + exposure: 'internal' | 'public'; + port?: number; +} + +export interface HostTool { + id: string; + name: string; + group: string; + source: InventorySource; + management: string; + status: 'managed' | 'manual'; + description: string; +} + +export const STACK_SERVICES: ServiceDefinition[] = [ + { + id: 'admin-web', + name: 'Admin Dashboard', + group: 'Dashboards', + description: 'Internal admin portal for platform review and ops workflows.', + management: 'docker', + exposure: 'internal', + port: 3001, + kind: 'http-status', + default: 'http://admin-web:3001', + path: '/api/health', + }, + { + id: 'tracker-web', + name: 'Tracker Dashboard', + group: 'Dashboards', + description: 'Internal tracker UI for issue and delivery review.', + management: 'docker', + exposure: 'internal', + port: 3003, + kind: 'http-status', + default: 'http://tracker-web:3003', + path: '/api/health', + }, + { + id: 'platform', + name: 'Platform Service', + group: 'Core Services', + description: 'Core API and auth platform service.', + management: 'docker', + exposure: 'internal', + port: 4003, + env: 'PLATFORM_SERVICE_URL', + kind: 'http-json', + default: 'http://platform-service:4003', + path: '/health', + }, + { + id: 'extraction', + name: 'Extraction Service', + group: 'Core Services', + description: 'Structured extraction service with product-aware throttling.', + management: 'docker', + exposure: 'internal', + port: 4005, + env: 'EXTRACTION_SERVICE_URL', + kind: 'http-json', + default: 'http://extraction-service:4005', + path: '/health', + }, + { + id: 'mcp', + name: 'MCP Server', + group: 'Core Services', + description: 'Internal MCP integration surface.', + management: 'docker', + exposure: 'internal', + port: 4007, + env: 'MCP_SERVER_URL', + kind: 'http-json', + default: 'http://mcp-server:4007', + path: '/health', + }, + { + id: 'grafana', + name: 'Grafana', + group: 'Observability', + description: 'Metrics and logs visualization.', + management: 'docker', + exposure: 'internal', + port: 3000, + kind: 'http-json', + default: 'http://grafana:3000', + path: '/api/health', + }, + { + id: 'loki', + name: 'Loki', + group: 'Observability', + description: 'Centralized log aggregation.', + management: 'docker', + exposure: 'internal', + port: 3100, + kind: 'http-status', + default: 'http://loki:3100', + path: '/ready', + }, + { + id: 'prometheus', + name: 'Prometheus', + group: 'Observability', + description: 'Internal metrics scraping and query engine.', + management: 'docker', + exposure: 'internal', + port: 9090, + kind: 'http-status', + default: 'http://prometheus:9090', + path: '/-/healthy', + }, + { + id: 'node-exporter', + name: 'Node Exporter', + group: 'Observability', + description: 'Host-level VM metrics exporter.', + management: 'docker', + exposure: 'internal', + port: 9100, + kind: 'http-status', + default: 'http://node-exporter:9100', + path: '/metrics', + }, + { + id: 'cadvisor', + name: 'cAdvisor', + group: 'Observability', + description: 'Container-level metrics exporter.', + management: 'docker', + exposure: 'internal', + port: 8080, + kind: 'http-status', + default: 'http://cadvisor:8080', + path: '/healthz', + }, + { + id: 'valkey', + name: 'Valkey', + group: 'Shared Infrastructure', + description: 'Shared cache and rate-limit backing store.', + management: 'docker', + exposure: 'internal', + kind: 'tcp', + host: 'valkey', + port: 6379, + }, + { + id: 'gitea-registry', + name: 'Gitea Registry', + group: 'Shared Infrastructure', + description: 'Private npm package registry and source control service.', + management: 'docker', + exposure: 'internal', + port: 3300, + kind: 'http-json', + default: 'http://gitea-npm-registry:3000', + path: '/api/v1/version', + }, + { + id: 'mailpit', + name: 'Mailpit', + group: 'Shared Infrastructure', + description: 'SMTP sink and email inspection UI.', + management: 'docker', + exposure: 'internal', + port: 8025, + kind: 'http-status', + default: 'http://mailpit:8025', + path: '/', + }, + { + id: 'azurite', + name: 'Azurite', + group: 'Shared Infrastructure', + description: 'Local Azure Blob Storage emulator.', + management: 'docker', + exposure: 'internal', + kind: 'tcp', + host: 'azurite', + port: 10000, + }, + { + id: 'cosmos-emulator', + name: 'Cosmos Emulator', + group: 'Shared Infrastructure', + description: 'Local Azure Cosmos DB emulator.', + management: 'docker', + exposure: 'internal', + port: 8080, + kind: 'http-status', + default: 'http://cosmos-emulator:8080', + path: '/ready', + }, + { + id: 'gateway', + name: 'Traefik Gateway', + group: 'Ingress', + description: 'Legacy internal gateway and routing layer.', + management: 'docker', + exposure: 'internal', + port: 8080, + kind: 'http-status', + default: 'http://gateway:8080', + path: '/', + }, + { + id: 'caddy', + name: 'Caddy', + group: 'Ingress', + description: 'HTTPS ingress and reverse proxy for internal and backend domains.', + management: 'docker', + exposure: 'public', + kind: 'tcp', + host: 'caddy', + port: 80, + }, +]; + +export const HOST_TOOLS: HostTool[] = [ + { + id: 'docker-ce', + name: 'Docker CE', + group: 'Host Tooling', + source: 'vm', + management: 'VM bootstrap', + status: 'managed', + description: 'Container runtime for the internal stack.', + }, + { + id: 'docker-compose', + name: 'Docker Compose', + group: 'Host Tooling', + source: 'vm', + management: 'VM bootstrap', + status: 'managed', + description: 'Multi-service orchestration for the VM stack.', + }, + { + id: 'azure-cli', + name: 'Azure CLI', + group: 'Host Tooling', + source: 'vm', + management: 'Manual install', + status: 'manual', + description: 'Azure subscription and NSG management from the VM.', + }, + { + id: 'nodejs', + name: 'Node.js 22', + group: 'Host Tooling', + source: 'vm', + management: 'VM bootstrap', + status: 'managed', + description: 'Build/runtime toolchain for workspace services.', + }, + { + id: 'pnpm', + name: 'pnpm', + group: 'Host Tooling', + source: 'vm', + management: 'VM bootstrap', + status: 'managed', + description: 'Workspace package manager.', + }, + { + id: 'git', + name: 'git', + group: 'Host Tooling', + source: 'vm', + management: 'VM bootstrap', + status: 'managed', + description: 'Repo sync and deployment workflow tooling.', + }, + { + id: 'jq', + name: 'jq', + group: 'Host Tooling', + source: 'vm', + management: 'VM bootstrap', + status: 'managed', + description: 'CLI JSON inspection used in ops and setup scripts.', + }, + { + id: 'caddy-host-config', + name: 'Caddy Config', + group: 'Host Tooling', + source: 'vm', + management: 'VM file mount', + status: 'managed', + description: 'Host-mounted Caddy configuration at /opt/bytelyst/Caddyfile.', + }, +]; + +async function checkHttpService(service: HttpServiceDefinition): Promise { + const baseUrl = (service.env && process.env[service.env]) || service.default; + const target = `${baseUrl}${service.path}`; + const start = Date.now(); + + try { + const res = await fetch(target, { + method: 'GET', + headers: { 'Content-Type': 'application/json' }, + next: { revalidate: 0 }, + signal: AbortSignal.timeout(3000), + }); + + const latency = Date.now() - start; + + if (!res.ok) { + return { + id: service.id, + name: service.name, + group: service.group, + target, + status: 'down', + latency, + message: `HTTP ${res.status}`, + lastChecked: new Date().toISOString(), + }; + } + + if (service.kind === 'http-json') { + const payload = await res.json().catch(() => null); + const rawStatus = payload?.status; + const isOk = + rawStatus === 'ok' || + rawStatus === 'healthy' || + payload?.database === 'ok' || + payload?.commit === 'ok' || + payload?.version; + + return { + id: service.id, + name: service.name, + group: service.group, + target, + status: isOk ? 'healthy' : 'degraded', + latency, + version: payload?.version, + message: isOk ? undefined : JSON.stringify(payload), + lastChecked: new Date().toISOString(), + }; + } + + return { + id: service.id, + name: service.name, + group: service.group, + target, + status: 'healthy', + latency, + lastChecked: new Date().toISOString(), + }; + } catch (err) { + return { + id: service.id, + name: service.name, + group: service.group, + target, + status: 'down', + latency: Date.now() - start, + message: err instanceof Error ? err.message : String(err), + lastChecked: new Date().toISOString(), + }; + } +} + +async function checkTcpService(service: TcpServiceDefinition): Promise { + const start = Date.now(); + const target = `${service.host}:${service.port}`; + + return new Promise(resolve => { + const socket = net.createConnection({ host: service.host, port: service.port }); + let settled = false; + + const finish = (status: ServiceStatus, message?: string) => { + if (settled) return; + settled = true; + socket.destroy(); + resolve({ + id: service.id, + name: service.name, + group: service.group, + target, + status, + latency: Date.now() - start, + message, + lastChecked: new Date().toISOString(), + }); + }; + + socket.setTimeout(3000); + socket.once('connect', () => finish('healthy')); + socket.once('timeout', () => finish('down', 'Connection timed out')); + socket.once('error', err => finish('down', err.message)); + }); +} + +export async function collectOpsChecks(): Promise { + return Promise.all( + STACK_SERVICES.map(service => + service.kind === 'tcp' ? checkTcpService(service) : checkHttpService(service) + ) + ); +} + +export async function collectOpsStatus(): Promise { + const services = await collectOpsChecks(); + const downCount = services.filter(c => c.status === 'down').length; + const degradedCount = services.filter(c => c.status === 'degraded').length; + + let overall: OpsStatus['overall'] = 'healthy'; + if (downCount > 0) overall = 'critical'; + else if (degradedCount > 0) overall = 'degraded'; + + return { + overall, + timestamp: new Date().toISOString(), + services, + }; +} + +export async function collectInventoryServices(): Promise { + const checks = await collectOpsChecks(); + const byId = new Map(checks.map(check => [check.id, check])); + + return STACK_SERVICES.map(service => { + const check = byId.get(service.id); + return { + ...(check as ServiceCheck), + description: service.description, + management: service.management, + exposure: service.exposure, + port: service.port, + }; + }); +} diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index c10436d5..b6c914a5 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -156,6 +156,9 @@ importers: recharts: specifier: ^3.7.0 version: 3.7.0(@types/react@19.2.14)(react-dom@19.2.3(react@19.2.3))(react-is@18.3.1)(react@19.2.3)(redux@5.0.1) + redis: + specifier: ^4.7.0 + version: 4.7.1 remark-gfm: specifier: ^4.0.1 version: 4.0.1