/**
 * Base URL of the Prometheus server, taken from `PROMETHEUS_URL` when set.
 * Trailing slashes are stripped so that appending `/api/...` never yields `//api/...`.
 */
const PROMETHEUS_BASE = (
  process.env.PROMETHEUS_URL ?? 'http://learning_ai_common_plat-prometheus-1:9090'
).replace(/\/+$/, '');

// ── Types ──────────────────────────────────────────────────────────────────

/** One sample of a trend series. */
export interface TrendPoint {
  t: number; // unix ms
  v: number;
}

/** A named series of samples together with its summary statistics. */
export interface TrendSeries {
  metric: string;
  unit: string;
  points: TrendPoint[];
  latest: number;
  avg: number;
  peak: number;
}

/** Memory trend: available RAM and used swap, each as its own series. */
export interface MemoryTrend {
  available: TrendSeries;
  swap: TrendSeries;
}

/** Shape of the part of a Prometheus `query_range` response that this module reads. */
interface PrometheusRangeResponse {
  status: string;
  error?: string;
  data?: { result: Array<{ values: [number, string][] }> };
}

// ── Internal helpers ────────────────────────────────────────────────────────

const MS_PER_DAY = 86_400_000;

/** Rounds to one decimal place. */
function round1(n: number): number {
  return Math.round(n * 10) / 10;
}

/**
 * Translates a range label into query window parameters ending "now".
 *
 * @param rangeStr `'30d'` selects 30 days; any other value falls back to 7 days.
 * @returns `start`/`end` in unix milliseconds and the sampling `step`
 *          (`'1h'` for windows of up to 7 days, `'4h'` otherwise).
 */
function rangeParams(rangeStr: string): { start: number; end: number; step: string } {
  const end = Date.now();
  const days = rangeStr === '30d' ? 30 : 7;
  const start = end - days * MS_PER_DAY;
  const step = days <= 7 ? '1h' : '4h';
  return { start, end, step };
}

/**
 * Runs a range query against Prometheus and flattens the answer into one series.
 *
 * @param query PromQL expression.
 * @param start Window start, unix milliseconds (sent to Prometheus as whole seconds).
 * @param end   Window end, unix milliseconds (sent to Prometheus as whole seconds).
 * @param step  Sampling step, e.g. `'1h'`.
 * @returns Points with `t` in unix milliseconds. An empty result yields `[]`; a single
 *          series is returned as-is; several series are averaged per timestamp and
 *          sorted by time. Samples whose value does not parse to a number are dropped.
 * @throws Error when the HTTP status is not OK or the response `status` is not `'success'`.
 */
async function queryRange(
  query: string,
  start: number,
  end: number,
  step: string,
): Promise<TrendPoint[]> {
  const url = new URL(`${PROMETHEUS_BASE}/api/v1/query_range`);
  url.searchParams.set('query', query);
  url.searchParams.set('start', String(Math.floor(start / 1000)));
  url.searchParams.set('end', String(Math.floor(end / 1000)));
  url.searchParams.set('step', step);

  // The request is aborted if it has not completed within 15 seconds.
  const res = await fetch(url.toString(), {
    signal: AbortSignal.timeout(15_000),
  });
  if (!res.ok) throw new Error(`Prometheus ${res.status}: ${res.statusText}`);

  const body = (await res.json()) as PrometheusRangeResponse;
  if (body.status !== 'success') throw new Error(`Prometheus: ${body.error ?? 'unknown error'}`);

  const result = body.data?.result ?? [];
  const [first] = result;
  if (first === undefined) return [];

  if (result.length === 1) {
    return first.values
      .map(([ts, v]) => ({ t: ts * 1000, v: parseFloat(v) }))
      .filter(p => !Number.isNaN(p.v));
  }

  // Multi-series (e.g. per-CPU steal) → average by timestamp
  const byTime = new Map<number, number[]>();
  for (const series of result) {
    for (const [ts, v] of series.values) {
      const ms = ts * 1000;
      const val = parseFloat(v);
      if (Number.isNaN(val)) continue;
      const bucket = byTime.get(ms);
      if (bucket) bucket.push(val);
      else byTime.set(ms, [val]);
    }
  }

  return Array.from(byTime.entries())
    .sort(([a], [b]) => a - b)
    .map(([t, vals]) => ({ t, v: vals.reduce((s, x) => s + x, 0) / vals.length }));
}

/**
 * Computes the summary statistics of a series, each rounded to one decimal place.
 *
 * @param points Samples in chronological order; `latest` is taken from the last one.
 * @returns `{ latest, avg, peak }`, all `0` when `points` is empty.
 */
function summarize(points: TrendPoint[]): Pick<TrendSeries, 'latest' | 'avg' | 'peak'> {
  const last = points[points.length - 1];
  if (last === undefined) return { latest: 0, avg: 0, peak: 0 };

  // Single pass instead of Math.max(...values): spreading a large array as call
  // arguments can exceed the engine's argument limit and throw a RangeError.
  let sum = 0;
  let peak = -Infinity;
  for (const { v } of points) {
    sum += v;
    peak = Math.max(peak, v);
  }

  return { latest: round1(last.v), avg: round1(sum / points.length), peak: round1(peak) };
}

/** Bundles points with their metric name, unit and summary statistics. */
function toSeries(metric: string, unit: string, points: TrendPoint[]): TrendSeries {
  return { metric, unit, points, ...summarize(points) };
}

// ── Public trend queries ────────────────────────────────────────────────────

/**
 * Used share of the `/` filesystem, in percent.
 *
 * @param range `'30d'` for 30 days; any other value is treated as 7 days.
 */
export async function getDiskTrend(range = '7d'): Promise<TrendSeries> {
  const { start, end, step } = rangeParams(range);
  const points = await queryRange(
    '(1 - node_filesystem_avail_bytes{mountpoint="/"} / node_filesystem_size_bytes{mountpoint="/"}) * 100',
    start,
    end,
    step,
  );
  return toSeries('disk', '%', points);
}

/**
 * Available RAM and used swap; the byte metrics are divided by 1073741824 (2^30)
 * and labelled `GB`. Both queries run in parallel.
 *
 * @param range `'30d'` for 30 days; any other value is treated as 7 days.
 */
export async function getMemoryTrend(range = '7d'): Promise<MemoryTrend> {
  const { start, end, step } = rangeParams(range);
  const [availPts, swapPts] = await Promise.all([
    queryRange('node_memory_MemAvailable_bytes / 1073741824', start, end, step),
    queryRange(
      '(node_memory_SwapTotal_bytes - node_memory_SwapFree_bytes) / 1073741824',
      start,
      end,
      step,
    ),
  ]);
  return {
    available: toSeries('ram_available', 'GB', availPts),
    swap: toSeries('swap_used', 'GB', swapPts),
  };
}

/**
 * CPU steal time, in percent.
 *
 * @param range `'30d'` for 30 days; any other value is treated as 7 days.
 */
export async function getStealTrend(range = '7d'): Promise<TrendSeries> {
  const { start, end, step } = rangeParams(range);
  // avg() across all CPUs so multi-CPU hosts get a single % value
  const points = await queryRange(
    'avg(rate(node_cpu_seconds_total{mode="steal"}[5m])) * 100',
    start,
    end,
    step,
  );
  return toSeries('steal', '%', points);
}

/**
 * Disk write rate of device `sda`, in GB/hr.
 *
 * @param range `'30d'` for 30 days; any other value is treated as 7 days.
 */
export async function getIoTrend(range = '7d'): Promise<TrendSeries> {
  const { start, end, step } = rangeParams(range);
  // Total VM block write rate in GB/hr (sda = primary disk).
  // cAdvisor does not expose per-container blkio in this setup, so we use
  // the node-exporter metric which covers all processes including invttrdg.
  const points = await queryRange(
    'rate(node_disk_written_bytes_total{device="sda"}[5m]) * 3600 / 1073741824',
    start,
    end,
    step,
  );
  return toSeries('disk_io_write', 'GB/hr', points);
}

// ── Weekly digest summary (used by digest endpoint + cron) ─────────────────

/** Seven-day roll-up of every trend; `period` bounds are ISO-8601 strings. */
export interface WeeklyDigestData {
  period: { from: string; to: string };
  steal: { avg: number; peak: number };
  disk: { latest: number; peak: number };
  ram: { avg: number; low: number };
  swap: { avg: number; peak: number };
  io: { avg: number; peak: number };
}

/**
 * Fetches all four 7-day trends in parallel and condenses them into one digest.
 * Every figure is rounded to one decimal place; a series with no points reports `0`.
 *
 * @throws Error if any of the underlying Prometheus queries fails.
 */
export async function getWeeklyDigestData(): Promise<WeeklyDigestData> {
  const [diskData, memData, stealData, ioData] = await Promise.all([
    getDiskTrend('7d'),
    getMemoryTrend('7d'),
    getStealTrend('7d'),
    getIoTrend('7d'),
  ]);

  const to = new Date();
  const from = new Date(to.getTime() - 7 * MS_PER_DAY);

  // The series summary has no minimum, so the RAM low-water mark is folded here.
  const ramPoints = memData.available.points;
  const ramLow = ramPoints.length
    ? round1(ramPoints.reduce((lo, p) => Math.min(lo, p.v), Infinity))
    : 0;

  return {
    period: { from: from.toISOString(), to: to.toISOString() },
    steal: { avg: stealData.avg, peak: stealData.peak },
    disk: { latest: diskData.latest, peak: diskData.peak },
    ram: { avg: memData.available.avg, low: ramLow },
    swap: { avg: memData.swap.avg, peak: memData.swap.peak },
    io: { avg: ioData.avg, peak: ioData.peak },
  };
}