bytelyst-devops-tools/dashboard/backend/src/modules/vm/prometheus.ts
Hermes VM 8d32cb7980 feat(dashboard/vm): Phases 4.1-4.3 — Prometheus trends, sparklines, weekly digest
- prometheus.ts: new Prometheus client with 7d/30d range queries for disk,
  memory, swap, CPU steal, and disk I/O (GB/hr); getWeeklyDigestData()
  aggregates all metrics for digest and API endpoint
- routes.ts: GET /api/vm/metrics/trend?metric=…&range=… and
  GET /api/vm/weekly-digest endpoints
- api.ts: TrendPoint/TrendSeries types; getTrend() and getMemoryTrend()
  added to vmApi
- vm/page.tsx: Sparkline (pure SVG polyline+fill), TrendCard with
  latest/avg/peak and threshold colouring, TrendsPanel with lazy load
  on first open; Promise.allSettled() isolation for all 5 data panels
- vm-weekly-digest.sh: weekly Telegram digest via docker exec into
  devops-backend to reach Prometheus; emoji severity indicators; cron
  summary from /var/log/vm-cleanup.log
- systemd timer: Mon 08:00 UTC, Persistent=true (fires on next boot
  if missed); first trigger 2026-06-02

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-05-30 05:26:49 +00:00

173 lines
6.4 KiB
TypeScript

/** Prometheus address used when `PROMETHEUS_URL` is unset or blank. */
const DEFAULT_PROMETHEUS_URL = 'http://learning_ai_common_plat-prometheus-1:9090';

/**
 * Normalises a configured Prometheus base URL.
 *
 * Surrounding whitespace and trailing slashes are removed so that appending
 * `/api/v1/...` never produces a double slash. An unset or blank value (for
 * example an environment variable that is defined but empty) falls back to
 * the default address instead of yielding an invalid URL.
 *
 * @param raw Value read from the environment, if any.
 * @returns Base URL without a trailing slash.
 */
function resolvePrometheusBase(raw: string | undefined): string {
  const cleaned = (raw ?? '').trim().replace(/\/+$/, '');
  return cleaned === '' ? DEFAULT_PROMETHEUS_URL : cleaned;
}

/** Base URL of the Prometheus server queried by this module (no trailing slash). */
const PROMETHEUS_BASE = resolvePrometheusBase(process.env.PROMETHEUS_URL);
// ── Types ──────────────────────────────────────────────────────────────────
/** A single timestamped sample of a trend series. */
export interface TrendPoint {
t: number; // unix ms
/** Sample value, expressed in the unit of the series that contains it. */
v: number;
}
/**
 * A time series for one metric together with summary figures over its points.
 *
 * Within this module the summary figures come from `summarize`, which rounds
 * them to one decimal place and reports 0 for all three when there are no points.
 */
export interface TrendSeries {
/** Short metric identifier, e.g. 'disk' or 'steal'. */
metric: string;
/** Unit label for the values, e.g. '%' or 'GB'. */
unit: string;
/** Samples that make up the series. */
points: TrendPoint[];
/** Value of the last point in `points`. */
latest: number;
/** Arithmetic mean of all point values. */
avg: number;
/** Largest point value. */
peak: number;
}
/** Pair of memory-related series returned by `getMemoryTrend`. */
export interface MemoryTrend {
/** Available RAM (metric 'ram_available', unit 'GB'). */
available: TrendSeries;
/** Swap in use (metric 'swap_used', unit 'GB'). */
swap: TrendSeries;
}
// ── Internal helpers ────────────────────────────────────────────────────────
/**
 * Translates a range label into the absolute window and resolution for a range query.
 *
 * Only `'30d'` selects the 30-day window; every other value falls back to 7 days.
 *
 * @param rangeStr Range label such as `'7d'` or `'30d'`.
 * @returns Window bounds in unix milliseconds, ending now, plus the query
 *   step: `'1h'` for windows of up to 7 days, `'4h'` for longer ones.
 */
function rangeParams(rangeStr: string): { start: number; end: number; step: string } {
  const MS_PER_DAY = 86_400_000;
  const now = Date.now();

  let spanDays = 7;
  if (rangeStr === '30d') {
    spanDays = 30;
  }

  return {
    start: now - spanDays * MS_PER_DAY,
    end: now,
    step: spanDays > 7 ? '4h' : '1h',
  };
}
/**
 * Executes a Prometheus `query_range` request and reduces the response to one series.
 *
 * A single returned series is passed through as-is; when several series come
 * back (e.g. one per CPU) their values are averaged per timestamp. Samples
 * whose value does not parse to a number (such as "NaN") are dropped.
 *
 * @param query PromQL expression to evaluate.
 * @param start Window start in unix milliseconds.
 * @param end Window end in unix milliseconds.
 * @param step Query resolution step, e.g. `'1h'`.
 * @returns Points with unix-millisecond timestamps; empty when nothing matched.
 * @throws Error when the HTTP status is not 2xx (the message includes the
 *   error text from the response body when one is present) or when the
 *   response reports a non-success status. The request is also aborted, and
 *   the call rejects, if no response arrives within 15 seconds.
 */
async function queryRange(query: string, start: number, end: number, step: string): Promise<TrendPoint[]> {
  const url = new URL(`${PROMETHEUS_BASE}/api/v1/query_range`);
  url.searchParams.set('query', query);
  // Callers work in milliseconds; the request carries whole unix seconds.
  url.searchParams.set('start', String(Math.floor(start / 1000)));
  url.searchParams.set('end', String(Math.floor(end / 1000)));
  url.searchParams.set('step', step);

  const res = await fetch(url.toString(), {
    signal: AbortSignal.timeout(15_000),
  });

  if (!res.ok) {
    // Failed queries come back as 4xx/5xx with a JSON body whose `error`
    // field explains the failure; append it rather than reporting only the
    // generic status text.
    let message = `Prometheus ${res.status}: ${res.statusText}`;
    try {
      const failure = (await res.json()) as { error?: unknown } | null;
      if (typeof failure?.error === 'string' && failure.error !== '') {
        message += ` (${failure.error})`;
      }
    } catch {
      // Body was not JSON (e.g. a proxy error page); keep the status-only message.
    }
    throw new Error(message);
  }

  const body = (await res.json()) as {
    status: string;
    error?: string;
    data?: { result?: Array<{ values?: [number, string][] }> };
  };
  if (body.status !== 'success') throw new Error(`Prometheus: ${body.error ?? 'unknown error'}`);

  const result = body.data?.result ?? [];
  if (result.length === 0) return [];

  if (result.length === 1) {
    // A series may lack `values` (e.g. histogram-only samples); treat that as empty.
    return (result[0].values ?? [])
      .map(([ts, v]) => ({ t: ts * 1000, v: parseFloat(v) }))
      .filter(p => !Number.isNaN(p.v));
  }

  // Multi-series (e.g. per-CPU steal) → average by timestamp
  const byTime = new Map<number, number[]>();
  for (const series of result) {
    for (const [ts, v] of series.values ?? []) {
      const val = parseFloat(v);
      if (Number.isNaN(val)) continue;
      const ms = ts * 1000;
      const bucket = byTime.get(ms);
      if (bucket) bucket.push(val);
      else byTime.set(ms, [val]);
    }
  }

  return Array.from(byTime.entries())
    .sort(([a], [b]) => a - b)
    .map(([t, vals]) => ({ t, v: vals.reduce((s, x) => s + x, 0) / vals.length }));
}
/**
 * Derives the headline figures for a series.
 *
 * @param points Samples in chronological order; the last one is reported as `latest`.
 * @returns Last value, arithmetic mean and maximum, each rounded to one
 *   decimal place. All three are 0 when `points` is empty.
 */
function summarize(points: TrendPoint[]): Pick<TrendSeries, 'latest' | 'avg' | 'peak'> {
  const count = points.length;
  if (count === 0) {
    return { latest: 0, avg: 0, peak: 0 };
  }

  // Single pass: accumulate the sum and track the running maximum.
  let total = 0;
  let highest = -Infinity;
  for (const { v } of points) {
    total += v;
    highest = Math.max(highest, v);
  }

  const toOneDecimal = (n: number): number => Math.round(n * 10) / 10;
  return {
    latest: toOneDecimal(points[count - 1].v),
    avg: toOneDecimal(total / count),
    peak: toOneDecimal(highest),
  };
}
// ── Public trend queries ────────────────────────────────────────────────────
/**
 * Fetches the usage trend of the filesystem mounted at `/`.
 *
 * @param range Range label understood by `rangeParams` (defaults to `'7d'`).
 * @returns Series of used-space percentages with summary figures.
 */
export async function getDiskTrend(range = '7d'): Promise<TrendSeries> {
  // Used share of the root filesystem: (1 - available / size) as a percentage.
  const usedPercentQuery =
    '(1 - node_filesystem_avail_bytes{mountpoint="/"} / node_filesystem_size_bytes{mountpoint="/"}) * 100';

  const span = rangeParams(range);
  const samples = await queryRange(usedPercentQuery, span.start, span.end, span.step);
  const stats = summarize(samples);

  return { metric: 'disk', unit: '%', points: samples, ...stats };
}
/**
 * Fetches the available-RAM and used-swap trends in parallel.
 *
 * Both queries divide byte counts by 1073741824 and label the result 'GB'.
 *
 * @param range Range label understood by `rangeParams` (defaults to `'7d'`).
 * @returns The two series, each with its own summary figures.
 */
export async function getMemoryTrend(range = '7d'): Promise<MemoryTrend> {
  const span = rangeParams(range);
  // Both queries share the same window and step.
  const fetchSeries = (promql: string): Promise<TrendPoint[]> =>
    queryRange(promql, span.start, span.end, span.step);

  const [ramSamples, swapSamples] = await Promise.all([
    fetchSeries('node_memory_MemAvailable_bytes / 1073741824'),
    fetchSeries('(node_memory_SwapTotal_bytes - node_memory_SwapFree_bytes) / 1073741824'),
  ]);

  const available: TrendSeries = {
    metric: 'ram_available',
    unit: 'GB',
    points: ramSamples,
    ...summarize(ramSamples),
  };
  const swap: TrendSeries = {
    metric: 'swap_used',
    unit: 'GB',
    points: swapSamples,
    ...summarize(swapSamples),
  };

  return { available, swap };
}
/**
 * Fetches the CPU steal trend.
 *
 * @param range Range label understood by `rangeParams` (defaults to `'7d'`).
 * @returns Series of steal percentages with summary figures.
 */
export async function getStealTrend(range = '7d'): Promise<TrendSeries> {
  // The query averages over every CPU, so hosts with several CPUs still
  // produce one percentage per timestamp.
  const stealPercentQuery = 'avg(rate(node_cpu_seconds_total{mode="steal"}[5m])) * 100';

  const span = rangeParams(range);
  const samples = await queryRange(stealPercentQuery, span.start, span.end, span.step);
  const stats = summarize(samples);

  return { metric: 'steal', unit: '%', points: samples, ...stats };
}
/**
 * Fetches the disk write-rate trend for device `sda`.
 *
 * @param range Range label understood by `rangeParams` (defaults to `'7d'`).
 * @returns Series of write rates labelled 'GB/hr' with summary figures.
 */
export async function getIoTrend(range = '7d'): Promise<TrendSeries> {
  // Whole-VM block write rate on sda (the primary disk), scaled from
  // bytes/second to GB/hr. The node-exporter metric is used because cAdvisor
  // does not expose per-container blkio in this setup; it covers every
  // process, invttrdg included.
  const writeRateQuery = 'rate(node_disk_written_bytes_total{device="sda"}[5m]) * 3600 / 1073741824';

  const span = rangeParams(range);
  const samples = await queryRange(writeRateQuery, span.start, span.end, span.step);
  const stats = summarize(samples);

  return { metric: 'disk_io_write', unit: 'GB/hr', points: samples, ...stats };
}
// ── Weekly digest summary (used by digest endpoint + cron) ─────────────────
/** Seven-day summary of the VM metrics, as returned by `getWeeklyDigestData`. */
export interface WeeklyDigestData {
/** ISO-8601 timestamps bounding the reporting window. */
period: { from: string; to: string };
/** CPU steal in percent: mean and maximum over the window. */
steal: { avg: number; peak: number };
/** Root filesystem usage in percent: most recent and maximum value. */
disk: { latest: number; peak: number };
/** Available RAM in GB: mean and lowest sample. */
ram: { avg: number; low: number };
/** Swap in use in GB: mean and maximum. */
swap: { avg: number; peak: number };
/** Disk write rate in GB/hr: mean and maximum. */
io: { avg: number; peak: number };
}
/**
 * Collects the 7-day trends for disk, memory, CPU steal and disk I/O and
 * condenses them into the weekly digest figures.
 *
 * All four trends are fetched in parallel; if any of them rejects, this
 * function rejects as well. Every figure is reported to one decimal place.
 *
 * @returns Digest figures together with the ISO-8601 bounds of the window.
 */
export async function getWeeklyDigestData(): Promise<WeeklyDigestData> {
  // Stamp the window before querying: the trend helpers derive their own
  // window from the current time when called, so taking `to` afterwards would
  // shift the reported period by however long the queries took.
  const to = new Date();
  const from = new Date(to.getTime() - 7 * 86_400_000);

  const [diskData, memData, stealData, ioData] = await Promise.all([
    getDiskTrend('7d'),
    getMemoryTrend('7d'),
    getStealTrend('7d'),
    getIoTrend('7d'),
  ]);

  // The series summary has no minimum, so derive it here and round it to one
  // decimal like the summary figures it is reported next to.
  const ramVals = memData.available.points.map(p => p.v);
  const ramLow = ramVals.length ? Math.round(Math.min(...ramVals) * 10) / 10 : 0;

  return {
    period: { from: from.toISOString(), to: to.toISOString() },
    steal: { avg: stealData.avg, peak: stealData.peak },
    disk: { latest: diskData.latest, peak: diskData.peak },
    ram: { avg: memData.available.avg, low: ramLow },
    // Reuse the peak already computed for the swap series (0 when it is empty)
    // instead of recomputing an unrounded maximum.
    swap: { avg: memData.swap.avg, peak: memData.swap.peak },
    io: { avg: ioData.avg, peak: ioData.peak },
  };
}