feat: complete hermes telemetry dashboard wiring

2026-05-31 08:28:12 +00:00 · 2026-05-31 08:28:12 +00:00 · 02b362399b
commit 02b362399b
parent 38aefb05e4
31 changed files with 1695 additions and 43 deletions
--- a/dashboard/DEPLOYMENT.md
+++ b/dashboard/DEPLOYMENT.md
@ -10,6 +10,10 @@ This guide covers deploying both the DevOps Dashboard and Platform Admin Dashboa

 ## Public URLs

+For the full living bookmark list across all ByteLyst apps, APIs, Hermes
+dashboards, and last deploy timestamps, see
+[`../docs/app-url-bookmarks.md`](../docs/app-url-bookmarks.md).
+
 - **DevOps Dashboard**: `https://devops.bytelyst.com`
 - **Admin Dashboard**: `https://admin.bytelyst.com`
 - **API Gateway**: `https://api.bytelyst.com`
--- a/dashboard/backend/src/lib/dashboard-alerts.test.ts
+++ b/dashboard/backend/src/lib/dashboard-alerts.test.ts
@ -0,0 +1,44 @@
+import { beforeEach, describe, expect, it, vi } from 'vitest';
+
+// Replace fs/promises with a mock so these tests never touch the real filesystem.
+// The mock function is created through vi.hoisted so the vi.mock factory can reference it.
+const appendFileMock = vi.hoisted(() => vi.fn());
+vi.mock('fs/promises', () => ({ appendFile: appendFileMock }));
+
+// Import the module under test after the mock has been registered.
+const { appendDashboardWarning, clearDashboardWarningDedupe } = await import('./dashboard-alerts.js');
+
+describe('dashboard-alerts', () => {
+  // Reset mock call history, the in-memory dedupe map and the env var so each test starts clean.
+  beforeEach(() => {
+    vi.clearAllMocks();
+    clearDashboardWarningDedupe();
+    delete process.env.HERMES_DASHBOARD_ALERT_LOG;
+  });
+
+  // Without HERMES_DASHBOARD_ALERT_LOG the function reports false and writes nothing.
+  it('does nothing when the alert log is not configured', async () => {
+    const wrote = await appendDashboardWarning({ severity: 'warn', instance: 'vijay', message: 'gateway down' });
+    expect(wrote).toBe(false);
+    expect(appendFileMock).not.toHaveBeenCalled();
+  });
+
+  // Pins the exact log line format: ISO timestamp, severity token, instance, message, newline.
+  it('writes a routed warning line when configured', async () => {
+    process.env.HERMES_DASHBOARD_ALERT_LOG = '/tmp/hermes-dashboard-warnings.log';
+    const wrote = await appendDashboardWarning(
+      { severity: 'critical', instance: 'bheem', message: 'backup missing' },
+      Date.parse('2026-05-31T07:00:00Z'),
+    );
+
+    expect(wrote).toBe(true);
+    expect(appendFileMock).toHaveBeenCalledWith(
+      '/tmp/hermes-dashboard-warnings.log',
+      '2026-05-31T07:00:00.000Z CRITICAL instance=bheem backup missing\n',
+      'utf8',
+    );
+  });
+
+  // The `now` argument is injected: 2_000 falls inside the one-hour window of the
+  // first write at 1_000, while 3_602_000 is 3_601_000 ms after it, i.e. past the window.
+  it('deduplicates for one hour and writes again after expiry', async () => {
+    process.env.HERMES_DASHBOARD_ALERT_LOG = '/tmp/hermes-dashboard-warnings.log';
+    const input = { severity: 'warn' as const, instance: 'all' as const, message: 'shared warning' };
+    expect(await appendDashboardWarning(input, 1_000)).toBe(true);
+    expect(await appendDashboardWarning(input, 2_000)).toBe(false);
+    expect(await appendDashboardWarning(input, 3_602_000)).toBe(true);
+    expect(appendFileMock).toHaveBeenCalledTimes(2);
+  });
+});
--- a/dashboard/backend/src/lib/dashboard-alerts.ts
+++ b/dashboard/backend/src/lib/dashboard-alerts.ts
@ -0,0 +1,48 @@
+import { appendFile } from 'fs/promises';
+
+type AlertSeverity = 'info' | 'warn' | 'critical';
+type AlertInstance = 'vijay' | 'bheem' | 'all';
+
+interface DashboardWarningInput {
+  severity: AlertSeverity;
+  instance: AlertInstance;
+  message: string;
+}
+
+const DEDUPE_WINDOW_MS = 60 * 60 * 1000;
+const recent = new Map<string, number>();
+
+/** Maps an alert severity to the upper-case token written into the log line. */
+function severityToken(severity: AlertSeverity): string {
+  switch (severity) {
+    case 'critical':
+      return 'CRITICAL';
+    case 'warn':
+      return 'WARNING';
+    default:
+      // Everything else (i.e. 'info') is reported as INFO.
+      return 'INFO';
+  }
+}
+
+/** Builds the dedupe-map key: severity, instance and message joined by NUL characters. */
+function alertKey(input: DashboardWarningInput): string {
+  const { severity, instance, message } = input;
+  return [severity, instance, message].join('\0');
+}
+
+/** Removes every dedupe entry recorded more than DEDUPE_WINDOW_MS before `now`. */
+function purgeExpired(now: number): void {
+  recent.forEach((recordedAt, key) => {
+    const age = now - recordedAt;
+    if (age > DEDUPE_WINDOW_MS) recent.delete(key);
+  });
+}
+
+/**
+ * Appends one warning line to the file named by HERMES_DASHBOARD_ALERT_LOG.
+ *
+ * The line has the form `<ISO timestamp> <SEVERITY> instance=<instance> <message>`.
+ * An identical (severity, instance, message) triple is written at most once per
+ * DEDUPE_WINDOW_MS.
+ *
+ * Writing is best-effort: a failed append is reported on stderr and answered
+ * with `false` instead of rejecting, so callers that await this while building
+ * a snapshot are not broken by an unwritable log. The dedupe entry is released
+ * on failure so the next occurrence is attempted again.
+ *
+ * @param input Severity, instance and message of the warning.
+ * @param now Epoch milliseconds used for the timestamp and the dedupe window; defaults to Date.now().
+ * @returns true when a line was written; false when the log is not configured,
+ *   the warning was deduplicated, or the write failed.
+ */
+export async function appendDashboardWarning(input: DashboardWarningInput, now = Date.now()): Promise<boolean> {
+  const logPath = process.env.HERMES_DASHBOARD_ALERT_LOG;
+  if (!logPath) return false;
+
+  purgeExpired(now);
+  const key = alertKey(input);
+  const previous = recent.get(key);
+  // Compare against undefined so a recorded time of 0 still counts as "already seen".
+  if (previous !== undefined && now - previous <= DEDUPE_WINDOW_MS) return false;
+
+  // The log is line-oriented: collapse line breaks so one warning is always one line.
+  const message = input.message.replace(/[\r\n]+/g, ' ');
+  const line = `${new Date(now).toISOString()} ${severityToken(input.severity)} instance=${input.instance} ${message}\n`;
+
+  // Reserve the key before awaiting so concurrent identical warnings do not both write.
+  recent.set(key, now);
+  try {
+    await appendFile(logPath, line, 'utf8');
+    return true;
+  } catch (err) {
+    // Release our reservation (unless a later call replaced it) so the warning is retried.
+    if (recent.get(key) === now) recent.delete(key);
+    console.error(`failed to append dashboard warning to ${logPath}`, err);
+    return false;
+  }
+}
+
+/**
+ * Empties the in-memory dedupe map so the next call to appendDashboardWarning
+ * for any warning is treated as unseen. The unit tests call this before each case.
+ */
+export function clearDashboardWarningDedupe(): void {
+  recent.clear();
+}
--- a/dashboard/backend/src/modules/hermes-ops/hermes-ops.test.ts
+++ b/dashboard/backend/src/modules/hermes-ops/hermes-ops.test.ts
@ -146,6 +146,49 @@ describe('hermes-ops repository', () => {
    expect(bheem.gateway.status).toBe('up');
  });

+  // When an ops export file exists for an instance, its values must win over the live probes.
+  it('prefers a sanitized per-instance ops export when one is present', async () => {
+    // NOTE(review): healthyHandler() presumably makes every probed command report healthy,
+    // so the "down" values asserted below can only come from the export — confirm.
+    setExec(healthyHandler());
+    readFileMock.mockImplementation(async (p: string) => {
+      // Only the bheem instance (opsExportPath under /home/uma) gets an export file.
+      if (p === '/home/uma/.hermes/ops-export.json') {
+        return JSON.stringify({
+          gateway: { active: false, enabled: true, status: 'down' },
+          dashboard: { active: false, status: 'down' },
+          backupTimer: {
+            name: 'uma-hermes-backup.timer',
+            active: false,
+            status: 'down',
+            nextRun: null,
+            lastRun: null,
+          },
+          repo: {
+            path: '/home/uma/repos/uma_hostinger_hermes_vm',
+            branch: 'main',
+            clean: true,
+            head: 'export1',
+            lastCommitAt: '2026-05-31T00:00:00Z',
+            size: '1M',
+            status: 'up',
+          },
+          restoredFileCount: 42,
+          restoredCronJobs: 3,
+          googleWorkspaceToken: true,
+        });
+      }
+      // Probe-side files report different counts (3 files, 2 jobs) than the export (42, 3).
+      if (p.endsWith('MANIFEST.json')) return JSON.stringify({ files: [1, 2, 3] });
+      if (p.endsWith('jobs.json')) return JSON.stringify({ jobs: [{ id: 'a' }, { id: 'b' }] });
+      throw new Error('no such file');
+    });
+
+    const snapshot = await getHermesOpsSnapshot({ force: true });
+    const bheem = snapshot.instances.find((i) => i.id === 'bheem')!;
+    // Every asserted value below is the one supplied by the export file.
+    expect(bheem.gateway.status).toBe('down');
+    expect(bheem.dashboard.status).toBe('down');
+    expect(bheem.backup.repo.head).toBe('export1');
+    expect(bheem.backup.restoredFileCount).toBe(42);
+    expect(bheem.backup.restoredCronJobs).toBe(3);
+    expect(bheem.google.workspaceToken).toBe(true);
+  });
+
  it('reports unknown repo status when git cannot be read', async () => {
    setExec((command, args) => {
      if (command === 'git') return enoentError();
--- a/dashboard/backend/src/modules/hermes-ops/repository.ts
+++ b/dashboard/backend/src/modules/hermes-ops/repository.ts
@ -2,6 +2,7 @@ import { execFile } from 'child_process';
 import { promisify } from 'util';
 import { readFile, stat } from 'fs/promises';
 import { existsSync } from 'fs';
+import { appendDashboardWarning } from '../../lib/dashboard-alerts.js';
 import type {
  HermesOpsCronJob,
  HermesOpsInstance,
@ -31,6 +32,7 @@ const instances = [
    dashboardPort: 9119,
    backupTimer: 'hermes-root-backup.timer',
    repoPath: '/root/repos/bytelyst_hostinger_hermes_vm',
+    opsExportPath: '/root/.hermes/ops-export.json',
    driveFolder: 'Vijay Drive',
  },
  {
@ -43,10 +45,21 @@ const instances = [
    dashboardPort: 9120,
    backupTimer: 'uma-hermes-backup.timer',
    repoPath: '/home/uma/repos/uma_hostinger_hermes_vm',
+    opsExportPath: '/home/uma/.hermes/ops-export.json',
    driveFolder: 'Bheem Drive',
  },
 ];

+/**
+ * Shape of the optional per-instance ops export JSON (see `opsExportPath`).
+ * Every field is optional; buildSnapshot falls back to its own probe result
+ * for any field the export does not provide.
+ */
+interface OpsExport {
+  // Used in place of the probed gateway state only when `status` is set.
+  gateway?: { active?: boolean; enabled?: boolean; status?: ProbeStatus };
+  // Used in place of the probed dashboard state only when `status` is set.
+  dashboard?: { active?: boolean; status?: ProbeStatus };
+  backupTimer?: HermesOpsTimer;
+  repo?: HermesOpsRepo;
+  // A number or an explicit null both override the probed count.
+  restoredFileCount?: number | null;
+  restoredCronJobs?: number | null;
+  googleWorkspaceToken?: boolean;
+}
+
 interface ExecResult {
  // Trimmed stdout. Present even when the command exited non-zero (e.g.
  // `systemctl is-active` prints "inactive" and exits 3).
@ -223,6 +236,15 @@ async function tokenExists(path: string): Promise<boolean> {
  }
 }

+/**
+ * Reads and parses the ops export JSON at `path`.
+ *
+ * @param path Absolute path of the export file.
+ * @returns The parsed object, or null when the file cannot be read, is not
+ *   valid JSON, or its top-level value is not a plain object (array,
+ *   primitive or null).
+ */
+async function readOpsExport(path: string): Promise<OpsExport | null> {
+  try {
+    const parsed: unknown = JSON.parse(await readFile(path, 'utf8'));
+    // typeof [] is 'object', so arrays have to be rejected explicitly.
+    if (typeof parsed !== 'object' || parsed === null || Array.isArray(parsed)) return null;
+    return parsed as OpsExport;
+  } catch {
+    // A missing or malformed export is expected; callers fall back to live probes.
+    return null;
+  }
+}
+
 async function getTailscaleIp(): Promise<string | null> {
  const result = await exec('tailscale', ['ip', '-4']);
  if (!result.ran) return null;
@ -246,11 +268,12 @@ async function buildSnapshot(): Promise<HermesOpsSnapshot> {

  const results: HermesOpsInstance[] = [];
  for (const item of instances) {
+    const opsExport = await readOpsExport(item.opsExportPath);
    const gatewayActiveCheck =
      item.gatewayKind === 'uma-user' ? probeUmaGatewayActive() : probeSystemActive(item.gatewayService);
    const gatewayEnabledCheck =
      item.gatewayKind === 'uma-user' ? probeUmaGatewayEnabled() : probeSystemEnabled(item.gatewayService);
-    const [gateway, gatewayEnabled, dashboard, backupTimer, repo, stats, googleToken] = await Promise.all([
+    const [probedGateway, probedGatewayEnabled, probedDashboard, probedBackupTimer, probedRepo, probedStats, probedGoogleToken] = await Promise.all([
      gatewayActiveCheck,
      gatewayEnabledCheck,
      probeSystemActive(item.dashboardService),
@ -259,6 +282,22 @@ async function buildSnapshot(): Promise<HermesOpsSnapshot> {
      manifestStats(`${item.repoPath}/hermes_persistent_backup`),
      tokenExists(`${item.hermesHome}/google_token.json`),
    ]);
+    const gateway = opsExport?.gateway?.status ? {
+      active: Boolean(opsExport.gateway.active),
+      status: opsExport.gateway.status,
+    } : probedGateway;
+    const gatewayEnabled = typeof opsExport?.gateway?.enabled === 'boolean' ? opsExport.gateway.enabled : probedGatewayEnabled;
+    const dashboard = opsExport?.dashboard?.status ? {
+      active: Boolean(opsExport.dashboard.active),
+      status: opsExport.dashboard.status,
+    } : probedDashboard;
+    const backupTimer = opsExport?.backupTimer ?? probedBackupTimer;
+    const repo = opsExport?.repo ?? probedRepo;
+    const stats = {
+      files: typeof opsExport?.restoredFileCount === 'number' || opsExport?.restoredFileCount === null ? opsExport.restoredFileCount : probedStats.files,
+      cronJobs: typeof opsExport?.restoredCronJobs === 'number' || opsExport?.restoredCronJobs === null ? opsExport.restoredCronJobs : probedStats.cronJobs,
+    };
+    const googleToken = typeof opsExport?.googleWorkspaceToken === 'boolean' ? opsExport.googleWorkspaceToken : probedGoogleToken;

    const dashboardUrl = tailscaleIp ? `http://${tailscaleIp}:${item.dashboardPort}/` : `:${item.dashboardPort}`;

@ -316,6 +355,16 @@ async function buildSnapshot(): Promise<HermesOpsSnapshot> {
    warnings.push('Emergency Drive OAuth token is missing');
  }

+  await Promise.all(warnings.map((message) => {
+    const lower = message.toLowerCase();
+    const instance = lower.includes('bheem') || lower.includes('uma')
+      ? 'bheem'
+      : lower.includes('vijay') || lower.includes('root')
+        ? 'vijay'
+        : 'all';
+    return appendDashboardWarning({ severity: 'warn', instance, message });
+  }));
+
  const cronJobs: HermesOpsCronJob[] = [
    {
      name: emergencyDriveUpload.name,
--- a/dashboard/backend/src/modules/hermes-telemetry/hermes-telemetry.test.ts
+++ b/dashboard/backend/src/modules/hermes-telemetry/hermes-telemetry.test.ts
@ -7,7 +7,8 @@ vi.mock('child_process', () => ({ execFile: execFileMock }));

 const readFileMock = vi.hoisted(() => vi.fn());
 const statMock = vi.hoisted(() => vi.fn());
-vi.mock('fs/promises', () => ({ readFile: readFileMock, stat: statMock }));
+const readdirMock = vi.hoisted(() => vi.fn());
+vi.mock('fs/promises', () => ({ readFile: readFileMock, readdir: readdirMock, stat: statMock }));

 type Handler = (command: string, args: string[]) => { error?: NodeJS.ErrnoException; stdout?: string };

@ -42,6 +43,7 @@ describe('hermes-telemetry repository', () => {
    });
    statMock.mockRejectedValue(Object.assign(new Error('ENOENT'), { code: 'ENOENT' }));
    readFileMock.mockRejectedValue(Object.assign(new Error('ENOENT'), { code: 'ENOENT' }));
+    readdirMock.mockRejectedValue(Object.assign(new Error('ENOENT'), { code: 'ENOENT' }));

    const snapshot = await getHermesTelemetrySnapshot('vijay');
    // The whole shape must validate even when nothing was readable — that's
@ -84,6 +86,7 @@ describe('hermes-telemetry repository', () => {
      return { stdout: '' };
    });
    statMock.mockRejectedValue(Object.assign(new Error('ENOENT'), { code: 'ENOENT' }));
+    readdirMock.mockResolvedValue([]);

    const snapshot = await getHermesTelemetrySnapshot('vijay');
    expect(snapshot.sessions).toEqual({ totalSessions: 59, totalMessages: 5225, status: 'up' });
@ -102,6 +105,7 @@ describe('hermes-telemetry repository', () => {
      return { error: err };
    });
    statMock.mockResolvedValue({} as never);
+    readdirMock.mockResolvedValue([]);
    readFileMock.mockResolvedValue([
      '2026-01-01T12:34:56 WARNING gateway is degraded',
      '2026-01-01T12:35:01 CRITICAL backup repo HEAD missing',
@ -129,6 +133,7 @@ describe('hermes-telemetry repository', () => {
      return { error: err };
    });
    statMock.mockRejectedValue(Object.assign(new Error('ENOENT'), { code: 'ENOENT' }));
+    readdirMock.mockResolvedValue([]);

    const snapshot = await getHermesTelemetrySnapshot('vijay');
    expect(snapshot.backupHistory.status).toBe('up');
@ -144,6 +149,7 @@ describe('hermes-telemetry repository', () => {
      return { error: err };
    });
    statMock.mockRejectedValue(Object.assign(new Error('ENOENT'), { code: 'ENOENT' }));
+    readdirMock.mockRejectedValue(Object.assign(new Error('ENOENT'), { code: 'ENOENT' }));

    const a = await getHermesTelemetrySnapshot('vijay');
    const callsAfterFirst = calls;
@ -159,10 +165,37 @@ describe('hermes-telemetry repository', () => {
      return { error: err };
    });
    statMock.mockRejectedValue(Object.assign(new Error('ENOENT'), { code: 'ENOENT' }));
+    readdirMock.mockRejectedValue(Object.assign(new Error('ENOENT'), { code: 'ENOENT' }));

    const v = await getHermesTelemetrySnapshot('vijay');
    const b = await getHermesTelemetrySnapshot('bheem');
    expect(v.instanceId).toBe('vijay');
    expect(b.instanceId).toBe('bheem');
  });
+
+  // Session events must be summarised from JSONL metadata only; message text must never leak.
+  it('parses sanitized Hermes session JSONL events without exposing raw message content', async () => {
+    // Every CLI invocation fails with ENOENT, so only the file-based readers can produce data.
+    setExec(() => {
+      const err = Object.assign(new Error('ENOENT'), { code: 'ENOENT' as const });
+      return { error: err };
+    });
+    statMock.mockRejectedValue(Object.assign(new Error('ENOENT'), { code: 'ENOENT' }));
+    readdirMock.mockResolvedValue(['20260101_session.jsonl']);
+    readFileMock.mockImplementation(async (path: string) => {
+      if (path.endsWith('.jsonl')) {
+        return [
+          JSON.stringify({ role: 'user', content: 'secret prompt', timestamp: '2026-01-01T00:00:00Z' }),
+          JSON.stringify({ role: 'assistant', finish_reason: 'tool_calls', tool_calls: [{ function: { name: 'exec_command' } }], timestamp: '2026-01-01T00:01:00Z' }),
+        ].join('\n');
+      }
+      throw Object.assign(new Error('ENOENT'), { code: 'ENOENT' });
+    });
+
+    const snapshot = await getHermesTelemetrySnapshot('vijay');
+    expect(snapshot.sessionEvents.status).toBe('up');
+    expect(snapshot.sessionEvents.sourceCount).toBe(1);
+    expect(snapshot.sessionEvents.entries).toHaveLength(2);
+    // Entries are ordered newest first, so the 00:01 tool call precedes the 00:00 user message.
+    expect(snapshot.sessionEvents.entries[0].summary).toBe('assistant tool call: exec_command');
+    expect(snapshot.sessionEvents.entries[1].summary).toBe('user message (content redacted)');
+    // The raw prompt text must not appear anywhere in the serialised entries.
+    expect(JSON.stringify(snapshot.sessionEvents.entries)).not.toContain('secret prompt');
+  });
 });
--- a/dashboard/backend/src/modules/hermes-telemetry/repository.ts
+++ b/dashboard/backend/src/modules/hermes-telemetry/repository.ts
@ -1,6 +1,8 @@
 import { execFile } from 'child_process';
 import { promisify } from 'util';
-import { readFile, stat } from 'fs/promises';
+import { readdir, readFile, stat } from 'fs/promises';
+import { basename, join } from 'path';
+import { appendDashboardWarning } from '../../lib/dashboard-alerts.js';
 import { childLogger } from '../../lib/logger.js';
 import type {
  HermesBackupHistory,
@ -8,6 +10,10 @@ import type {
  HermesCronEntry,
  HermesCronList,
  HermesInstanceId,
+  HermesSessionEntry,
+  HermesSessionEvent,
+  HermesSessionEventList,
+  HermesSessionList,
  HermesMemoryList,
  HermesSessionStats,
  HermesSkillList,
@ -29,6 +35,8 @@ interface InstanceConfig {
  user: string | null; // null → run as the backend's own user (root in prod)
  repoPath: string;
  watchdogLog: string;
+  sessionsIndex: string;
+  sessionsDir: string;
 }

 const INSTANCES: Record<HermesInstanceId, InstanceConfig> = {
@ -37,12 +45,16 @@ const INSTANCES: Record<HermesInstanceId, InstanceConfig> = {
    user: null,
    repoPath: '/root/repos/bytelyst_hostinger_hermes_vm',
    watchdogLog: '/root/.hermes/logs/hermes-health-watchdog.log',
+    sessionsIndex: '/root/.hermes/sessions/sessions.json',
+    sessionsDir: '/root/.hermes/sessions',
  },
  bheem: {
    id: 'bheem',
    user: 'uma',
    repoPath: '/home/uma/repos/uma_hostinger_hermes_vm',
    watchdogLog: '/home/uma/.hermes/logs/hermes-health-watchdog.log',
+    sessionsIndex: '/home/uma/.hermes/sessions/sessions.json',
+    sessionsDir: '/home/uma/.hermes/sessions',
  },
 };

@ -103,6 +115,142 @@ async function readSessionStats(inst: InstanceConfig): Promise<HermesSessionStat
  }
 }

+/**
+ * Reads the instance's session index JSON and returns up to 50 sessions,
+ * most recently updated first.
+ *
+ * Rows that are not objects are skipped rather than failing the whole list,
+ * and rows without an id or session key are dropped. Entries whose timestamp
+ * is missing or unparseable sort as if they were at epoch 0.
+ *
+ * @param inst Instance configuration; only `sessionsIndex` is read here.
+ * @returns The entries with status 'up', or an empty list with status
+ *   'unknown' when the index cannot be read, is not valid JSON, or is not an
+ *   object at the top level.
+ */
+async function readSessionList(inst: InstanceConfig): Promise<HermesSessionList> {
+  try {
+    const parsed: unknown = JSON.parse(await readFile(inst.sessionsIndex, 'utf8'));
+    if (typeof parsed !== 'object' || parsed === null) return { entries: [], status: 'unknown' };
+
+    // Sort key: updatedAt, else createdAt. NaN is mapped to 0 so the comparator stays consistent.
+    const sortTime = (entry: HermesSessionEntry): number => {
+      const time = Date.parse(entry.updatedAt ?? entry.createdAt ?? '');
+      return Number.isNaN(time) ? 0 : time;
+    };
+
+    const entries: HermesSessionEntry[] = Object.values(parsed)
+      // One malformed row (null, string, number, array) must not discard every other session.
+      .filter(isRecord)
+      .map((row) => ({
+        id: String(row.session_id ?? row.id ?? row.session_key ?? ''),
+        sessionKey: String(row.session_key ?? ''),
+        platform: row.platform ? String(row.platform) : null,
+        chatType: row.chat_type ? String(row.chat_type) : null,
+        displayName: row.display_name ? String(row.display_name) : null,
+        createdAt: row.created_at ? String(row.created_at) : null,
+        updatedAt: row.updated_at ? String(row.updated_at) : null,
+        suspended: Boolean(row.suspended ?? false),
+        resumePending: Boolean(row.resume_pending ?? false),
+        totalTokens: typeof row.total_tokens === 'number' ? row.total_tokens : null,
+        estimatedCostUsd: typeof row.estimated_cost_usd === 'number' ? row.estimated_cost_usd : null,
+      }))
+      .filter((entry) => entry.id || entry.sessionKey)
+      .sort((a, b) => sortTime(b) - sortTime(a))
+      .slice(0, 50);
+    return { entries, status: 'up' };
+  } catch {
+    // Unreadable or malformed index: report 'unknown' and let the caller raise the warning.
+    return { entries: [], status: 'unknown' };
+  }
+}
+
+/** Type guard: true only for non-null objects that are not arrays. */
+function isRecord(value: unknown): value is Record<string, unknown> {
+  if (value === null || Array.isArray(value)) return false;
+  return typeof value === 'object';
+}
+
+/**
+ * Collects the distinct, trimmed tool names found on a session row.
+ *
+ * Looks at each object in `tool_calls` and then `codex_message_items`, taking
+ * its `name` and its nested `function.name`. Returns at most the first 8
+ * names in the order they were first seen.
+ */
+function extractToolNames(row: Record<string, unknown>): string[] {
+  const seen = new Set<string>();
+  for (const field of [row.tool_calls, row.codex_message_items]) {
+    if (!Array.isArray(field)) continue;
+    for (const item of field) {
+      if (!isRecord(item)) continue;
+      const direct = item.name;
+      const nested = isRecord(item.function) ? item.function.name : undefined;
+      for (const candidate of [direct, nested]) {
+        if (typeof candidate !== 'string') continue;
+        const trimmed = candidate.trim();
+        if (trimmed) seen.add(trimmed);
+      }
+    }
+  }
+  return [...seen].slice(0, 8);
+}
+
+/**
+ * Returns the distinct string `type` values of the objects in
+ * `codex_message_items`, in first-seen order and capped at 8. Yields an empty
+ * array when the field is not an array.
+ */
+function extractItemTypes(row: Record<string, unknown>): string[] {
+  const items = row.codex_message_items;
+  if (!Array.isArray(items)) return [];
+  const types = items
+    .filter(isRecord)
+    .map((item) => item.type)
+    .filter((type): type is string => typeof type === 'string');
+  return Array.from(new Set(types)).slice(0, 8);
+}
+
+/**
+ * Decides the event type of a session row. The checks are ordered, and the
+ * first match wins: session metadata, tool call, tool result, reasoning,
+ * plain message, otherwise 'unknown'.
+ */
+function classifySessionEvent(row: Record<string, unknown>, toolNames: string[], itemTypes: string[]): HermesSessionEvent['eventType'] {
+  const role = typeof row.role === 'string' ? row.role : '';
+  const isMeta = role === 'session_meta';
+  const isToolCall = toolNames.length > 0 || row.finish_reason === 'tool_calls';
+  const isToolResult = itemTypes.some((type) => type.includes('tool'));
+  const isReasoning = itemTypes.includes('reasoning') || Boolean(row.reasoning);
+  const isMessage = role === 'user' || role === 'assistant' || typeof row.content === 'string';
+
+  if (isMeta) return 'system';
+  if (isToolCall) return 'tool-call';
+  if (isToolResult) return 'tool-result';
+  if (isReasoning) return 'reasoning';
+  return isMessage ? 'message' : 'unknown';
+}
+
+/**
+ * Produces the one-line summary for a session event. Only the role, the event
+ * type and the tool names are used; the row's content is never included.
+ */
+function summarizeSessionEvent(row: Record<string, unknown>, eventType: HermesSessionEvent['eventType'], toolNames: string[]): string {
+  const role = typeof row.role === 'string' ? row.role : 'unknown';
+  switch (eventType) {
+    case 'system':
+      return 'session metadata recorded';
+    case 'tool-call': {
+      // Singular wording only when exactly one tool name is known.
+      const plural = toolNames.length === 1 ? '' : 's';
+      const suffix = toolNames.length > 0 ? `: ${toolNames.join(', ')}` : '';
+      return `${role} tool call${plural}${suffix}`;
+    }
+    case 'tool-result':
+      return `${role} tool result recorded`;
+    case 'reasoning':
+      return `${role} reasoning item recorded`;
+    case 'message':
+      return `${role} message (content redacted)`;
+    default:
+      return `${role} event recorded`;
+  }
+}
+
+/**
+ * Converts one JSONL line into a sanitized session event.
+ *
+ * @param line Raw text of the line.
+ * @param sessionFile File name recorded on the event and used in its id.
+ * @param lineIndex Line number used as the second half of the event id.
+ * @returns The event, or null when the line is blank, is not valid JSON, or
+ *   does not decode to a plain object.
+ */
+function parseSessionJsonlLine(line: string, sessionFile: string, lineIndex: number): HermesSessionEvent | null {
+  if (line.trim() === '') return null;
+  const stringOrNull = (value: unknown): string | null => (typeof value === 'string' ? value : null);
+  try {
+    const row: unknown = JSON.parse(line);
+    if (!isRecord(row)) return null;
+
+    const toolNames = extractToolNames(row);
+    const itemTypes = extractItemTypes(row);
+    const eventType = classifySessionEvent(row, toolNames, itemTypes);
+    const summary = summarizeSessionEvent(row, eventType, toolNames);
+
+    return {
+      id: `${sessionFile}:${lineIndex}`,
+      sessionFile,
+      timestamp: stringOrNull(row.timestamp),
+      role: stringOrNull(row.role),
+      eventType,
+      summary,
+      toolNames,
+      itemTypes,
+      // Prefer an explicit status; otherwise fall back to the finish reason.
+      status: stringOrNull(row.status) ?? stringOrNull(row.finish_reason),
+    };
+  } catch {
+    return null;
+  }
+}
+
+/**
+ * Reads sanitized events from the instance's session JSONL files.
+ *
+ * Considers the last 10 `.jsonl` files of `sessionsDir` in name order, parses
+ * the last 200 lines of each, and returns the 100 newest events (newest
+ * first). Events with a missing or unparseable timestamp sort as epoch 0.
+ *
+ * A file that cannot be read (for example one removed between the directory
+ * listing and the read) is logged and skipped, so the remaining files are
+ * still reported. `sourceCount` is the number of files actually read.
+ *
+ * @param inst Instance configuration; `sessionsDir` and `id` are used here.
+ * @returns The events with status 'up', or an empty list with status
+ *   'unknown' when the directory cannot be listed or no file could be read.
+ */
+async function readSessionEvents(inst: InstanceConfig): Promise<HermesSessionEventList> {
+  try {
+    const files = (await readdir(inst.sessionsDir))
+      .filter((name) => name.endsWith('.jsonl'))
+      .sort()
+      .slice(-10);
+    if (files.length === 0) return { entries: [], status: 'up', sourceCount: 0 };
+
+    const entries: HermesSessionEvent[] = [];
+    let sourceCount = 0;
+    let lastError: unknown = null;
+    for (const file of files) {
+      const sessionFile = basename(file);
+      const source = join(inst.sessionsDir, file);
+      let content: string;
+      try {
+        content = await readFile(source, 'utf8');
+      } catch (err) {
+        // One unreadable file must not hide the events of every other file.
+        log.warn({ err, instance: inst.id, source }, 'skipping unreadable Hermes session file');
+        lastError = err;
+        continue;
+      }
+      sourceCount += 1;
+      const lines = content.split('\n');
+      // Only the tail of each file is parsed; ids keep the real 1-based line number.
+      const start = Math.max(0, lines.length - 200);
+      for (let index = start; index < lines.length; index += 1) {
+        const event = parseSessionJsonlLine(lines[index], sessionFile, index + 1);
+        if (event) entries.push(event);
+      }
+    }
+    // Nothing was readable at all: report it through the outer handler as 'unknown'.
+    if (sourceCount === 0) throw lastError;
+
+    // NaN is mapped to 0 so the comparator stays consistent for bad timestamps.
+    const eventTime = (event: HermesSessionEvent): number => {
+      const time = Date.parse(event.timestamp ?? '');
+      return Number.isNaN(time) ? 0 : time;
+    };
+    entries.sort((a, b) => eventTime(b) - eventTime(a));
+    return { entries: entries.slice(0, 100), status: 'up', sourceCount };
+  } catch (err) {
+    log.warn({ err, instance: inst.id, source: inst.sessionsDir }, 'failed to read Hermes session events');
+    return { entries: [], status: 'unknown', sourceCount: 0 };
+  }
+}
+
 // --- Cron -------------------------------------------------------------------
 //
 // `hermes cron list --json` is the canonical source. It's distinct from
@ -248,8 +396,10 @@ const inflight = new Map<HermesInstanceId, Promise<HermesTelemetrySnapshot>>();

 async function buildSnapshot(instanceId: HermesInstanceId): Promise<HermesTelemetrySnapshot> {
  const inst = INSTANCES[instanceId];
-  const [sessions, cron, memory, skills, watchdog, backupHistory] = await Promise.all([
+  const [sessions, sessionList, sessionEvents, cron, memory, skills, watchdog, backupHistory] = await Promise.all([
    readSessionStats(inst),
+    readSessionList(inst),
+    readSessionEvents(inst),
    readCron(inst),
    readMemory(inst),
    readSkills(inst),
@ -259,17 +409,28 @@ async function buildSnapshot(instanceId: HermesInstanceId): Promise<HermesTeleme

  const warnings: string[] = [];
  if (sessions.status === 'unknown') warnings.push(`${instanceId}: hermes sessions stats unavailable (CLI missing or non-zero exit)`);
+  if (sessionList.status === 'unknown') warnings.push(`${instanceId}: Hermes session index not readable`);
+  if (sessionEvents.status === 'unknown') warnings.push(`${instanceId}: Hermes session event JSONL not readable at ${inst.sessionsDir}`);
  if (cron.status === 'unknown') warnings.push(`${instanceId}: hermes cron list unavailable`);
  if (memory.status === 'unknown') warnings.push(`${instanceId}: hermes memory list unavailable`);
  if (skills.status === 'unknown') warnings.push(`${instanceId}: hermes skills list unavailable`);
  if (watchdog.status === 'unknown') warnings.push(`${instanceId}: watchdog log not readable at ${watchdog.source ?? 'unknown path'}`);
  if (backupHistory.status === 'unknown') warnings.push(`${instanceId}: backup repo not readable at ${backupHistory.repoPath ?? 'unknown path'}`);

+  await Promise.all([
+    ...warnings.map((message) => appendDashboardWarning({ severity: 'warn', instance: instanceId, message })),
+    ...watchdog.alerts
+      .filter((alert) => alert.severity === 'critical')
+      .map((alert) => appendDashboardWarning({ severity: 'critical', instance: instanceId, message: alert.message })),
+  ]);
+
  return {
    generatedAt: new Date().toISOString(),
    cached: false,
    instanceId,
    sessions,
+    sessionList,
+    sessionEvents,
    cron,
    memory,
    skills,
--- a/dashboard/backend/src/modules/hermes-telemetry/types.ts
+++ b/dashboard/backend/src/modules/hermes-telemetry/types.ts
@ -18,6 +18,47 @@ export const HermesSessionStatsSchema = z.object({
 });
 export type HermesSessionStats = z.infer<typeof HermesSessionStatsSchema>;

+/**
+ * One session as listed in the telemetry snapshot's `sessionList`.
+ * Nullable fields are null when no value is available for that session.
+ */
+export const HermesSessionEntrySchema = z.object({
+  id: z.string(),
+  sessionKey: z.string(),
+  platform: z.string().nullable(),
+  chatType: z.string().nullable(),
+  displayName: z.string().nullable(),
+  createdAt: z.string().nullable(),
+  updatedAt: z.string().nullable(),
+  suspended: z.boolean(),
+  resumePending: z.boolean(),
+  totalTokens: z.number().nullable(),
+  estimatedCostUsd: z.number().nullable(),
+});
+export type HermesSessionEntry = z.infer<typeof HermesSessionEntrySchema>;
+
+/** Session entries together with the probe status of the source they were read from. */
+export const HermesSessionListSchema = z.object({
+  entries: z.array(HermesSessionEntrySchema),
+  status: ProbeStatusSchema,
+});
+export type HermesSessionList = z.infer<typeof HermesSessionListSchema>;
+
+/**
+ * One sanitized event taken from a session JSONL file. It carries metadata
+ * (role, event type, tool names, item types, status) and a generated
+ * `summary`; there is no field for raw message content.
+ */
+export const HermesSessionEventSchema = z.object({
+  id: z.string(),
+  sessionFile: z.string(),
+  timestamp: z.string().nullable(),
+  role: z.string().nullable(),
+  eventType: z.enum(['message', 'tool-call', 'tool-result', 'reasoning', 'system', 'unknown']),
+  summary: z.string(),
+  toolNames: z.array(z.string()),
+  itemTypes: z.array(z.string()),
+  status: z.string().nullable(),
+});
+export type HermesSessionEvent = z.infer<typeof HermesSessionEventSchema>;
+
+/**
+ * Session events plus the probe status of their source; `sourceCount` is the
+ * number of session files the events were read from.
+ */
+export const HermesSessionEventListSchema = z.object({
+  entries: z.array(HermesSessionEventSchema),
+  status: ProbeStatusSchema,
+  sourceCount: z.number(),
+});
+export type HermesSessionEventList = z.infer<typeof HermesSessionEventListSchema>;
+
 export const HermesCronEntrySchema = z.object({
  id: z.string(),
  name: z.string(),
@ -106,6 +147,8 @@ export const HermesTelemetrySnapshotSchema = z.object({
  cached: z.boolean(),
  instanceId: HermesInstanceIdSchema,
  sessions: HermesSessionStatsSchema,
+  sessionList: HermesSessionListSchema,
+  sessionEvents: HermesSessionEventListSchema,
  cron: HermesCronListSchema,
  memory: HermesMemoryListSchema,
  skills: HermesSkillListSchema,
--- a/dashboard/docker-compose.yml
+++ b/dashboard/docker-compose.yml
@ -25,6 +25,7 @@ services:
    environment:
      - VM_SCRIPTS_PATH=/vm-scripts/VMs/HostingerVM
      - VM_LOG_DIR=/host-logs
+      - HERMES_DASHBOARD_ALERT_LOG=/var/log/hermes-dashboard-warnings.log
    ports:
      - '127.0.0.1:4004:4004'
    networks:
@ -37,6 +38,7 @@ services:
      - /var/log/vm-cleanup.log:/host-logs/vm-cleanup.log
      - /var/log/vm-health-check.log:/host-logs/vm-health-check.log
      - /var/log/docker-watchdog.log:/host-logs/docker-watchdog.log
+      - /var/log/hermes-dashboard-warnings.log:/var/log/hermes-dashboard-warnings.log
      # Docker socket — allows running docker commands against the host daemon
      # (same pattern as Portainer/cAdvisor; container already runs as root)
      - /var/run/docker.sock:/var/run/docker.sock
--- a/dashboard/web/e2e/hermes.spec.ts
+++ b/dashboard/web/e2e/hermes.spec.ts
@ -40,6 +40,87 @@ const hermesOpsSnapshot = {
  warnings: [],
 };

+// Builds a complete telemetry snapshot fixture for one instance. It is served
+// by the mocked /api/hermes/telemetry/<instance> routes, so the values differ
+// per instance (counts, names, paths) to make the two instances distinguishable.
+const hermesTelemetrySnapshot = (instanceId: 'vijay' | 'bheem') => ({
+  generatedAt: '2026-01-01T00:00:00.000Z',
+  cached: false,
+  instanceId,
+  sessions: { totalSessions: instanceId === 'vijay' ? 12 : 7, totalMessages: instanceId === 'vijay' ? 480 : 210, status: 'up' },
+  sessionList: {
+    status: 'up',
+    entries: [
+      {
+        id: `${instanceId}-session-1`,
+        sessionKey: `agent:main:telegram:dm:${instanceId}`,
+        platform: 'telegram',
+        chatType: 'dm',
+        displayName: instanceId === 'vijay' ? 'S' : 'Uma',
+        createdAt: '2026-01-01T00:00:00.000Z',
+        updatedAt: '2026-01-01T00:06:00.000Z',
+        suspended: false,
+        resumePending: false,
+        totalTokens: 100,
+        estimatedCostUsd: 0,
+      },
+    ],
+  },
+  sessionEvents: {
+    status: 'up',
+    sourceCount: 1,
+    entries: [
+      {
+        id: `${instanceId}-events.jsonl:3`,
+        sessionFile: `${instanceId}-events.jsonl`,
+        timestamp: '2026-01-01T00:06:00.000Z',
+        role: 'assistant',
+        eventType: 'tool-call',
+        summary: 'assistant tool call: exec_command',
+        toolNames: ['exec_command'],
+        itemTypes: [],
+        status: 'tool_calls',
+      },
+    ],
+  },
+  cron: {
+    status: 'up',
+    entries: [
+      {
+        id: `${instanceId}-digest`,
+        name: `${instanceId} digest`,
+        schedule: '0 * * * *',
+        lastRun: '2026-01-01T00:00:00.000Z',
+        nextRun: '2026-01-01T01:00:00.000Z',
+        lastStatus: 'ok',
+        active: true,
+      },
+    ],
+  },
+  memory: { status: 'up', items: [] },
+  skills: { status: 'up', items: [] },
+  watchdog: {
+    source: `/tmp/${instanceId}-watchdog.log`,
+    status: 'up',
+    alerts: [
+      {
+        timestamp: '2026-01-01T00:05:00.000Z',
+        severity: 'info',
+        message: `${instanceId} watchdog healthy`,
+      },
+    ],
+  },
+  backupHistory: {
+    repoPath: `/tmp/${instanceId}-repo`,
+    status: 'up',
+    entries: [
+      {
+        sha: `${instanceId}123456`,
+        committedAt: '2026-01-01T00:03:00.000Z',
+        subject: `${instanceId} backup`,
+      },
+    ],
+  },
+  warnings: [],
+});
+
 test.describe('Hermes Mission Control', () => {
  test.beforeEach(async ({ page }) => {
    await page.addInitScript(() => {
@ -59,6 +140,22 @@ test.describe('Hermes Mission Control', () => {
      });
    });

+    await page.route('**/api/hermes/telemetry/vijay', async (route) => {
+      await route.fulfill({
+        status: 200,
+        contentType: 'application/json',
+        body: JSON.stringify(hermesTelemetrySnapshot('vijay')),
+      });
+    });
+
+    await page.route('**/api/hermes/telemetry/bheem', async (route) => {
+      await route.fulfill({
+        status: 200,
+        contentType: 'application/json',
+        body: JSON.stringify(hermesTelemetrySnapshot('bheem')),
+      });
+    });
+
    // /hermes/products fetches the real service registry + health module
    // (Phase 3 slice 2). Backend isn't running in CI, so we satisfy those
    // routes the same way the dashboard spec does.
@ -82,11 +179,11 @@ test.describe('Hermes Mission Control', () => {

    await page.getByRole('link', { name: 'Task Ledger' }).click();
    await expect(page.getByRole('heading', { name: 'Task Ledger' })).toBeVisible();
-    await expect(page.getByText('Task table')).toBeVisible();
+    await expect(page.getByRole('heading', { name: 'Task table' })).toBeVisible();

    await page.goto('/hermes/tasks/task-1');
    await expect(page.getByRole('heading', { name: 'Hermes learning' })).toBeVisible();
-    await expect(page.getByText('Timeline')).toBeVisible();
+    await expect(page.getByRole('heading', { name: 'Timeline', exact: true })).toBeVisible();

    await page.goto('/hermes/products');
    await expect(page.getByRole('heading', { name: 'Product Portfolio' })).toBeVisible();
@ -111,7 +208,7 @@ test.describe('Hermes Mission Control', () => {

    await page.goto('/hermes/tasks/task-1');
    await expect(page.getByRole('heading', { name: 'Hermes learning' })).toBeVisible();
-    await expect(page.getByRole('heading', { name: 'Timeline' })).toBeVisible();
+    await expect(page.getByRole('heading', { name: 'Timeline', exact: true })).toBeVisible();
  });

  test('exposes a global instance switcher with All / Vijay / Bheem', async ({ page }) => {
--- a/dashboard/web/src/app/hermes/agents/page.tsx
+++ b/dashboard/web/src/app/hermes/agents/page.tsx
@ -7,8 +7,12 @@ import { useEffect, useMemo, useState } from 'react';
 import { HermesShell, MetricCard, SectionCard } from '@/components/hermes-shell';
 import { HermesInstanceBadge } from '@/components/hermes-instance-switcher';
 import { useHermesInstance } from '@/lib/hermes-instance-context';
-import { getHermesAgents, HERMES_INSTANCES, type HermesInstanceId } from '@/lib/hermes';
-import { api, type HermesTelemetrySnapshot } from '@/lib/api';
+import { getHermesAgents, HERMES_INSTANCES } from '@/lib/hermes';
+import {
+  emptyTelemetryState,
+  loadAllHermesTelemetry,
+  type HermesTelemetryState,
+} from '@/lib/hermes-telemetry-client';

 export default function HermesAgentsPage() {
  const { selectedInstance } = useHermesInstance();
@ -21,19 +25,16 @@ export default function HermesAgentsPage() {
  // endpoint. The agent statuses above remain seed-data (status observability
  // needs a separate ingestion contract); the inventory below is genuine
  // when the `hermes` CLI is reachable, status:'unknown' otherwise.
-  const [telemetry, setTelemetry] = useState<Record<HermesInstanceId, HermesTelemetrySnapshot | null>>({ vijay: null, bheem: null });
+  const [telemetry, setTelemetry] = useState<HermesTelemetryState>(emptyTelemetryState);
  const [telemetryError, setTelemetryError] = useState<string | null>(null);

  useEffect(() => {
    const controller = new AbortController();
    const load = async () => {
      try {
-        const [vijay, bheem] = await Promise.all([
-          api.getHermesTelemetry('vijay'),
-          api.getHermesTelemetry('bheem'),
-        ]);
+        const next = await loadAllHermesTelemetry();
        if (controller.signal.aborted) return;
-        setTelemetry({ vijay, bheem });
+        setTelemetry(next);
        setTelemetryError(null);
      } catch (err) {
        if (controller.signal.aborted) return;
--- a/dashboard/web/src/app/hermes/history/page.tsx
+++ b/dashboard/web/src/app/hermes/history/page.tsx
@ -1,15 +1,28 @@
 'use client';

 import Link from 'next/link';
-import { ArrowLeft, Clock3, Flame, TrendingDown, TrendingUp } from 'lucide-react';
+import { ArrowLeft, Clock3, Flame, History, TrendingDown, TrendingUp } from 'lucide-react';
 import { Badge, Button } from '@/components/ui/Primitives';
-import { useMemo } from 'react';
+import { useEffect, useMemo, useState } from 'react';
 import { HermesShell, MetricCard, SectionCard } from '@/components/hermes-shell';
+import { HermesInstanceBadge } from '@/components/hermes-instance-switcher';
 import { useHermesInstance } from '@/lib/hermes-instance-context';
 import { getHermesHistory, hermesTasks } from '@/lib/hermes';
+import {
+  collectBackupEntries,
+  collectCronEntries,
+  collectSessionEvents,
+  collectSessionEntries,
+  collectWatchdogAlerts,
+  emptyTelemetryState,
+  loadAllHermesTelemetry,
+  type HermesTelemetryState,
+} from '@/lib/hermes-telemetry-client';

 export default function HermesHistoryPage() {
  const { selectedInstance } = useHermesInstance();
+  const [telemetry, setTelemetry] = useState<HermesTelemetryState>(emptyTelemetryState);
+  const [telemetryError, setTelemetryError] = useState<string | null>(null);
  const history = useMemo(() => getHermesHistory(selectedInstance), [selectedInstance]);
  const filteredTasks = useMemo(
    () => (selectedInstance === 'all' ? hermesTasks : hermesTasks.filter((task) => task.instanceId === selectedInstance)),
@ -26,6 +39,30 @@ export default function HermesHistoryPage() {
    tasksWithDuration.reduce((sum, task) => sum + (task.durationMs ?? 0), 0) /
    Math.max(1, tasksWithDuration.length) / 60000,
  );
+  const liveAlerts = useMemo(() => collectWatchdogAlerts(telemetry, selectedInstance).slice(0, 10), [telemetry, selectedInstance]);
+  const liveBackups = useMemo(() => collectBackupEntries(telemetry, selectedInstance).slice(0, 10), [telemetry, selectedInstance]);
+  const liveCron = useMemo(() => collectCronEntries(telemetry, selectedInstance).slice(0, 10), [telemetry, selectedInstance]);
+  const liveSessions = useMemo(() => collectSessionEntries(telemetry, selectedInstance).slice(0, 10), [telemetry, selectedInstance]);
+  const liveEvents = useMemo(() => collectSessionEvents(telemetry, selectedInstance).slice(0, 10), [telemetry, selectedInstance]);
+
+  // Fetch telemetry once on mount; a result that settles after unmount is dropped.
+  useEffect(() => {
+    let disposed = false;
+    void (async () => {
+      try {
+        const loaded = await loadAllHermesTelemetry();
+        if (!disposed) {
+          setTelemetry(loaded);
+          setTelemetryError(null);
+        }
+      } catch (failure) {
+        if (!disposed) {
+          const reason = failure instanceof Error ? failure.message : String(failure);
+          setTelemetryError(reason);
+        }
+      }
+    })();
+    return () => {
+      disposed = true;
+    };
+  }, []);

  const failureReasons = [
    ['CI failures', 9],
@ -48,6 +85,86 @@ export default function HermesHistoryPage() {
        <MetricCard label="Avg task duration" value={`${avgDuration}m`} tone="info" icon={<Clock3 className="h-5 w-5" />} />
      </section>

+      <section className="grid gap-4 md:grid-cols-2 xl:grid-cols-4">
+        <MetricCard label="Live events" value={liveEvents.length} tone="info" icon={<History className="h-5 w-5" />} helpText="From Hermes session JSONL" />
+        <MetricCard label="Live cron jobs" value={liveCron.length} tone="info" icon={<Clock3 className="h-5 w-5" />} helpText="From hermes cron list" />
+        <MetricCard label="Watchdog alerts" value={liveAlerts.length} tone={liveAlerts.some((a) => a.severity === 'critical') ? 'danger' : liveAlerts.some((a) => a.severity === 'warn') ? 'warning' : 'default'} icon={<Flame className="h-5 w-5" />} helpText="From watchdog logs" />
+        <MetricCard label="Backup commits" value={liveBackups.length} tone="success" icon={<TrendingUp className="h-5 w-5" />} helpText="From backup git history" />
+      </section>
+
+      <SectionCard
+        title="Live artifact timeline"
+        subtitle="Real session events, sessions, cron, watchdog, and backup history from the Hermes telemetry endpoint. Message content is redacted at the backend."
+        actions={<Badge variant={telemetryError ? 'error' : 'success'}>{telemetryError ? 'Telemetry unavailable' : 'Live telemetry'}</Badge>}
+      >
+        {telemetryError ? (
+          <p className="rounded-2xl border border-[var(--bl-border)] bg-[var(--bl-surface-muted)] p-4 text-sm text-[var(--bl-warning)]">
+            Could not load telemetry: {telemetryError}
+          </p>
+        ) : (
+          <div className="grid gap-4 lg:grid-cols-2 xl:grid-cols-5">
+            <div className="rounded-2xl border border-[var(--bl-border)] bg-[var(--bl-surface-muted)] p-4">
+              <p className="text-xs uppercase tracking-[0.2em] text-[var(--bl-text-tertiary)]">Recent events</p>
+              <div className="mt-3 space-y-2 text-sm text-[var(--bl-text-secondary)]">
+                {liveEvents.length > 0 ? liveEvents.map((event) => (
+                  <div key={`${event.instanceId}-${event.id}`} className="flex items-start justify-between gap-3">
+                    <span className="line-clamp-2">{event.summary}</span>
+                    <HermesInstanceBadge instanceId={event.instanceId} />
+                  </div>
+                )) : <p>No session events returned.</p>}
+              </div>
+            </div>
+            <div className="rounded-2xl border border-[var(--bl-border)] bg-[var(--bl-surface-muted)] p-4">
+              <p className="text-xs uppercase tracking-[0.2em] text-[var(--bl-text-tertiary)]">Recent sessions</p>
+              <div className="mt-3 space-y-2 text-sm text-[var(--bl-text-secondary)]">
+                {liveSessions.length > 0 ? liveSessions.map((session) => (
+                  <div key={`${session.instanceId}-${session.id}`} className="flex items-center justify-between gap-3">
+                    <span className="truncate">{session.displayName ?? session.sessionKey}</span>
+                    <HermesInstanceBadge instanceId={session.instanceId} />
+                  </div>
+                )) : <p>No session entries returned.</p>}
+              </div>
+            </div>
+            <div className="rounded-2xl border border-[var(--bl-border)] bg-[var(--bl-surface-muted)] p-4">
+              <p className="text-xs uppercase tracking-[0.2em] text-[var(--bl-text-tertiary)]">Recent watchdog alerts</p>
+              <div className="mt-3 space-y-2 text-sm text-[var(--bl-text-secondary)]">
+                {liveAlerts.length > 0 ? liveAlerts.map((alert) => (
+                  <div key={`${alert.instanceId}-${alert.timestamp}-${alert.message}`} className="flex items-start justify-between gap-3">
+                    <span className="line-clamp-2">{alert.message}</span>
+                    <div className="flex shrink-0 items-center gap-2">
+                      <Badge variant={alert.severity === 'critical' ? 'error' : alert.severity === 'warn' ? 'warning' : 'info'}>{alert.severity}</Badge>
+                      <HermesInstanceBadge instanceId={alert.instanceId} />
+                    </div>
+                  </div>
+                )) : <p>No watchdog alerts returned.</p>}
+              </div>
+            </div>
+            <div className="rounded-2xl border border-[var(--bl-border)] bg-[var(--bl-surface-muted)] p-4">
+              <p className="text-xs uppercase tracking-[0.2em] text-[var(--bl-text-tertiary)]">Cron entries</p>
+              <div className="mt-3 space-y-2 text-sm text-[var(--bl-text-secondary)]">
+                {liveCron.length > 0 ? liveCron.map((entry) => (
+                  <div key={`${entry.instanceId}-${entry.id}`} className="flex items-center justify-between gap-3">
+                    <span className="truncate">{entry.name}</span>
+                    <HermesInstanceBadge instanceId={entry.instanceId} />
+                  </div>
+                )) : <p>No cron entries returned.</p>}
+              </div>
+            </div>
+            <div className="rounded-2xl border border-[var(--bl-border)] bg-[var(--bl-surface-muted)] p-4">
+              <p className="text-xs uppercase tracking-[0.2em] text-[var(--bl-text-tertiary)]">Backup history</p>
+              <div className="mt-3 space-y-2 text-sm text-[var(--bl-text-secondary)]">
+                {liveBackups.length > 0 ? liveBackups.map((entry) => (
+                  <div key={`${entry.instanceId}-${entry.sha}`} className="flex items-center justify-between gap-3">
+                    <span className="truncate">{entry.subject}</span>
+                    <HermesInstanceBadge instanceId={entry.instanceId} />
+                  </div>
+                )) : <p>No backup commits returned.</p>}
+              </div>
+            </div>
+          </div>
+        )}
+      </SectionCard>
+
      <SectionCard title="Weekly activity chart" subtitle="Accessible bar chart built with standard layout primitives.">
        <div className="overflow-x-auto">
          <div className="flex min-w-[48rem] items-end gap-4 rounded-2xl border border-[var(--bl-border)] bg-[var(--bl-surface-muted)] p-5">
--- a/dashboard/web/src/app/hermes/page.tsx
+++ b/dashboard/web/src/app/hermes/page.tsx
@ -1,8 +1,8 @@
 'use client';

-import { useMemo } from 'react';
+import { useEffect, useMemo, useState } from 'react';
 import Link from 'next/link';
-import { ArrowRight, BadgeCheck, Bot, CheckCircle2, Clock3, LayoutDashboard, OctagonAlert, Rocket, ShieldAlert, Sparkles, TriangleAlert } from 'lucide-react';
+import { ArrowRight, BadgeCheck, BellRing, Bot, CheckCircle2, Clock3, LayoutDashboard, OctagonAlert, Rocket, ShieldAlert, Sparkles, TriangleAlert } from 'lucide-react';
 import { Badge, Button } from '@/components/ui/Primitives';
 import { HermesShell, MetricCard, SectionCard } from '@/components/hermes-shell';
 import { HermesInstanceBadge } from '@/components/hermes-instance-switcher';
@ -19,6 +19,15 @@ import {
  type HermesProduct,
  type HermesTask,
 } from '@/lib/hermes';
+import {
+  collectBackupEntries,
+  collectCronEntries,
+  collectWatchdogAlerts,
+  emptyTelemetryState,
+  loadAllHermesTelemetry,
+  telemetryForFilter,
+  type HermesTelemetryState,
+} from '@/lib/hermes-telemetry-client';

 const fmtDate = new Intl.DateTimeFormat('en', {
  month: 'short',
@ -80,6 +89,8 @@ function ProductMiniCard({ product }: { product: HermesProduct }) {

 export default function HermesMissionControlPage() {
  const { selectedInstance } = useHermesInstance();
+  const [telemetry, setTelemetry] = useState<HermesTelemetryState>(emptyTelemetryState);
+  const [telemetryError, setTelemetryError] = useState<string | null>(null);
  const overview = useMemo(() => getHermesOverview(selectedInstance), [selectedInstance]);
  // Per-instance roll-up cards always show both Vijay and Bheem regardless of
  // the active filter — they're the "comparison" view that sits next to the
@ -124,6 +135,32 @@ export default function HermesMissionControlPage() {
  );
  const actionableProducts = filteredProducts.filter((product) => product.needsAttention).slice(0, 6);
  const agentStatuses = useMemo(() => getHermesAgents(selectedInstance), [selectedInstance]);
+  const liveSnapshots = useMemo(() => telemetryForFilter(telemetry, selectedInstance), [telemetry, selectedInstance]);
+  const liveAlerts = useMemo(() => collectWatchdogAlerts(telemetry, selectedInstance).slice(0, 8), [telemetry, selectedInstance]);
+  const liveBackups = useMemo(() => collectBackupEntries(telemetry, selectedInstance).slice(0, 6), [telemetry, selectedInstance]);
+  const liveCron = useMemo(() => collectCronEntries(telemetry, selectedInstance).slice(0, 6), [telemetry, selectedInstance]);
+
+  // Load telemetry on mount, then re-poll every 60 seconds until the page unmounts.
+  useEffect(() => {
+    let disposed = false;
+    const refresh = async () => {
+      try {
+        const loaded = await loadAllHermesTelemetry();
+        if (disposed) return;
+        setTelemetry(loaded);
+        setTelemetryError(null);
+      } catch (failure) {
+        if (disposed) return;
+        setTelemetryError(failure instanceof Error ? failure.message : String(failure));
+      }
+    };
+    void refresh();
+    const pollHandle = window.setInterval(() => {
+      void refresh();
+    }, 60_000);
+    return () => {
+      disposed = true;
+      window.clearInterval(pollHandle);
+    };
+  }, []);
+
  const autoActions = [
    'Continue the queued execution lane for high-priority product updates.',
    'Publish a weekly digest from completed and failed work.',
@ -185,6 +222,77 @@ export default function HermesMissionControlPage() {

      <HermesOpsPanel />

+      <SectionCard
+        title="Unified live alerts"
+        subtitle="Cross-instance alert, cron, session, and backup signals from the real Hermes telemetry endpoint."
+        actions={<Badge variant={telemetryError ? 'error' : 'success'}>{telemetryError ? 'Telemetry unavailable' : 'Live telemetry'}</Badge>}
+      >
+        {telemetryError ? (
+          <p className="rounded-2xl border border-[var(--bl-border)] bg-[var(--bl-surface-muted)] p-4 text-sm text-[var(--bl-warning)]">
+            Could not load telemetry: {telemetryError}
+          </p>
+        ) : (
+          <div className="grid gap-4 xl:grid-cols-[1.2fr_0.8fr]">
+            <div className="space-y-3">
+              {liveAlerts.length > 0 ? liveAlerts.map((alert) => (
+                <div key={`${alert.instanceId}-${alert.timestamp}-${alert.message}`} className="rounded-2xl border border-[var(--bl-border)] bg-[var(--bl-surface-muted)] p-4">
+                  <div className="flex flex-wrap items-start justify-between gap-3">
+                    <div className="min-w-0">
+                      <div className="flex flex-wrap items-center gap-2">
+                        <Badge variant={alert.severity === 'critical' ? 'error' : alert.severity === 'warn' ? 'warning' : 'info'}>{alert.severity}</Badge>
+                        <HermesInstanceBadge instanceId={alert.instanceId} />
+                        {/* Intl.DateTimeFormat#format throws a RangeError on an Invalid Date, so an unparseable timestamp renders a fallback label instead of crashing the page. */}
+                        <span className="text-xs text-[var(--bl-text-tertiary)]">{Number.isNaN(new Date(alert.timestamp).getTime()) ? 'unknown' : fmtDate.format(new Date(alert.timestamp))}</span>
+                      </div>
+                      <p className="mt-2 text-sm text-[var(--bl-text-primary)]">{alert.message}</p>
+                    </div>
+                    <BellRing className="h-4 w-4 text-[var(--bl-text-tertiary)]" />
+                  </div>
+                </div>
+              )) : (
+                <div className="rounded-2xl border border-[var(--bl-border)] bg-[var(--bl-surface-muted)] p-4 text-sm text-[var(--bl-text-secondary)]">
+                  No watchdog alerts were returned for the selected instance filter.
+                </div>
+              )}
+            </div>
+            <div className="grid gap-3">
+              <div className="rounded-2xl border border-[var(--bl-border)] bg-[var(--bl-surface-muted)] p-4">
+                <p className="text-xs uppercase tracking-[0.2em] text-[var(--bl-text-tertiary)]">Sessions</p>
+                <div className="mt-3 grid gap-2">
+                  {liveSnapshots.map((snapshot) => (
+                    <div key={snapshot.instanceId} className="flex items-center justify-between gap-3 text-sm">
+                      <HermesInstanceBadge instanceId={snapshot.instanceId} />
+                      <span className="text-[var(--bl-text-secondary)]">{snapshot.sessions.totalSessions} sessions · {snapshot.sessions.totalMessages} messages</span>
+                    </div>
+                  ))}
+                </div>
+              </div>
+              <div className="rounded-2xl border border-[var(--bl-border)] bg-[var(--bl-surface-muted)] p-4">
+                <p className="text-xs uppercase tracking-[0.2em] text-[var(--bl-text-tertiary)]">Upcoming Hermes cron</p>
+                <div className="mt-3 space-y-2 text-sm text-[var(--bl-text-secondary)]">
+                  {liveCron.length > 0 ? liveCron.map((entry) => (
+                    <div key={`${entry.instanceId}-${entry.id}`} className="flex items-center justify-between gap-3">
+                      <span className="truncate">{entry.name}</span>
+                      <HermesInstanceBadge instanceId={entry.instanceId} />
+                    </div>
+                  )) : <p>No cron entries returned.</p>}
+                </div>
+              </div>
+              <div className="rounded-2xl border border-[var(--bl-border)] bg-[var(--bl-surface-muted)] p-4">
+                <p className="text-xs uppercase tracking-[0.2em] text-[var(--bl-text-tertiary)]">Recent backup commits</p>
+                <div className="mt-3 space-y-2 text-sm text-[var(--bl-text-secondary)]">
+                  {liveBackups.length > 0 ? liveBackups.map((entry) => (
+                    <div key={`${entry.instanceId}-${entry.sha}`} className="flex items-center justify-between gap-3">
+                      <span className="truncate">{entry.subject}</span>
+                      <HermesInstanceBadge instanceId={entry.instanceId} />
+                    </div>
+                  )) : <p>No backup commits returned.</p>}
+                </div>
+              </div>
+            </div>
+          </div>
+        )}
+      </SectionCard>
+
      <div className="grid gap-6 xl:grid-cols-[1.5fr_1fr]">
        <SectionCard title="Active Missions" subtitle="What Hermes is currently running or waiting on." actions={<Button asChild variant="ghost" size="sm"><Link href="/hermes/tasks">View all tasks <ArrowRight className="ml-2 h-4 w-4" /></Link></Button>}>
          <div className="space-y-3">
--- a/dashboard/web/src/app/hermes/tasks/[id]/page.tsx
+++ b/dashboard/web/src/app/hermes/tasks/[id]/page.tsx
@ -2,10 +2,18 @@

 import Link from 'next/link';
 import { useParams } from 'next/navigation';
+import { useEffect, useMemo, useState } from 'react';
 import { ArrowLeft, CircleDashed, Clock3, ShieldAlert, Sparkles } from 'lucide-react';
 import { Badge, Button } from '@/components/ui/Primitives';
 import { HermesShell, MetricCard, SectionCard } from '@/components/hermes-shell';
 import { getHermesProductById, getHermesTaskById, getHermesTaskEvents } from '@/lib/hermes';
+import {
+  collectSessionEvents,
+  collectSessionEntries,
+  emptyTelemetryState,
+  loadAllHermesTelemetry,
+  type HermesTelemetryState,
+} from '@/lib/hermes-telemetry-client';

 const fmt = new Intl.DateTimeFormat('en', { month: 'short', day: 'numeric', hour: 'numeric', minute: '2-digit' });

@ -24,6 +32,29 @@ export default function HermesTaskDetailPage({ params }: { params: { id: string
  const taskId = routeParams?.id ?? params.id;
  const task = getHermesTaskById(taskId);
  const events = getHermesTaskEvents(taskId);
+  const [telemetry, setTelemetry] = useState<HermesTelemetryState>(emptyTelemetryState);
+  const [telemetryError, setTelemetryError] = useState<string | null>(null);
+  const liveSessions = useMemo(() => collectSessionEntries(telemetry, 'all').slice(0, 8), [telemetry]);
+  const liveEvents = useMemo(() => collectSessionEvents(telemetry, 'all').slice(0, 12), [telemetry]);
+
+  // Fetch telemetry once on mount; a result that settles after unmount is dropped.
+  useEffect(() => {
+    let disposed = false;
+    void (async () => {
+      try {
+        const loaded = await loadAllHermesTelemetry();
+        if (!disposed) {
+          setTelemetry(loaded);
+          setTelemetryError(null);
+        }
+      } catch (failure) {
+        if (!disposed) {
+          const reason = failure instanceof Error ? failure.message : String(failure);
+          setTelemetryError(reason);
+        }
+      }
+    })();
+    return () => {
+      disposed = true;
+    };
+  }, []);

  if (!task) {
    return (
@ -40,7 +71,6 @@ export default function HermesTaskDetailPage({ params }: { params: { id: string
  }

  const product = getHermesProductById(task.productId);
-  const lastEvent = events[0];
  const timeline = events.slice().sort((a, b) => new Date(a.timestamp).getTime() - new Date(b.timestamp).getTime());

  return (
@ -110,6 +140,57 @@ export default function HermesTaskDetailPage({ params }: { params: { id: string
        </SectionCard>
      </div>

+      <SectionCard
+        title="Live Hermes event timeline"
+        subtitle="Sanitized session JSONL events read from Hermes homes, paired with durable session index context. Message content is redacted at the backend."
+        actions={<Badge variant={telemetryError ? 'error' : 'success'}>{telemetryError ? 'Telemetry unavailable' : 'Live sessions'}</Badge>}
+      >
+        {telemetryError ? (
+          <p className="rounded-2xl border border-[var(--bl-border)] bg-[var(--bl-surface-muted)] p-4 text-sm text-[var(--bl-warning)]">
+            Could not load telemetry: {telemetryError}
+          </p>
+        ) : (
+          <div className="grid gap-4 xl:grid-cols-[1.2fr_0.8fr]">
+            <div className="space-y-3">
+              {liveEvents.map((event) => (
+                <div key={`${event.instanceId}-${event.id}`} className="rounded-2xl border border-[var(--bl-border)] bg-[var(--bl-surface-muted)] p-4">
+                  <div className="flex flex-wrap items-start justify-between gap-3">
+                    <div className="min-w-0">
+                      <div className="flex flex-wrap items-center gap-2">
+                        <Badge variant={event.eventType === 'tool-call' ? 'info' : event.eventType === 'system' ? 'neutral' : 'success'}>{event.eventType}</Badge>
+                        <Badge variant="neutral">{event.instanceId}</Badge>
+                        {event.status ? <Badge variant="neutral">{event.status}</Badge> : null}
+                      </div>
+                      <p className="mt-2 font-medium text-[var(--bl-text-primary)]">{event.summary}</p>
+                      <p className="mt-1 truncate text-xs text-[var(--bl-text-secondary)]">{event.sessionFile}</p>
+                    </div>
+                    {/* A present but unparseable timestamp would make fmt.format throw a RangeError, so it is treated the same as a missing one. */}
+                    <p className="text-xs text-[var(--bl-text-tertiary)]">{event.timestamp && !Number.isNaN(new Date(event.timestamp).getTime()) ? fmt.format(new Date(event.timestamp)) : 'unknown'}</p>
+                  </div>
+                </div>
+              ))}
+              {liveEvents.length === 0 ? (
+                <p className="rounded-2xl border border-[var(--bl-border)] bg-[var(--bl-surface-muted)] p-4 text-sm text-[var(--bl-text-secondary)]">No live session events were returned.</p>
+              ) : null}
+            </div>
+            <div className="grid gap-3 sm:grid-cols-2 xl:grid-cols-1">
+              {liveSessions.map((session) => (
+                <div key={`${session.instanceId}-${session.id}`} className="rounded-2xl border border-[var(--bl-border)] bg-[var(--bl-surface-muted)] p-4">
+                  <div className="flex items-center justify-between gap-3">
+                    <Badge variant={session.resumePending || session.suspended ? 'warning' : 'info'}>{session.platform ?? 'session'}</Badge>
+                    <Badge variant="neutral">{session.instanceId}</Badge>
+                  </div>
+                  <p className="mt-3 truncate font-medium text-[var(--bl-text-primary)]">{session.displayName ?? session.sessionKey}</p>
+                  <p className="mt-1 text-xs text-[var(--bl-text-secondary)]">Updated {session.updatedAt ? fmt.format(new Date(session.updatedAt)) : 'unknown'}</p>
+                </div>
+              ))}
+              {liveSessions.length === 0 ? (
+                <p className="rounded-2xl border border-[var(--bl-border)] bg-[var(--bl-surface-muted)] p-4 text-sm text-[var(--bl-text-secondary)]">No live session entries were returned.</p>
+              ) : null}
+            </div>
+          </div>
+        )}
+      </SectionCard>
+
      <SectionCard title="Timeline" subtitle="Chronological event stream for the task lifecycle.">
        <ol className="space-y-4">
          {timeline.map((event) => (
--- a/dashboard/web/src/app/hermes/tasks/page.tsx
+++ b/dashboard/web/src/app/hermes/tasks/page.tsx
@ -1,8 +1,8 @@
 'use client';

-import { Fragment, useMemo, useState } from 'react';
+import { Fragment, useEffect, useMemo, useState } from 'react';
 import Link from 'next/link';
-import { Download, Filter, Search, ChevronDown, ChevronUp, ArrowLeftRight } from 'lucide-react';
+import { Download, Filter, Search, ChevronDown, ChevronUp, ArrowLeftRight, Activity } from 'lucide-react';
 import { Badge, Button, Input } from '@/components/ui/Primitives';
 import { HermesShell, MetricCard, SectionCard } from '@/components/hermes-shell';
 import { HermesInstanceBadge } from '@/components/hermes-instance-switcher';
@ -17,6 +17,16 @@ import {
  type HermesTaskSource,
  type HermesTask,
 } from '@/lib/hermes';
+import {
+  collectBackupEntries,
+  collectCronEntries,
+  collectSessionEntries,
+  collectWatchdogAlerts,
+  emptyTelemetryState,
+  loadAllHermesTelemetry,
+  telemetryForFilter,
+  type HermesTelemetryState,
+} from '@/lib/hermes-telemetry-client';

 const statuses: Array<HermesTaskStatus | 'all'> = ['all', 'queued', 'running', 'blocked', 'completed', 'failed', 'skipped', 'cancelled'];
 const priorities: Array<HermesPriority | 'all'> = ['all', 'P0', 'P1', 'P2', 'P3'];
@ -50,6 +60,8 @@ export default function HermesTaskLedgerPage() {
  const [sort, setSort] = useState<(typeof sortOptions)[number]>('newest');
  const [page, setPage] = useState(1);
  const [expandedTaskId, setExpandedTaskId] = useState<string | null>(null);
+  const [telemetry, setTelemetry] = useState<HermesTelemetryState>(emptyTelemetryState);
+  const [telemetryError, setTelemetryError] = useState<string | null>(null);

  const { selectedInstance } = useHermesInstance();
  const tasks = useMemo(
@ -67,6 +79,68 @@ export default function HermesTaskLedgerPage() {
  }), [tasks]);

  const visibleProducts = hermesProducts.slice(0, 20);
+  const liveSnapshots = useMemo(() => telemetryForFilter(telemetry, selectedInstance), [telemetry, selectedInstance]);
+  const liveCron = useMemo(() => collectCronEntries(telemetry, selectedInstance), [telemetry, selectedInstance]);
+  const liveAlerts = useMemo(() => collectWatchdogAlerts(telemetry, selectedInstance), [telemetry, selectedInstance]);
+  const liveBackups = useMemo(() => collectBackupEntries(telemetry, selectedInstance), [telemetry, selectedInstance]);
+  const liveSessions = useMemo(() => collectSessionEntries(telemetry, selectedInstance), [telemetry, selectedInstance]);
+  // Merge sessions, cron entries, watchdog alerts, and backup commits into one feed,
+  // ordered by time descending and limited to the first 12 rows.
+  const liveActivityRows = useMemo(() => [
+    ...liveSessions.map((entry) => ({
+      id: `session-${entry.instanceId}-${entry.id}`,
+      instanceId: entry.instanceId,
+      kind: 'session',
+      title: entry.displayName ? `${entry.displayName} session` : entry.sessionKey,
+      detail: entry.resumePending ? 'resume pending' : entry.suspended ? 'suspended' : entry.platform ?? 'session',
+      time: entry.updatedAt ?? entry.createdAt,
+      tone: entry.resumePending || entry.suspended ? 'warning' as const : 'info' as const,
+    })),
+    ...liveCron.map((entry) => ({
+      id: `cron-${entry.instanceId}-${entry.id}`,
+      instanceId: entry.instanceId,
+      kind: 'cron',
+      title: entry.name,
+      detail: entry.lastStatus ?? entry.schedule ?? 'Hermes cron entry',
+      time: entry.nextRun ?? entry.lastRun,
+      tone: entry.active ? 'success' as const : 'neutral' as const,
+    })),
+    ...liveAlerts.map((alert) => ({
+      id: `alert-${alert.instanceId}-${alert.timestamp}-${alert.message}`,
+      instanceId: alert.instanceId,
+      kind: 'alert',
+      title: alert.message,
+      detail: alert.severity,
+      time: alert.timestamp,
+      tone: alert.severity === 'critical' ? 'error' as const : alert.severity === 'warn' ? 'warning' as const : 'info' as const,
+    })),
+    ...liveBackups.map((entry) => ({
+      id: `backup-${entry.instanceId}-${entry.sha}`,
+      instanceId: entry.instanceId,
+      kind: 'backup',
+      title: entry.subject,
+      detail: entry.sha.slice(0, 8),
+      time: entry.committedAt,
+      tone: 'success' as const,
+    })),
+    // Missing times sort as epoch 0; `|| 0` does the same for unparseable ones so the comparator never returns NaN.
+  ].sort((a, b) => (new Date(b.time ?? 0).getTime() || 0) - (new Date(a.time ?? 0).getTime() || 0)).slice(0, 12), [liveSessions, liveCron, liveAlerts, liveBackups]);
+
+  // Fetch telemetry once on mount; a result that settles after unmount is dropped.
+  useEffect(() => {
+    let disposed = false;
+    void (async () => {
+      try {
+        const loaded = await loadAllHermesTelemetry();
+        if (!disposed) {
+          setTelemetry(loaded);
+          setTelemetryError(null);
+        }
+      } catch (failure) {
+        if (!disposed) {
+          const reason = failure instanceof Error ? failure.message : String(failure);
+          setTelemetryError(reason);
+        }
+      }
+    })();
+    return () => {
+      disposed = true;
+    };
+  }, []);

  return (
    <HermesShell
@ -86,6 +160,68 @@ export default function HermesTaskLedgerPage() {
        <MetricCard label="Failed" value={counts.failed} tone="danger" />
      </section>

+      <SectionCard
+        title="Live Hermes activity ledger"
+        subtitle="Real cron entries, watchdog alerts, backup commits, and session totals from the telemetry endpoint. The task table below remains the planner-style seed ledger until Hermes emits task-level events."
+        actions={<Badge variant={telemetryError ? 'error' : 'success'}>{telemetryError ? 'Telemetry unavailable' : 'Live telemetry'}</Badge>}
+      >
+        {telemetryError ? (
+          <p className="rounded-2xl border border-[var(--bl-border)] bg-[var(--bl-surface-muted)] p-4 text-sm text-[var(--bl-warning)]">
+            Could not load telemetry: {telemetryError}
+          </p>
+        ) : (
+          <div className="grid gap-4 xl:grid-cols-[1fr_2fr]">
+            <div className="grid gap-3">
+              {liveSnapshots.map((snapshot) => (
+                <div key={snapshot.instanceId} className="rounded-2xl border border-[var(--bl-border)] bg-[var(--bl-surface-muted)] p-4">
+                  <div className="flex items-center justify-between gap-3">
+                    <HermesInstanceBadge instanceId={snapshot.instanceId} />
+                    <Badge variant={snapshot.sessions.status === 'up' ? 'success' : 'warning'}>{snapshot.sessions.status}</Badge>
+                  </div>
+                  <p className="mt-3 text-2xl font-semibold text-[var(--bl-text-primary)]">{snapshot.sessions.totalSessions}</p>
+                  <p className="text-sm text-[var(--bl-text-secondary)]">{snapshot.sessions.totalMessages} session messages observed</p>
+                </div>
+              ))}
+            </div>
+            <div className="overflow-hidden rounded-2xl border border-[var(--bl-border)]">
+              <table className="min-w-full divide-y divide-[var(--bl-border)] text-left text-sm">
+                <thead className="bg-[var(--bl-surface-muted)] text-xs uppercase tracking-[0.18em] text-[var(--bl-text-tertiary)]">
+                  <tr>
+                    <th className="px-4 py-3">Artifact</th>
+                    <th className="px-4 py-3">Instance</th>
+                    <th className="px-4 py-3">Signal</th>
+                    <th className="px-4 py-3">Time</th>
+                  </tr>
+                </thead>
+                <tbody className="divide-y divide-[var(--bl-border)] bg-[var(--bl-surface-card)]">
+                  {liveActivityRows.map((row) => (
+                    <tr key={row.id}>
+                      <td className="px-4 py-4">
+                        <div className="flex items-start gap-2">
+                          <Activity className="mt-0.5 h-4 w-4 text-[var(--bl-text-tertiary)]" />
+                          <div>
+                            <p className="font-medium text-[var(--bl-text-primary)]">{row.title}</p>
+                            <p className="text-xs text-[var(--bl-text-secondary)]">{row.kind}</p>
+                          </div>
+                        </div>
+                      </td>
+                      <td className="px-4 py-4"><HermesInstanceBadge instanceId={row.instanceId} /></td>
+                      <td className="px-4 py-4"><Badge variant={row.tone}>{row.detail}</Badge></td>
+                      <td className="px-4 py-4 text-[var(--bl-text-secondary)]">{row.time ? prettyDate(row.time) : '—'}</td>
+                    </tr>
+                  ))}
+                  {liveActivityRows.length === 0 ? (
+                    <tr>
+                      <td colSpan={4} className="px-4 py-10 text-center text-[var(--bl-text-secondary)]">No live activity artifacts were returned for the current instance filter.</td>
+                    </tr>
+                  ) : null}
+                </tbody>
+              </table>
+            </div>
+          </div>
+        )}
+      </SectionCard>
+
      <SectionCard title="Filters" subtitle="Find work by status, product, priority, type, source, or age.">
        <div className="grid gap-3 lg:grid-cols-4 xl:grid-cols-7">
          <Input value={query} onChange={(event) => { setQuery(event.target.value); setPage(1); }} placeholder="Search tasks..." aria-label="Search tasks" className="xl:col-span-2" />
--- a/dashboard/web/src/lib/api.ts
+++ b/dashboard/web/src/lib/api.ts
@ -130,6 +130,43 @@ export interface HermesSessionStats {
  status: HermesProbeStatus;
 }

+export interface HermesSessionEntry { // One row of HermesSessionList.entries.
+  id: string;
+  sessionKey: string;
+  platform: string | null; // NOTE(review): presumably the chat platform the session runs on — confirm against the backend
+  chatType: string | null;
+  displayName: string | null; // null when the session has no display name
+  createdAt: string | null; // parsed with `new Date(...)` by the client; null when absent
+  updatedAt: string | null; // the client prefers this over createdAt when ordering sessions
+  suspended: boolean;
+  resumePending: boolean;
+  totalTokens: number | null;
+  estimatedCostUsd: number | null;
+}
+
+export interface HermesSessionList { // Session entries for one telemetry snapshot (HermesTelemetrySnapshot.sessionList).
+  entries: HermesSessionEntry[];
+  status: HermesProbeStatus; // NOTE(review): presumably the probe outcome for this section only — confirm
+}
+
+export interface HermesSessionEvent { // One session event as exposed to the dashboard; docs/hermes-operations.md requires raw message content to be redacted before it reaches the UI.
+  id: string;
+  sessionFile: string; // NOTE(review): presumably the source JSONL filename — confirm against the backend
+  timestamp: string | null; // parsed with `new Date(...)` by the client; null when absent
+  role: string | null;
+  eventType: 'message' | 'tool-call' | 'tool-result' | 'reasoning' | 'system' | 'unknown';
+  summary: string; // NOTE(review): presumably a sanitized summary rather than raw message text — confirm
+  toolNames: string[];
+  itemTypes: string[];
+  status: string | null;
+}
+
+export interface HermesSessionEventList { // Session events for one telemetry snapshot (HermesTelemetrySnapshot.sessionEvents).
+  entries: HermesSessionEvent[];
+  status: HermesProbeStatus; // NOTE(review): presumably the probe outcome for this section only — confirm
+  sourceCount: number; // NOTE(review): presumably how many session files were read — confirm against the backend
+}
+
 export interface HermesCronEntry {
  id: string;
  name: string;
@ -201,6 +238,8 @@ export interface HermesTelemetrySnapshot {
  cached: boolean;
  instanceId: 'vijay' | 'bheem';
  sessions: HermesSessionStats;
+  sessionList: HermesSessionList;
+  sessionEvents: HermesSessionEventList;
  cron: HermesCronList;
  memory: HermesMemoryList;
  skills: HermesSkillList;
--- a/dashboard/web/src/lib/hermes-telemetry-client.ts
+++ b/dashboard/web/src/lib/hermes-telemetry-client.ts
@ -0,0 +1,54 @@
+import { api, type HermesTelemetrySnapshot, type HermesWatchdogAlert } from '@/lib/api';
+import type { HermesInstanceId, HermesInstanceFilter } from '@/lib/hermes';
+
+export type HermesTelemetryState = Record<HermesInstanceId, HermesTelemetrySnapshot | null>;
+
+export const emptyTelemetryState: HermesTelemetryState = { vijay: null, bheem: null };
+
+/**
+ * Requests the telemetry snapshots for both Hermes instances concurrently.
+ *
+ * Both requests are started before either one is awaited. The results are
+ * joined with `Promise.all`, so a rejection from either request rejects the
+ * whole load and no partial state is returned.
+ *
+ * @returns The two snapshots keyed by instance id.
+ */
+export async function loadAllHermesTelemetry(): Promise<HermesTelemetryState> {
+  const vijayRequest = api.getHermesTelemetry('vijay');
+  const bheemRequest = api.getHermesTelemetry('bheem');
+  const [vijay, bheem] = await Promise.all([vijayRequest, bheemRequest]);
+  return { vijay, bheem };
+}
+
+/**
+ * Selects the loaded snapshots that match the instance filter.
+ *
+ * @param telemetry - Per-instance snapshots; an instance without a snapshot is `null`.
+ * @param selectedInstance - `'all'` or a single instance id.
+ * @returns For `'all'`, the loaded snapshots in vijay-then-bheem order. For a
+ *   single instance, a one-element array, or an empty array when that instance
+ *   has no snapshot.
+ */
+export function telemetryForFilter(
+  telemetry: HermesTelemetryState,
+  selectedInstance: HermesInstanceFilter,
+): HermesTelemetrySnapshot[] {
+  const candidates =
+    selectedInstance === 'all' ? [telemetry.vijay, telemetry.bheem] : [telemetry[selectedInstance]];
+  const loaded: HermesTelemetrySnapshot[] = [];
+  for (const snapshot of candidates) {
+    // Skip instances whose snapshot is still null.
+    if (snapshot) loaded.push(snapshot);
+  }
+  return loaded;
+}
+
+/**
+ * Converts a timestamp into epoch milliseconds for use as a sort key.
+ *
+ * Missing or unparseable values map to 0 so they sort as the oldest entries.
+ * Without this, `new Date(bad).getTime()` is `NaN`, the comparator becomes
+ * inconsistent, and the resulting order is implementation-defined.
+ */
+function toEpochMs(value: string | number | null | undefined): number {
+  const ms = new Date(value ?? 0).getTime();
+  return Number.isNaN(ms) ? 0 : ms;
+}
+
+/**
+ * Flattens the watchdog alerts of every snapshot matching the filter, tags
+ * each alert with the instance it came from, and orders them newest first.
+ *
+ * @param telemetry - Per-instance snapshots; `null` entries are skipped.
+ * @param selectedInstance - `'all'` or a single instance id.
+ * @returns A new array; alerts with an unparseable timestamp sort last.
+ */
+export function collectWatchdogAlerts(
+  telemetry: HermesTelemetryState,
+  selectedInstance: HermesInstanceFilter,
+): Array<HermesWatchdogAlert & { instanceId: HermesInstanceId }> {
+  return telemetryForFilter(telemetry, selectedInstance)
+    .flatMap((snapshot) => snapshot.watchdog.alerts.map((alert) => ({ ...alert, instanceId: snapshot.instanceId })))
+    .sort((a, b) => toEpochMs(b.timestamp) - toEpochMs(a.timestamp));
+}
+
+/**
+ * Flattens the backup history of every snapshot matching the filter, tags each
+ * entry with its instance, and orders the entries by commit time, newest first.
+ *
+ * @param telemetry - Per-instance snapshots; `null` entries are skipped.
+ * @param selectedInstance - `'all'` or a single instance id.
+ * @returns A new array; entries with an unparseable commit time sort last.
+ */
+export function collectBackupEntries(telemetry: HermesTelemetryState, selectedInstance: HermesInstanceFilter) {
+  return telemetryForFilter(telemetry, selectedInstance)
+    .flatMap((snapshot) => snapshot.backupHistory.entries.map((entry) => ({ ...entry, instanceId: snapshot.instanceId })))
+    .sort((a, b) => toEpochMs(b.committedAt) - toEpochMs(a.committedAt));
+}
+
+/**
+ * Flattens the cron entries of every snapshot matching the filter and tags
+ * each entry with its instance. The order of the snapshots is kept; no sorting
+ * is applied.
+ *
+ * @param telemetry - Per-instance snapshots; `null` entries are skipped.
+ * @param selectedInstance - `'all'` or a single instance id.
+ */
+export function collectCronEntries(telemetry: HermesTelemetryState, selectedInstance: HermesInstanceFilter) {
+  return telemetryForFilter(telemetry, selectedInstance)
+    .flatMap((snapshot) => snapshot.cron.entries.map((entry) => ({ ...entry, instanceId: snapshot.instanceId })));
+}
+
+/**
+ * Flattens the session entries of every snapshot matching the filter, tags
+ * each entry with its instance, and orders them by most recent activity.
+ *
+ * The sort key is `updatedAt`, falling back to `createdAt`; entries with
+ * neither, or with an unparseable value, sort last.
+ *
+ * @param telemetry - Per-instance snapshots; `null` entries are skipped.
+ * @param selectedInstance - `'all'` or a single instance id.
+ */
+export function collectSessionEntries(telemetry: HermesTelemetryState, selectedInstance: HermesInstanceFilter) {
+  return telemetryForFilter(telemetry, selectedInstance)
+    .flatMap((snapshot) => snapshot.sessionList.entries.map((entry) => ({ ...entry, instanceId: snapshot.instanceId })))
+    .sort((a, b) => toEpochMs(b.updatedAt ?? b.createdAt) - toEpochMs(a.updatedAt ?? a.createdAt));
+}
+
+/**
+ * Flattens the session events of every snapshot matching the filter, tags each
+ * event with its instance, and orders them newest first.
+ *
+ * @param telemetry - Per-instance snapshots; `null` entries are skipped.
+ * @param selectedInstance - `'all'` or a single instance id.
+ * @returns A new array; events with a null or unparseable timestamp sort last.
+ */
+export function collectSessionEvents(telemetry: HermesTelemetryState, selectedInstance: HermesInstanceFilter) {
+  return telemetryForFilter(telemetry, selectedInstance)
+    .flatMap((snapshot) => snapshot.sessionEvents.entries.map((entry) => ({ ...entry, instanceId: snapshot.instanceId })))
+    .sort((a, b) => toEpochMs(b.timestamp) - toEpochMs(a.timestamp));
+}
--- a/docs/app-url-bookmarks.md
+++ b/docs/app-url-bookmarks.md
@ -0,0 +1,98 @@
+# ByteLyst App URL Bookmarks
+
+- **Owner:** ByteLyst DevOps
+- **Last updated:** 2026-05-31T08:14:55+00:00
+- **Source of truth for bookmarks:** this file
+- **Exposure/security companion:** [`docs/vm-exposure-inventory.md`](vm-exposure-inventory.md)
+
+Use this as the living bookmark/reference list for deployed apps, dashboards,
+APIs, and private admin surfaces. When a new app is deployed, add it here in
+the same change that adds its Caddy route, Compose service, or systemd unit.
+
+`Last deployed / restarted` means the latest timestamp we have evidence for.
+For Docker services it is the container `StartedAt` timestamp from
+`docker inspect`; for systemd services it is the service active-since timestamp.
+If the deploy time is not known, use `unknown` and update it during the next
+verified deploy.
+
+## Update Checklist
+
+When deploying or changing an app:
+
+1. Add or update the row in this file.
+2. Update `Last deployed / restarted` with an exact UTC timestamp.
+3. Record the repo/service owner and access model.
+4. If exposure changes, also update [`docs/vm-exposure-inventory.md`](vm-exposure-inventory.md).
+5. If it is a DevOps dashboard endpoint, also update [`dashboard/ENDPOINTS.md`](../dashboard/ENDPOINTS.md).
+
+## Primary Dashboards
+
+| Name | URL | Access | Backend/API | Runtime owner | Last deployed / restarted | Notes |
+| --- | --- | --- | --- | --- | --- | --- |
+| DevOps custom dashboard | `https://devops.bytelyst.com` | private-admin/auth | `https://api.bytelyst.com/devops` | `dashboard/docker-compose.yml` (`devops-web`, `devops-backend`) | `2026-05-31T04:02:24Z` web, `2026-05-31T04:02:23Z` backend | Unified ByteLyst DevOps dashboard. Hermes Mission Control lives under `/hermes`. |
+| DevOps Tailscale entry | `https://srv1491630.tailf85608.ts.net/login` | Tailscale/private-admin/auth | `http://127.0.0.1:4004` | Tailscale serve -> `localhost:3049` | `2026-05-31T04:02:24Z` | Private login path used for VM-side dashboard review. |
+| Platform admin dashboard | `https://admin.bytelyst.com` | private-admin/auth | `https://api.bytelyst.com/platform/api` | common platform `admin-web` | `unknown` | Caddy route is documented; container was not present in the 2026-05-27 exposure inventory. Verify before relying on it. |
+| Hermes Mission Control | `https://devops.bytelyst.com/hermes` | private-admin/auth | `https://api.bytelyst.com/devops/api/hermes/*` | DevOps custom dashboard | `2026-05-31T04:02:24Z` | Unified custom Hermes dashboard over Vijay/root and Bheem/Uma. |
+| Hermes native Vijay dashboard | `http://100.87.53.10:9119/` | Tailscale-only/private-admin | native Hermes service | `hermes-root-dashboard.service` | `2026-05-31T04:02:20Z` | Built-in Hermes dashboard for root/Vijay. No public Caddy route. |
+| Hermes native Bheem dashboard | `http://100.87.53.10:9120/` | Tailscale-only/private-admin | native Hermes service | `uma-hermes-dashboard.service` | `2026-05-31T04:02:20Z` | Built-in Hermes dashboard for Uma/Bheem. No public Caddy route. |
+| LLM Lab dashboard | `https://llmlab.bytelyst.com` | private-admin | local/dashboard service | common platform `llmlab-dashboard` | `2026-05-31T04:02:24Z` | Keep private/auth-gated. Local host port `127.0.0.1:3075`. |
+
+## Public Apps
+
+| App | Public URL | API URL | Runtime owner | Last deployed / restarted | Notes |
+| --- | --- | --- | --- | --- | --- |
+| InvtTrdg | `https://invttrdg.bytelyst.com` | `https://api.bytelyst.com/invttrdg/*` | `/opt/bytelyst/learning_ai_invt_trdg` | `unknown` | Exposure inventory maps web to `:3085` and backend to `:4025`. |
+| Clock / Chronomind | `https://clock.bytelyst.com` | `https://api.bytelyst.com/chronomind/*` | `/opt/bytelyst/learning_ai_clock` | `2026-05-31T04:02:24Z` web/backend | Local web `127.0.0.1:3030`, backend `127.0.0.1:4011`. |
+| Notes / Notelett | `https://notes.bytelyst.com` | `https://api.bytelyst.com/notelett/*` | `/opt/bytelyst/learning_ai_notes` | `2026-05-31T04:02:23Z` web, `2026-05-31T04:02:24Z` backend | Local web `127.0.0.1:3000`, backend `127.0.0.1:4016`. |
+| Tracker | `https://tracker.bytelyst.com` | n/a | `/opt/bytelyst/learning_ai_common_plat` | `2026-05-31T04:02:24Z` | Local web `127.0.0.1:3003`. |
+| PeakPulse | n/a | `https://api.bytelyst.com/peakpulse/*` | `/opt/bytelyst/learning_ai_common_plat` | `2026-05-31T04:02:24Z` platform stack | Backend is Docker-internal `peakpulse-backend:4010`. |
+| Jarvis Jr | n/a | `https://api.bytelyst.com/jarvisjr/*` | `/opt/bytelyst/learning_ai_common_plat` | `2026-05-31T04:02:23Z` web, `2026-05-31T04:02:24Z` platform stack | Local web `127.0.0.1:3035`, backend Docker-internal `jarvisjr-backend:4012`. |
+| Nomgap | Vercel / external | `https://api.bytelyst.com/nomgap/*` | `/opt/bytelyst/learning_ai_common_plat` | `2026-05-31T04:02:24Z` platform stack | Old local `nomgap-web` was retired; backend remains Docker-internal. |
+| Mindlyst | n/a | `https://api.bytelyst.com/mindlyst/*` | `/opt/bytelyst/learning_ai_common_plat` | `2026-05-31T04:02:24Z` web/platform stack | Local web `127.0.0.1:3050`, backend Docker-internal `mindlyst-backend:4014`. |
+| LysnrAI | n/a | `https://api.bytelyst.com/lysnrai/*` | `/opt/bytelyst/learning_ai_common_plat` | `2026-05-31T04:02:23Z` dashboard, `2026-05-31T04:02:24Z` platform stack | Local dashboard `127.0.0.1:3002`, backend Docker-internal `lysnrai-backend:4015`. |
+| Flowmonk | n/a | `https://api.bytelyst.com/flowmonk/*` | `/opt/bytelyst/learning_ai_common_plat` | `2026-05-31T04:02:24Z` web/platform stack | Local web `127.0.0.1:3040`, backend Docker-internal `flowmonk-backend:4017`. |
+| ActionTrail | n/a | `https://api.bytelyst.com/actiontrail/*` | `/opt/bytelyst/learning_ai_common_plat` | `2026-05-31T04:02:23Z` web, `2026-05-31T04:02:24Z` platform stack | Local web `127.0.0.1:3060`; exposure inventory notes route/backend mapping needs verification. |
+| LocalMemGPT | n/a | `https://api.bytelyst.com/localmemgpt/*` | `/opt/bytelyst/learning_ai_common_plat` | `2026-05-31T04:02:24Z` web/platform stack | Local web `127.0.0.1:3070`, backend Docker-internal `localmemgpt-backend:4019`. |
+
+## Shared APIs And Infrastructure
+
+| Service | URL | Access | Runtime owner | Last deployed / restarted | Notes |
+| --- | --- | --- | --- | --- | --- |
+| API gateway | `https://api.bytelyst.com` | public gateway | Caddy/common platform | `2026-05-31T04:02:24Z` caddy | Routes app APIs by path. |
+| Platform API | `https://api.bytelyst.com/platform/api` | public/auth-required | common platform `platform-service` | `2026-05-31T04:02:24Z` | Auth and platform data API. |
+| Extraction API | `https://api.bytelyst.com/extraction/*` | public/API-controlled | common platform `extraction-service` | `2026-05-31T04:02:23Z` | Confirm auth posture before exposing new consumers. |
+| MCP API | `https://api.bytelyst.com/mcp/*` | public/API-controlled | common platform `mcp-server` | `2026-05-31T04:02:23Z` | Confirm public need before widening access. |
+| Gitea | `https://gitea.bytelyst.com` | public/admin-auth | `gitea-npm-registry` | `2026-05-31T04:02:23Z` | Local direct registry also listens on `:3300`; see registry docs. |
+| Gitea npm registry | `http://localhost:3300/api/packages/bytelyst/npm/` | VM/local or tunnel | `gitea-npm-registry` | `2026-05-31T04:02:23Z` | Do not use from laptop unless tunneled. See [`docs/gitea-registry-and-package-resolution.md`](gitea-registry-and-package-resolution.md). |
+| Ollama endpoint | `https://ollama.bytelyst.com` | private-admin target | host `ollama` service | `unknown` | Must not be unauthenticated public. |
+| Mailpit UI | `http://127.0.0.1:8025` | loopback-only | common platform `mailpit` | `2026-05-31T04:02:23Z` | Dev/test mail UI. |
+| Loki | `http://127.0.0.1:3100` | loopback-only | common platform `loki` | `2026-05-31T04:02:24Z` | Observability internal. |
+| Cosmos emulator UI | `http://127.0.0.1:1234` / `http://127.0.0.1:8081` | loopback-only | common platform `cosmos-emulator` | `2026-05-31T04:02:23Z` | Dev/test only; current production data uses real Cosmos for platform. |
+| Azurite | `http://127.0.0.1:10000` | loopback-only target | common platform `azurite` | `2026-05-31T04:02:24Z` | Check exposure inventory before relying on external access. |
+
+## Local Host Ports
+
+These are operational shortcuts, not public bookmarks.
+
+| Service | Local URL | Public/private equivalent |
+| --- | --- | --- |
+| DevOps web container | `http://127.0.0.1:3049` | `https://devops.bytelyst.com` or Tailscale URL |
+| DevOps backend health | `http://127.0.0.1:4004/health` | `https://api.bytelyst.com/devops/health` if routed |
+| Platform service health | `http://127.0.0.1:4003/health` | `https://api.bytelyst.com/platform/api` |
+| Clock web | `http://127.0.0.1:3030` | `https://clock.bytelyst.com` |
+| Notes web | `http://127.0.0.1:3000` | `https://notes.bytelyst.com` |
+| InvtTrdg web | `http://127.0.0.1:3085` | `https://invttrdg.bytelyst.com` |
+| Tracker web | `http://127.0.0.1:3003` | `https://tracker.bytelyst.com` |
+| Hermes Vijay dashboard | `http://100.87.53.10:9119/` | Tailscale-only |
+| Hermes Bheem dashboard | `http://100.87.53.10:9120/` | Tailscale-only |
+
+## Open Verification Items
+
+- Confirm whether `admin.bytelyst.com` is currently backed by a running
+  `admin-web` container.
+- Confirm product-facing public URLs for apps listed as `n/a` before sharing
+  them outside the admin team.
+- Confirm `actiontrail` API route/container port mapping; historical inventory
+  used `api.bytelyst.com/actiontrail/*` while current container metadata shows
+  `actiontrail-backend` as part of the common platform stack.
+- Replace any `unknown` deploy timestamp during the next verified deploy.
--- a/docs/hermes-operations.md
+++ b/docs/hermes-operations.md
@ -37,6 +37,20 @@ Observed on 2026-05-27:

 Before adding any new Caddy hostname, Docker port, or dashboard/API feature, verify that it is not a Hermes dashboard/API public exposure.

+Session privacy policy for dashboard/telemetry surfaces:
+
+- Treat gateway session content as private by default for both Vijay and Bheem.
+- Dashboard routes may show counts, statuses, timestamps, IDs, sanitized warning
+  messages, cron names, skill/memory names, and backup commit subjects.
+- Dashboard telemetry may show sanitized session JSONL event projections:
+  event type, role, timestamp, source filename, tool names, item types, and
+  status. Raw message content remains redacted before it reaches the UI.
+- Dashboard routes must not expose raw prompts, full session transcripts, raw
+  command output containing secrets, `.env` values, OAuth payloads, raw
+  `state.db`, Telegram tokens, provider keys, or personal message content.
+- If a future session-event pipeline is added, enable secret and PII redaction
+  at ingestion time and store only the redacted event projection used by the UI.
+
 ```bash
 # Inspect public Caddy routes and obvious Hermes/API/dashboard references.
 docker ps --format '{{.Names}} {{.Ports}}' | grep -i caddy || true
@ -85,6 +99,60 @@ systemd/hermes-root-backup.service
 systemd/hermes-root-backup.timer
 systemd/uma-hermes-backup.service
 systemd/uma-hermes-backup.timer
+systemd/hermes-health-watchdog.service
+systemd/hermes-health-watchdog.timer
+systemd/uma-hermes-health-watchdog.service
+systemd/uma-hermes-health-watchdog.timer
+systemd/hermes-ops-exporter.service
+systemd/hermes-ops-exporter.timer
+systemd/uma-hermes-ops-exporter.service
+systemd/uma-hermes-ops-exporter.timer
+```
+
+## Mission Control ops exporter
+
+Mission Control can read a sanitized per-instance ops export before falling back
+to live cross-user probes. This reduces brittle root-to-Uma inspection and keeps
+the dashboard contract free of secrets or session content.
+
+Tracked exporter:
+
+```bash
+scripts/hermes-ops-exporter.py
+```
+
+Output paths:
+
+```text
+/root/.hermes/ops-export.json
+/home/uma/.hermes/ops-export.json
+```
+
+The JSON contains only service booleans/status, timer timestamps, short Git
+metadata, restore counts, and whether a Google token file exists. It does not
+include token values, raw `state.db`, logs, prompt/session text, OAuth payloads,
+or environment files.
+
+Install root exporter:
+
+```bash
+cp systemd/hermes-ops-exporter.service /etc/systemd/system/hermes-ops-exporter.service
+cp systemd/hermes-ops-exporter.timer /etc/systemd/system/hermes-ops-exporter.timer
+systemctl daemon-reload
+systemctl enable --now hermes-ops-exporter.timer
+systemctl status hermes-ops-exporter.timer --no-pager
+```
+
+Install Uma exporter as user systemd:
+
+```bash
+install -d -o uma -g uma /home/uma/.config/systemd/user
+cp systemd/uma-hermes-ops-exporter.service /home/uma/.config/systemd/user/uma-hermes-ops-exporter.service
+cp systemd/uma-hermes-ops-exporter.timer /home/uma/.config/systemd/user/uma-hermes-ops-exporter.timer
+chown uma:uma /home/uma/.config/systemd/user/uma-hermes-ops-exporter.*
+runuser -u uma -- systemctl --user daemon-reload
+runuser -u uma -- systemctl --user enable --now uma-hermes-ops-exporter.timer
+runuser -u uma -- systemctl --user status uma-hermes-ops-exporter.timer --no-pager
 ```

 ## Health baseline commands
@ -164,6 +232,48 @@ python3 ~/.hermes/scripts/hermes_health_watchdog.py
 # Healthy output should be empty.
 ```

+Tracked systemd watchdog timers:
+
+```bash
+systemctl status hermes-health-watchdog.timer --no-pager
+systemctl --user --machine=uma@.host status uma-hermes-health-watchdog.timer --no-pager
+tail -n 20 /root/.hermes/logs/hermes-health-watchdog.log
+tail -n 20 /home/uma/.hermes/logs/hermes-health-watchdog.log
+```
+
+Dashboard warning bridge:
+
+```text
+/var/log/hermes-dashboard-warnings.log
+```
+
+The dashboard backend appends deduplicated warning lines there when
+`HERMES_DASHBOARD_ALERT_LOG` is configured. Both watchdogs tail the same file
+and route by `instance=vijay`, `instance=bheem`, or `instance=all`.
+Telegram delivery is attempted only when `~<user>/.config/hermes/telegram`
+exists with `BOT_TOKEN=`/`CHAT_ID=` or `TELEGRAM_BOT_TOKEN=`/`TELEGRAM_CHAT_ID=`.
+If that file is absent, the watchdog still writes a local warning log line and
+records `Telegram delivery skipped or failed`.
+
+2026-05-31 Telegram delivery validation:
+
+- `instance=bheem` synthetic warning: consumed only by Uma watchdog; root log
+  had zero matches; Telegram delivery succeeded.
+- `instance=vijay` synthetic warning: consumed only by root watchdog; Uma log
+  had zero matches; Telegram delivery succeeded.
+- `instance=all` synthetic warning: consumed by both watchdogs; Telegram
+  delivery succeeded for both chats.
+- Recovery messages: after each alert, the next healthy watchdog pass sent
+  `recovery: back to healthy` and logged `Telegram recovery delivery succeeded`.
+- Approval prompt/media validation: root and Uma bots returned Telegram `200`
+  for harmless inline-button prompt delivery and small document upload.
+- Approval callback execution evidence: live gateway logs contain real
+  `Telegram button resolved 1 approval(s)` entries for root through
+  2026-05-30, including a deny choice, and for Uma on 2026-05-25. Telegram's
+  Bot API cannot synthesize user callback clicks, so callback execution proof
+  comes from these receiver logs plus source review of the Telegram callback
+  handler.
+
 Persistent backup timers:

 ```bash
@ -424,9 +534,33 @@ alerts today) follow a small set of conventions worth keeping consistent.
  (✅ approve / ❌ deny). The dashboard does not yet trigger these — see the
  Phase 8 delegation brief in `docs/prompts/phase8-telegram-loop.md` for the
  design that closes the loop end-to-end.
+- 2026-05-31 delivery smoke test: root and Uma bots both returned Telegram
+  `200` for a harmless inline-button approval prompt. The smoke test itself did
+  not exercise callback handling, because that requires a human button press
+  and an action receiver.
+
+**Media/file delivery**
+- 2026-05-31 delivery smoke test: root and Uma bots both returned Telegram
+  `200` for a small text document upload.

 **Don't paste secrets**
 - Bot tokens and chat IDs live in `~<user>/.config/hermes/telegram` mode `600`,
  never in repo files. The dashboard's `lib/logger.ts` redacts
  `Authorization` / `Cookie` / `*.token` paths from any logged object so an
  accidental `req.log.info({ tg })` won't dump credentials.
+
+## Token audit status
+
+Checked on 2026-05-31 without printing token values:
+
+- Gitea package tokens exist at `/opt/bytelyst/.gitea_token`,
+  `/root/.gitea_npm_token`, and `/root/.gitea_npm_token_home`, mode `600`.
+  They can read package metadata from the local Gitea npm registry and receive
+  `403` from `/api/v1/user`, which is consistent with package-only/no-profile
+  scope.
+- Root GitHub credentials exist in `/root/.git-credentials`. GitHub API scope
+  headers report `gist, read:org, repo, workflow`; this is broader than the
+  desired least-privilege backup scope.
+- No Uma-owned GitHub token file was found under `/home/uma` during the metadata
+  scan, and the active `uma-hermes-backup.service` still runs as root. Keep the
+  existing backup path running until a fine-grained Uma-owned token is provided,
+  then migrate Bheem self-push and re-audit.
--- a/docs/hermes_dashboard_v2_roadmap.md
+++ b/docs/hermes_dashboard_v2_roadmap.md
@ -87,7 +87,7 @@ The `hermes-ops` snapshot becomes the single source of truth for live status. Be
 - [x] Stop swallowing every failure to `null` indiscriminately: distinguish "unit inactive" from "probe failed/timed out" and surface per-field status so the UI can show *unknown* vs *down*.
 - [x] Add Zod validation + a stable typed contract for `HermesOpsSnapshot` on the route.
 - [x] **Add unit tests for the `hermes-ops` repository** (mock `execFile`/fs) — closes the REVIEW_ACTIONS "only `services` has tests" gap for this module.
- [ ] Read Bheem/Uma state via a **self-reporting ops exporter** (Decision #2): a read-only `uma` user-systemd timer writes a sanitized JSON snapshot to a known path; the root backend reads + aggregates it (Vijay gets a symmetric exporter). Interim stopgap until it ships: `runuser -u uma -- systemctl --user is-active/is-enabled` instead of the `ps`/`existsSync` checks.
+- [x] Read Bheem/Uma state via a **self-reporting ops exporter** (Decision #2): a read-only `uma` user-systemd timer writes a sanitized JSON snapshot to a known path; the root backend reads + aggregates it (Vijay gets a symmetric exporter). *(Repo implementation complete 2026-05-31: new `scripts/hermes-ops-exporter.py`, root/Uma systemd timer templates, and backend support for `/root/.hermes/ops-export.json` + `/home/uma/.hermes/ops-export.json` with live probe fallback. VM enablement still belongs to Phase 4 verification.)*

 ## Phase 2 — Instance dimension across Mission Control (G2)

@ -107,9 +107,9 @@ Define the ingestion contract first, then convert panes. Keep any pane with no r
  - [x] Memory + skills inventory (`hermes memory list --json`, `hermes skills list --json`).
  - [x] Watchdog alerts feed (tails `~/.hermes/logs/hermes-health-watchdog.log`, severity-bucketed `info`/`warn`/`critical`).
  - [x] Backup history (`git -C <repo> log` — last 20 commits per backup repo).
- [ ] Convert **Task Ledger** (`/hermes/tasks`) + **Task Detail** to the real task/event source. *(Deferred: needs the JSONL/SQLite session-events pipeline that Decision #1 marked as optional. Task Ledger remains seed-data; flip when a real source ships.)*
+- [~] Convert **Task Ledger** (`/hermes/tasks`) + **Task Detail** to the real task/event source. *(Advanced 2026-05-31: telemetry now reads real `sessions/sessions.json` indexes plus sanitized Hermes session JSONL events per instance. Task Detail renders a live Hermes event timeline with message content redacted at the backend. The planner-style task table remains seed-data until Hermes emits a durable task-id/task-state ledger rather than only session events.)*
 - [~] Convert **Agents** (`/hermes/agents`) to real toolset/integration status per instance. *(Partial: `/hermes/agents` now renders a "Memory & Skills inventory (live)" SectionCard backed by the Phase 3 telemetry endpoint per instance — `hermes memory list` / `hermes skills list` rendered with per-section probe-status badges, item counts, and the first N entries each. Agent **health** statuses (latency, failure rate, last-success/failure) are still seed-data; lighting those up needs a separate observability contract — telemetry only exposes inventory today.)*
- [ ] Convert **History** (`/hermes/history`) to real session/cron/backup trends. *(Deferred: depends on real session timeseries.)*
+- [~] Convert **History** (`/hermes/history`) to real session/cron/backup trends. *(Advanced 2026-05-31: History now renders live sanitized session JSONL events, session index entries, cron count, watchdog alert count, backup commit count, and a live artifact timeline from telemetry. The weekly chart/failure categories remain seed trend models until Hermes emits an aggregate durable analytics timeseries.)*
 - [x] **Products** (`/hermes/products`): repoint at the real service registry (`backend/src/modules/services/`) + health module (Decision #3); drop the fabricated 50-item mock. Optional manual entries for not-yet-deployed products come later. *(Page rewritten: top "Live services" section sources from `api.getServices()` joined with `api.getHealth()` (real Cosmos-backed registry + 30s-cached health probes), with per-service status, response time, last deploy, last health check. The 50-item seed remains below in a clearly-labelled "Planned products (seed data)" section per the roadmap's "optional manual entries for not-yet-deployed products come later" note. New E2E mocks for `/api/services` + `/api/health` keep the suite deterministic.)*

 ## Phase 4 — Bheem/Uma parity so the dashboard shows two equal instances (G7)
@ -118,11 +118,11 @@ This is the biggest operational asymmetry and the reason half the ops-panel warn

 > **VM ops, not codebase work.** This phase requires sudo on the Hostinger VM, Uma-owned GitHub credentials, and Telegram bot tokens — none of it is editable in this repo. The full delegation brief is in [`docs/prompts/phase4-bheem-uma-parity.md`](./prompts/phase4-bheem-uma-parity.md). When the brief's Definition-of-Done is met, tick the boxes below and the summary line at the bottom of this file.

- [ ] Stand up a **Uma persistent backup repo + `uma-hermes-backup.timer`** mirroring the root design (sanitized `hermes_persistent_backup/`, secrets and `state.db` excluded), pushing to `umadev0931/uma_hostinger_hermes_vm` **with a Uma-owned, repo-scoped token (Bheem self-pushes; root no longer pushes Uma's backup — Decision #5)**.
- [ ] Install a **Uma health watchdog** (mirror `scripts/hermes-health-watchdog.py`), silent-on-success, alerting Uma's Telegram.
+- [~] Stand up a **Uma persistent backup repo + `uma-hermes-backup.timer`** mirroring the root design (sanitized `hermes_persistent_backup/`, secrets and `state.db` excluded), pushing to `umadev0931/uma_hostinger_hermes_vm` **with a Uma-owned, repo-scoped token (Bheem self-pushes; root no longer pushes Uma's backup — Decision #5)**. *(Live read-only check 2026-05-31: `uma-hermes-backup.timer` is active, repo HEAD is `a4828db`, repo status is clean, and `/home/uma/.hermes/google_token.json` exists. Still needs explicit token-scope/ownership audit before marking fully complete.)*
+- [~] Install a **Uma health watchdog** (mirror `scripts/hermes-health-watchdog.py`), silent-on-success, alerting Uma's Telegram. *(Installed 2026-05-31 as `uma-hermes-health-watchdog.timer`; `/home/uma/.hermes/logs/hermes-health-watchdog.log` now exists and reports healthy after fixing user-systemd gateway probing. Telegram delivery is wired but not fully validated because `/home/uma/.config/hermes/telegram` is absent.)*
 - [ ] Run the **first Uma restore rehearsal** into a temporary `HERMES_HOME`; document in `docs/hermes-operations.md` / `docs/hermes-disaster-recovery.md`.
 - [ ] Schedule a **quarterly Uma restore-drill reminder** (parity with root).
- [ ] Confirm these close the corresponding Bheem warnings emitted by `getHermesOpsSnapshot()` (backup timer active, repo HEAD readable + clean, Google token present).
+- [~] Confirm these close the corresponding Bheem warnings emitted by `getHermesOpsSnapshot()` (backup timer active, repo HEAD readable + clean, Google token present). *(Partial live evidence 2026-05-31: backup timer active, repo HEAD readable/clean, Google token present, and Uma watchdog log now exists. Still open for Telegram credential validation + Uma-owned token migration.)*

 ## Phase 5 — Dashboard app hardening (G5)

@ -141,21 +141,21 @@ This is the biggest operational asymmetry and the reason half the ops-panel warn
 - [x] Deep links from the ops panel → Task Ledger filtered to the relevant instance/most-recent work. *(Per-instance "View tasks" button on each ops-panel `InstanceCard` links to `/hermes/tasks?instance=<id>`. `HermesInstanceProvider` now hydrates from the `?instance=` URL param on mount (winning over the persisted localStorage selection) and keeps the param meaningful for back/forward + copy-paste.)*
 - [x] Per-instance action rows beyond copy-link/open-dashboard: open-runbook, copy SSH/tunnel command, "how to restart this gateway". *(InstanceCard now exposes "Copy SSH command" (Tailscale-scoped: `tailscale ssh root@<tailscale-ip>` for Vijay, `tailscale ssh uma@<tailscale-ip>` for Bheem — never raw `ssh`), "View tasks" deep link, and "Open runbook" pointing at `docs/hermes-operations.md`. "How to restart this gateway" is intentionally a runbook link rather than a button — restarting is a privileged action that should go through the runbook, not the dashboard.)*
 - [x] Optional dark/light theme toggle if the shell supports it. *(`components/theme-toggle.tsx` Sun/Moon button mounted in the Hermes layout next to the instance switcher. Persists in localStorage `bytelyst.theme.v1`; an inline FOUC-prevention script in the root layout reads the same key and applies `data-theme` to `<html>` before React hydrates so the first paint matches the user's last choice. The design system already had `[data-theme="light"]` overrides in `styles/tokens.css`; the toggle just flips them on.)*
- [ ] Unified alerts feed across both instances on the overview. *(Partially achieved by `recentAlerts` + the new severity filter on the ops panel; full per-instance roll-up of telemetry watchdog alerts is queued behind a UI consumer for the new `/api/hermes/telemetry/:instance` endpoint.)*
+- [x] Unified alerts feed across both instances on the overview. *(Completed 2026-05-31: `/hermes` now renders "Unified live alerts" from both telemetry endpoints, filtered by the global instance switcher, with watchdog alerts, session totals, cron entries, and backup commits.)*

 ## Phase 7 — Security & access (G8)

 - [x] Require authentication on the DevOps dashboard's hermes routes/endpoints (reuse platform-service auth already used elsewhere). *(Both `/api/hermes/ops` and the new `/api/hermes/telemetry/:instance` now gate on `requireAdmin`. Privilege-surface table in `dashboard/DEPLOYMENT.md` updated to match. The previous "read-only ops snapshot, no auth" carve-out is gone — all Hermes routes are admin-only.)*
- [ ] Decide and document `security.redact_secrets` and `privacy.redact_pii` for gateway sessions (per instance). *(Deferred — needs a founder decision on PII handling for session content; not a code-only change.)*
- [ ] Finish the GitHub/Gitea **least-privilege token audit** (root currently pushes both repos) and rotate any migrated/exposed credentials — completed naturally by Decision #5 (Bheem self-pushes with its own scoped token). *(Resolves naturally when Phase 4 ships — see the Phase 4 delegation brief.)*
+- [x] Decide and document `security.redact_secrets` and `privacy.redact_pii` for gateway sessions (per instance). *(Documented 2026-05-31 in `docs/hermes-operations.md`: dashboard surfaces may expose only redacted projections such as counts/status/timestamps/sanitized warnings/cron names/backup subjects; raw prompts, transcripts, command output with secrets, `.env`, OAuth payloads, `state.db`, Telegram/provider tokens, and personal message content are prohibited. Any future event pipeline must redact at ingestion.)*
+- [~] Finish the GitHub/Gitea **least-privilege token audit** (root currently pushes both repos) and rotate any migrated/exposed credentials — completed naturally by Decision #5 (Bheem self-pushes with its own scoped token). *(Audited 2026-05-31 without printing tokens: Gitea package tokens can read package metadata and get `403` from `/api/v1/user`; root GitHub token reports broad scopes `gist, read:org, repo, workflow`; no Uma-owned GitHub token file was found, and active `uma-hermes-backup.service` still runs as root. Rotation/migration requires a fine-grained Uma-owned token.)*
 - [x] Keep all hermes data private-only; never expose the `hermes-ops` snapshot or task data on a public route. *(Verified: no Caddy/public route added; the dashboard is bound to `127.0.0.1` and reached via Tailscale or SSH tunnel only — see `dashboard/DEPLOYMENT.md` "Ports — quick reference" + "Privilege Surface" sections. With this commit's `requireAdmin` change, even an attacker with internal network access still needs a valid admin JWT to read the ops snapshot.)*

 ## Phase 8 — Notifications & Telegram loop (G9)

 > **Mostly VM ops + bot-token configuration**, with two small backend hooks. Full delegation brief in [`docs/prompts/phase8-telegram-loop.md`](./prompts/phase8-telegram-loop.md). The dashboard's documentation half is already done — see `docs/hermes-operations.md` "Telegram Notification Convention".

- [ ] Push new dashboard-detected warnings to the correct Telegram (Vijay → root chat, Bheem → Uma chat), reusing the watchdog delivery path; silent on healthy. *(Design captured in the brief: `lib/dashboard-alerts.ts` writes new warnings to a tag-prefixed log; both watchdogs tail it. Implementation gated on Phase 4 (Uma watchdog must exist first) and on bot tokens.)*
- [ ] Validate the Telegram approval-prompt flow and media/file delivery end-to-end (the two unchecked v1 items). *(Brief item 3.)*
+- [x] Push new dashboard-detected warnings to the correct Telegram (Vijay → root chat, Bheem → Uma chat), reusing the watchdog delivery path; silent on healthy. *(Validated 2026-05-31: `instance=bheem` warning was consumed only by Uma watchdog and delivered to Telegram; `instance=vijay` only by root; `instance=all` by both. Follow-up healthy pass sent Telegram recovery messages for both instances.)*
+- [x] Validate the Telegram approval-prompt flow and media/file delivery end-to-end (the two unchecked v1 items). *(Validated 2026-05-31: root and Uma bots returned Telegram `200` for harmless inline-button approval prompt delivery and small document upload. Existing live gateway logs also prove real inline approval callback execution: root recorded multiple `Telegram button resolved 1 approval(s)` entries through 2026-05-30, including `choice=deny`; Uma recorded `Telegram button resolved 1 approval(s)` entries on 2026-05-25. Bot API cannot synthesize user callback clicks, so this status is based on live receiver logs plus source review of the callback handler.)*
 - [x] Preserve the numbered-emoji progress convention (`1️⃣`, `2️⃣`, …) for completion updates. *(Codified in `docs/hermes-operations.md` under a new "Telegram Notification Convention" section, alongside the routing-per-instance, silent-on-healthy, and never-paste-secrets rules. The brief references this as the source of truth so VM-side implementers stay consistent.)*

 ---
@ -182,25 +182,25 @@ export interface HermesInstanceRef {
 This roadmap is complete when:

 - [ ] The overview, ledger, agents, and history panes render **real data for both Vijay and Bheem**, filterable by instance; only panes without a real source remain (clearly labeled) seed data.
- [ ] `hermes-ops` is cached, uses robust Uma user-systemd checks, distinguishes unknown vs down, and has unit tests.
+- [x] `hermes-ops` is cached, uses robust Uma user-systemd checks, distinguishes unknown vs down, and has unit tests.
 - [ ] Bheem has a persistent backup repo + timer, a watchdog, and one completed restore rehearsal — and the dashboard shows **2/2 healthy** with zero standing Bheem warnings.
 - [ ] CI is green on the correct path, lint is real, and coverage includes auth/csrf/orchestrator/health/hermes-ops.
- [ ] Hermes routes require auth and remain private-only; redact policies are decided and documented.
- [ ] Dashboard warnings reach the correct Telegram chat per instance.
+- [ ] Hermes routes require auth and remain private-only; redact policies are decided and documented. *(Auth/private-only/redaction are complete; still open only because the GitHub/Gitea least-privilege token audit remains tied to Phase 4.)*
+- [x] Dashboard warnings reach the correct Telegram chat per instance.

 ## Implementation Status Checklist

 Update only with evidence (source review, tests, build output, or browser/VM verification).

 - [x] Phase 0 — Guardrails reconfirmed (2026-05-30 pass; remains "must hold throughout")
- [x] Phase 1 — `hermes-ops` hardened + tested
+- [x] Phase 1 — `hermes-ops` hardened + tested, including sanitized ops-export support
 - [x] Phase 2 — Instance dimension + switcher
 - [x] Phase 3 — Real telemetry ingestion + Products pane converted (Task Ledger / Agents / History deferred — depend on JSONL session pipeline, see Phase 3 notes)
 - [ ] Phase 4 — Bheem/Uma parity (backup, watchdog, restore drill)
 - [x] Phase 5 — App/CI hardening (P0/P1/P2 done; P2 follow-ups in DEPLOYMENT.md mitigation roadmap remain)
- [x] Phase 6 — UX polish (severity tags + deep links + per-instance actions; trend cards + theme toggle deferred)
- [x] Phase 7 — Security & access (auth on hermes routes + privacy stance documented; redact_secrets/redact_pii decision deferred)
- [ ] Phase 8 — Notifications & Telegram (convention codified; delivery loop is VM ops, see brief)
+- [x] Phase 6 — UX polish (severity tags + deep links + per-instance actions; trend cards + theme toggle + unified live alerts)
+- [x] Phase 7 — Security & access (auth on hermes routes + privacy stance documented; token audit remains tied to Phase 4)
+- [x] Phase 8 — Notifications & Telegram (warning routing, recovery messages, media delivery, and approval callback evidence validated 2026-05-31)

 ## Decisions (resolved 2026-05-30)

--- a/docs/operations.md
+++ b/docs/operations.md
@ -4,6 +4,11 @@ Common operational paths for the team.

 Use this file as the routing guide. For the exact support boundary, cross-check `docs/supported-scripts.md`.

+For app/dashboard bookmarks and deployment URL references, use
+[`docs/app-url-bookmarks.md`](app-url-bookmarks.md). Keep that file updated
+whenever a new app URL, dashboard URL, API route, or last deploy timestamp
+changes.
+
 ---

 ## Hostinger VM Maintenance
--- a/scripts/hermes-health-watchdog.py
+++ b/scripts/hermes-health-watchdog.py
@ -12,12 +12,21 @@ import subprocess
 import sys
 from datetime import datetime, timezone
 from pathlib import Path
+from urllib.parse import urlencode
+from urllib.request import Request, urlopen

 DISK_WARN_PERCENT = int(os.getenv("HERMES_WATCHDOG_DISK_WARN_PERCENT", "85"))
 MEMORY_WARN_PERCENT = int(os.getenv("HERMES_WATCHDOG_MEMORY_WARN_PERCENT", "90"))
 BACKUP_STALE_MINUTES = int(os.getenv("HERMES_WATCHDOG_BACKUP_STALE_MINUTES", "90"))
 BACKUP_JOB_NAME = os.getenv("HERMES_WATCHDOG_BACKUP_JOB_NAME", "Sync Hermes persistent-data backup to GitHub")
 GATEWAY_SERVICE = os.getenv("HERMES_WATCHDOG_GATEWAY_SERVICE", "hermes-gateway.service")
+SYSTEMD_SCOPE = os.getenv("HERMES_WATCHDOG_SYSTEMD_SCOPE", "system")
+INSTANCE_ID = os.getenv("HERMES_WATCHDOG_INSTANCE", "vijay")
+TELEGRAM_CONFIG = Path(os.getenv("HERMES_WATCHDOG_TELEGRAM_CONFIG", str(Path.home() / ".config/hermes/telegram")))
+WATCHDOG_LOG = Path(os.getenv("HERMES_WATCHDOG_LOG_PATH", str(Path.home() / ".hermes/logs/hermes-health-watchdog.log")))
+DASHBOARD_ALERT_LOG = Path(os.getenv("HERMES_DASHBOARD_ALERT_LOG", "/var/log/hermes-dashboard-warnings.log"))
+DASHBOARD_ALERT_STATE = Path(os.getenv("HERMES_DASHBOARD_ALERT_STATE", str(Path.home() / ".hermes/logs/dashboard-alerts.offset")))
+ALERT_STATE = Path(os.getenv("HERMES_WATCHDOG_ALERT_STATE", str(Path.home() / ".hermes/logs/watchdog-alert-active")))
 DOCKER_CONTAINERS = [
    item.strip()
    for item in os.getenv("HERMES_WATCHDOG_DOCKER_CONTAINERS", "caddy,gitea-npm-registry").split(",")
@ -30,13 +39,99 @@ def run(cmd: list[str], timeout: int = 20) -> subprocess.CompletedProcess[str]:
    return subprocess.run(cmd, text=True, capture_output=True, timeout=timeout, check=False)


+def utc_now() -> str:
+    """Return the current UTC time as an ISO-8601 string with second precision."""
+    current = datetime.now(timezone.utc)
+    return current.isoformat(timespec="seconds")
+
+
+def append_watchdog_log(severity: str, message: str) -> None:
+    """Append one ``<utc timestamp> <SEVERITY> <message>`` line to the watchdog log.
+
+    The log's parent directory is created on demand.
+    """
+    log_dir = WATCHDOG_LOG.parent
+    log_dir.mkdir(parents=True, exist_ok=True)
+    with WATCHDOG_LOG.open("a", encoding="utf-8") as log:
+        entry = f"{utc_now()} {severity.upper()} {message}\n"
+        log.write(entry)
+
+
+def read_key_file(path: Path) -> dict[str, str]:
+    """Parse a ``KEY=value`` file into a dict of stripped keys and values.
+
+    Blank lines, ``#`` comment lines, and entries with an empty key or value
+    are skipped. One pair of matching single or double quotes around a value
+    is removed so shell-style files (``BOT_TOKEN="..."``) parse to the bare
+    value.
+
+    Returns an empty dict when the file is missing, unreadable, or not valid
+    UTF-8, so a bad credentials path yields "no credentials" rather than an
+    exception that would abort the watchdog run.
+    """
+    try:
+        text = path.read_text(encoding="utf-8")
+    except (OSError, UnicodeDecodeError):
+        return {}
+    values: dict[str, str] = {}
+    for raw in text.splitlines():
+        line = raw.strip()
+        if not line or line.startswith("#"):
+            continue
+        key, sep, value = line.partition("=")
+        key = key.strip()
+        value = value.strip()
+        # Drop one pair of matching surrounding quotes, if present.
+        if len(value) >= 2 and value[0] == value[-1] and value[0] in "\"'":
+            value = value[1:-1].strip()
+        if sep and key and value:
+            values[key] = value
+    return values
+
+
+def telegram_credentials() -> tuple[str | None, str | None]:
+    """Load ``(bot token, chat id)`` from the Telegram config file.
+
+    Each value is looked up under its short key first (``BOT_TOKEN`` /
+    ``CHAT_ID``) and then under the ``TELEGRAM_``-prefixed key.
+    """
+    config = read_key_file(TELEGRAM_CONFIG)
+    bot_token = config.get("BOT_TOKEN")
+    if not bot_token:
+        bot_token = config.get("TELEGRAM_BOT_TOKEN")
+    target_chat = config.get("CHAT_ID")
+    if not target_chat:
+        target_chat = config.get("TELEGRAM_CHAT_ID")
+    return bot_token, target_chat
+
+
+def send_telegram(message: str) -> bool:
+    """Send ``message`` to the configured Telegram chat.
+
+    Returns True only when the Bot API answers with a 2xx status. Returns
+    False when credentials are missing or when the request fails for any
+    reason; delivery is best-effort and never raises.
+    """
+    token, chat_id = telegram_credentials()
+    if not token or not chat_id:
+        return False
+    # Telegram rejects sendMessage texts above 4096 characters. Truncate with
+    # some headroom so a long alert list is still delivered instead of the
+    # whole message being refused.
+    max_chars = 4000
+    if len(message) > max_chars:
+        message = message[: max_chars - 1] + "…"
+    data = urlencode({"chat_id": chat_id, "text": message}).encode("utf-8")
+    req = Request(f"https://api.telegram.org/bot{token}/sendMessage", data=data, method="POST")
+    try:
+        with urlopen(req, timeout=10) as response:  # noqa: S310 - token-protected Telegram API endpoint.
+            return 200 <= response.status < 300
+    except Exception:
+        return False
+
+
+def mark_alert_active() -> None:
+    """Write the alert-active marker file, stamped with the current UTC time."""
+    marker_dir = ALERT_STATE.parent
+    marker_dir.mkdir(parents=True, exist_ok=True)
+    ALERT_STATE.write_text(utc_now(), encoding="utf-8")
+
+
+def clear_alert_active() -> bool:
+    """Remove the alert-active marker file.
+
+    Returns True when a marker was present and has been removed, False when
+    there was nothing to remove.
+    """
+    if ALERT_STATE.exists():
+        try:
+            ALERT_STATE.unlink()
+            return True
+        except FileNotFoundError:
+            # The marker vanished between the existence check and the unlink.
+            pass
+    return False
+
+
+def read_dashboard_alerts() -> list[str]:
+    """Return dashboard warning lines appended since the last run that target this instance.
+
+    The byte offset of the last consumed line is kept in
+    ``DASHBOARD_ALERT_STATE``. The log is read in binary mode so the stored
+    offset is a real byte position that can be compared with the file size.
+    When the stored offset is negative or larger than the file (for example
+    after the log was truncated or rotated), reading restarts at the
+    beginning.
+
+    Only complete, newline-terminated lines are consumed. A partially written
+    final line is left for the next run, so it is never delivered truncated.
+    Undecodable bytes are replaced rather than raising, so a bad byte cannot
+    stall the offset permanently.
+
+    A line is routed to this watchdog when it contains
+    ``instance=<INSTANCE_ID>`` or ``instance=all``. Any I/O error returns an
+    empty list.
+    """
+    if not DASHBOARD_ALERT_LOG.exists():
+        return []
+    try:
+        previous = int(DASHBOARD_ALERT_STATE.read_text(encoding="utf-8").strip() or "0")
+    except Exception:
+        previous = 0
+    try:
+        size = DASHBOARD_ALERT_LOG.stat().st_size
+        start = previous if 0 <= previous <= size else 0
+        with DASHBOARD_ALERT_LOG.open("rb") as fh:
+            fh.seek(start)
+            chunk = fh.read()
+        # Consume up to and including the last newline; keep any unterminated tail.
+        consumed = chunk.rfind(b"\n") + 1
+        text = chunk[:consumed].decode("utf-8", errors="replace")
+        lines = [line.strip() for line in text.split("\n") if line.strip()]
+        DASHBOARD_ALERT_STATE.parent.mkdir(parents=True, exist_ok=True)
+        DASHBOARD_ALERT_STATE.write_text(str(start + consumed), encoding="utf-8")
+    except Exception:
+        return []
+
+    return [line for line in lines if f"instance={INSTANCE_ID}" in line or "instance=all" in line]
+
+
 def check_gateway(alerts: list[str]) -> None:
-    result = run(["systemctl", "is-active", GATEWAY_SERVICE])
+    cmd = ["systemctl", "--user", "is-active", GATEWAY_SERVICE] if SYSTEMD_SCOPE == "user" else ["systemctl", "is-active", GATEWAY_SERVICE]
+    result = run(cmd)
    if result.stdout.strip() != "active":
        alerts.append(f"gateway service `{GATEWAY_SERVICE}` is not active: `{result.stdout.strip() or result.stderr.strip() or 'unknown'}`")


 def check_backup_cron(alerts: list[str]) -> None:
+    if not BACKUP_JOB_NAME:
+        return
    result = run(["hermes", "cron", "list"], timeout=30)
    out = result.stdout + result.stderr
    if result.returncode != 0:
@ -126,16 +221,32 @@ def main() -> int:
            check(alerts)
        except Exception as exc:  # noqa: BLE001 - watchdog should alert, not crash silently
            alerts.append(f"{check.__name__} errored: {exc}")
+    alerts.extend(f"dashboard alert: {line}" for line in read_dashboard_alerts())

    if alerts:
-        print("🚨 ByteLyst Hermes watchdog alert")
+        header = f"ByteLyst Hermes watchdog alert ({INSTANCE_ID})"
+        append_watchdog_log("WARNING", header)
+        print("🚨 " + header)
        for item in alerts:
+            append_watchdog_log("WARNING", item)
            print(f"- {item}")
-        print(
+        footer = (
            "\nSuggested first checks: `systemctl status hermes-gateway --no-pager`, "
            "`hermes cron list`, `df -h /`, `free -h`, `docker ps`."
        )
+        print(footer)
+        sent = send_telegram("🚨 " + header + "\n" + "\n".join(f"- {item}" for item in alerts) + footer)
+        append_watchdog_log("INFO" if sent else "WARNING", "Telegram delivery succeeded" if sent else "Telegram delivery skipped or failed")
+        mark_alert_active()
        return 0
+    recovered = clear_alert_active()
+    if recovered:
+        message = f"✅ ByteLyst Hermes watchdog recovery ({INSTANCE_ID})\nBack to healthy."
+        sent = send_telegram(message)
+        append_watchdog_log("INFO", "recovery: back to healthy")
+        append_watchdog_log("INFO" if sent else "WARNING", "Telegram recovery delivery succeeded" if sent else "Telegram recovery delivery skipped or failed")
+    else:
+        append_watchdog_log("INFO", "healthy")
    return 0


--- a/scripts/hermes-ops-exporter.py
+++ b/scripts/hermes-ops-exporter.py
@ -0,0 +1,143 @@
+#!/usr/bin/env python3
+"""Write a sanitized Hermes ops snapshot for the unified dashboard.
+
+Run this as the Hermes instance owner (root for Vijay, uma for Bheem). It
+writes booleans, counts, timestamps, and short Git metadata only. It never
+copies tokens, state.db, logs, prompts, session content, or environment files.
+"""
+from __future__ import annotations
+
+import json
+import os
+import subprocess
+import tempfile
+from pathlib import Path
+from typing import Any
+
+
+HERMES_HOME = Path(os.getenv("HERMES_HOME", str(Path.home() / ".hermes")))
+OUTPUT_PATH = Path(os.getenv("HERMES_OPS_EXPORT_PATH", str(HERMES_HOME / "ops-export.json")))
+GATEWAY_SERVICE = os.getenv("HERMES_GATEWAY_SERVICE", "hermes-gateway.service")
+DASHBOARD_SERVICE = os.getenv("HERMES_DASHBOARD_SERVICE", "hermes-root-dashboard.service")
+BACKUP_TIMER = os.getenv("HERMES_BACKUP_TIMER", "hermes-root-backup.timer")
+BACKUP_REPO = Path(os.getenv("HERMES_BACKUP_REPO", str(Path.home() / "repos" / "bytelyst_hostinger_hermes_vm")))
+
+
+def run(cmd: list[str], cwd: Path | None = None, timeout: int = 10) -> tuple[bool, str]:
+    """Run ``cmd`` and return ``(ran, stripped stdout)``.
+
+    ``ran`` is False when the process could not be started (missing or
+    non-executable binary, missing or invalid ``cwd``, permission errors) or
+    when it exceeded ``timeout`` seconds. It is True whenever the process
+    ran to completion, whatever its exit status; callers interpret stdout
+    themselves.
+    """
+    try:
+        result = subprocess.run(cmd, cwd=cwd, text=True, capture_output=True, timeout=timeout, check=False)
+    except (OSError, subprocess.TimeoutExpired):
+        # OSError covers FileNotFoundError, PermissionError and NotADirectoryError.
+        return False, ""
+    return True, result.stdout.strip()
+
+
+def probe_active(unit: str) -> dict[str, Any]:
+    """Report whether ``unit`` is active in the user or the system manager.
+
+    The user manager is asked first. Unless it answers ``active``, the system
+    manager is asked as well: ``systemctl --user`` still runs when the unit
+    lives in the system scope or when no user bus is reachable, so "the
+    command ran" alone must not suppress the fallback.
+
+    Returns ``{"active": bool, "status": "up" | "down" | "unknown"}``, where
+    ``unknown`` means neither ``systemctl`` invocation could be run.
+    """
+    user_ran, user_out = run(["systemctl", "--user", "is-active", unit])
+    if user_out == "active":
+        return {"active": True, "status": "up"}
+    system_ran, system_out = run(["systemctl", "is-active", unit])
+    active = system_out == "active"
+    ran = user_ran or system_ran
+    return {"active": active, "status": "up" if active else "down" if ran else "unknown"}
+
+
+def probe_enabled(unit: str) -> bool:
+    """Return True when ``unit`` is ``enabled`` in the user or the system manager.
+
+    Both scopes are consulted, user first. A ``--user`` query that runs but
+    does not answer ``enabled`` (unit lives in the system scope, or no user
+    bus is reachable) falls through to the system query.
+    """
+    for scope in (["--user"], []):
+        ran, out = run(["systemctl", *scope, "is-enabled", unit])
+        if ran and out == "enabled":
+            return True
+    return False
+
+
+def probe_timer(name: str) -> dict[str, Any]:
+    """Describe a systemd timer: activity plus its next/last trigger properties.
+
+    Activity comes from ``probe_active``. The ``NextElapseUSecRealtime`` and
+    ``LastTriggerUSec`` properties are read with ``systemctl show``, user
+    manager first. The first scope that yields at least one non-empty
+    property wins; a ``--user`` query that runs but returns nothing useful
+    falls through to the system manager.
+
+    Returns a dict with ``name``, ``active``, ``status``, ``nextRun`` and
+    ``lastRun``. The last two are None when no scope reported a value.
+    """
+    active = probe_active(name)
+    props: dict[str, str | None] = {}
+    for scope in (["--user"], []):
+        ran, out = run([
+            "systemctl",
+            *scope,
+            "show",
+            name,
+            "-p",
+            "NextElapseUSecRealtime",
+            "-p",
+            "LastTriggerUSec",
+            "--no-pager",
+        ])
+        if not ran:
+            continue
+        parsed: dict[str, str | None] = {}
+        for line in out.splitlines():
+            key, _, value = line.partition("=")
+            parsed[key] = value or None
+        if any(parsed.values()):
+            props = parsed
+            break
+    return {
+        "name": name,
+        "active": active["active"],
+        "status": active["status"],
+        "nextRun": props.get("NextElapseUSecRealtime"),
+        "lastRun": props.get("LastTriggerUSec"),
+    }
+
+
+def probe_repo(path: Path) -> dict[str, Any]:
+    """Collect short Git metadata for the backup repository at ``path``.
+
+    The repository counts as readable only when ``git rev-parse`` actually
+    printed a HEAD. ``run`` reports success whenever git could be spawned, so
+    a directory that is not a repository produces empty output. That must
+    not be reported as ``status: "up"`` with a clean working tree.
+
+    Returns ``path``, ``branch``, ``clean``, ``head``, ``lastCommitAt``,
+    ``size`` (always None) and ``status`` (``"up"`` or ``"unknown"``).
+    """
+    ran_head, head = run(["git", "rev-parse", "--short", "HEAD"], cwd=path)
+    ran_branch, branch = run(["git", "branch", "--show-current"], cwd=path)
+    ran_status, status = run(["git", "status", "--porcelain"], cwd=path)
+    ran_commit, last_commit = run(["git", "log", "-1", "--format=%cI"], cwd=path)
+    readable = ran_head and bool(head)
+    return {
+        "path": str(path),
+        "branch": branch if ran_branch and branch else None,
+        # Empty porcelain output only means "clean" when the repo was readable.
+        "clean": readable and ran_status and status == "",
+        "head": head if readable else None,
+        "lastCommitAt": last_commit if ran_commit and last_commit else None,
+        "size": None,
+        "status": "up" if readable else "unknown",
+    }
+
+
+def restore_stats(path: Path) -> dict[str, int | None]:
+    """Count the files and cron jobs recorded in the backup under ``path``.
+
+    Reads ``hermes_persistent_backup/MANIFEST.json`` (its ``files`` list) and
+    ``hermes_persistent_backup/cron/jobs.json`` (a bare list, or a dict with
+    a ``jobs`` list). A count is None when its file is missing, unparsable,
+    or not shaped as expected.
+    """
+    backup_root = path / "hermes_persistent_backup"
+
+    restored_files: int | None = None
+    try:
+        manifest = json.loads((backup_root / "MANIFEST.json").read_text(encoding="utf-8"))
+        listed = manifest.get("files")
+        if isinstance(listed, list):
+            restored_files = len(listed)
+    except Exception:
+        restored_files = None
+
+    restored_cron: int | None = None
+    try:
+        parsed = json.loads((backup_root / "cron" / "jobs.json").read_text(encoding="utf-8"))
+        entries = parsed.get("jobs") if isinstance(parsed, dict) else parsed
+        if isinstance(entries, list):
+            restored_cron = len(entries)
+    except Exception:
+        restored_cron = None
+
+    return {"restoredFileCount": restored_files, "restoredCronJobs": restored_cron}
+
+
+def write_atomic(path: Path, payload: dict[str, Any]) -> None:
+    """Serialize ``payload`` as sorted, indented JSON and publish it at ``path`` atomically.
+
+    The JSON is written to a temporary file in the destination directory and
+    then renamed over ``path``, so readers never see a half-written file.
+    The temp file is made world-readable (0644) before the rename, so the
+    published file is never briefly visible with the temp file's restrictive
+    mode. If anything fails before the rename, the temp file is removed and
+    the previous export is left untouched.
+
+    Raises whatever ``json.dump`` or the filesystem operations raise.
+    """
+    path.parent.mkdir(parents=True, exist_ok=True)
+    tmp_path: Path | None = None
+    try:
+        with tempfile.NamedTemporaryFile("w", encoding="utf-8", dir=path.parent, delete=False) as tmp:
+            tmp_path = Path(tmp.name)
+            json.dump(payload, tmp, indent=2, sort_keys=True)
+            tmp.write("\n")
+        tmp_path.chmod(0o644)
+        tmp_path.replace(path)
+    except BaseException:
+        # Do not leave stray temp files next to the export on failure.
+        if tmp_path is not None:
+            tmp_path.unlink(missing_ok=True)
+        raise
+
+
+def main() -> int:
+    """Probe the instance and write the sanitized ops snapshot to ``OUTPUT_PATH``.
+
+    The snapshot holds the generation time (UTC, ``YYYY-MM-DDTHH:MM:SSZ``),
+    gateway and dashboard service state, backup timer state, backup repo
+    metadata, whether a Google Workspace token file exists, and the restore
+    statistics from the backup repo. Returns 0 on success.
+    """
+    # Imported locally so this function does not depend on the module's import block.
+    from datetime import datetime, timezone
+
+    payload: dict[str, Any] = {
+        # Same format the `date -u +%Y-%m-%dT%H:%M:%SZ` call produced, without
+        # spawning a process that could fail and abort the export.
+        "generatedAt": datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ"),
+        "gateway": {**probe_active(GATEWAY_SERVICE), "enabled": probe_enabled(GATEWAY_SERVICE)},
+        "dashboard": probe_active(DASHBOARD_SERVICE),
+        "backupTimer": probe_timer(BACKUP_TIMER),
+        "repo": probe_repo(BACKUP_REPO),
+        "googleWorkspaceToken": (HERMES_HOME / "google_token.json").is_file(),
+    }
+    payload.update(restore_stats(BACKUP_REPO))
+    write_atomic(OUTPUT_PATH, payload)
+    return 0
+
+
+if __name__ == "__main__":
+    raise SystemExit(main())
--- a/systemd/hermes-health-watchdog.service
+++ b/systemd/hermes-health-watchdog.service
@ -0,0 +1,15 @@
+[Unit]
+Description=Run Vijay Hermes health watchdog
+
+[Service]
+Type=oneshot
+Environment=HERMES_HOME=/root/.hermes
+Environment=HERMES_WATCHDOG_INSTANCE=vijay
+Environment=HERMES_WATCHDOG_GATEWAY_SERVICE=hermes-gateway.service
+Environment=HERMES_WATCHDOG_BACKUP_REPO=/root/repos/bytelyst_hostinger_hermes_vm
+Environment=HERMES_WATCHDOG_LOG_PATH=/root/.hermes/logs/hermes-health-watchdog.log
+Environment=HERMES_WATCHDOG_TELEGRAM_CONFIG=/root/.config/hermes/telegram
+Environment=HERMES_DASHBOARD_ALERT_LOG=/var/log/hermes-dashboard-warnings.log
+Environment=HERMES_DASHBOARD_ALERT_STATE=/root/.hermes/logs/dashboard-alerts.offset
+Environment=HERMES_WATCHDOG_ALERT_STATE=/root/.hermes/logs/watchdog-alert-active
+ExecStart=/root/.hermes/scripts/hermes_health_watchdog.py
--- a/systemd/hermes-health-watchdog.timer
+++ b/systemd/hermes-health-watchdog.timer
@ -0,0 +1,11 @@
+[Unit]
+Description=Run Vijay Hermes health watchdog every 5 minutes
+
+[Timer]
+OnBootSec=2min
+OnUnitActiveSec=5min
+AccuracySec=30s
+Unit=hermes-health-watchdog.service
+
+[Install]
+WantedBy=timers.target
--- a/systemd/hermes-ops-exporter.service
+++ b/systemd/hermes-ops-exporter.service
@ -0,0 +1,12 @@
+[Unit]
+Description=Export sanitized Hermes ops state for Mission Control
+
+[Service]
+Type=oneshot
+Environment=HERMES_HOME=/root/.hermes
+Environment=HERMES_OPS_EXPORT_PATH=/root/.hermes/ops-export.json
+Environment=HERMES_GATEWAY_SERVICE=hermes-gateway.service
+Environment=HERMES_DASHBOARD_SERVICE=hermes-root-dashboard.service
+Environment=HERMES_BACKUP_TIMER=hermes-root-backup.timer
+Environment=HERMES_BACKUP_REPO=/root/repos/bytelyst_hostinger_hermes_vm
+ExecStart=/opt/bytelyst/learning_ai_devops_tools/scripts/hermes-ops-exporter.py
--- a/systemd/hermes-ops-exporter.timer
+++ b/systemd/hermes-ops-exporter.timer
@ -0,0 +1,11 @@
+[Unit]
+Description=Refresh sanitized Hermes ops export every minute
+
+[Timer]
+OnBootSec=1min
+OnUnitActiveSec=1min
+AccuracySec=15s
+Unit=hermes-ops-exporter.service
+
+[Install]
+WantedBy=timers.target
--- a/systemd/uma-hermes-health-watchdog.service
+++ b/systemd/uma-hermes-health-watchdog.service
@ -0,0 +1,18 @@
+[Unit]
+Description=Run Bheem/Uma Hermes health watchdog
+
+[Service]
+Type=oneshot
+Environment=HERMES_HOME=/home/uma/.hermes
+Environment=HERMES_WATCHDOG_INSTANCE=bheem
+Environment=HERMES_WATCHDOG_GATEWAY_SERVICE=uma-hermes-gateway.service
+Environment=HERMES_WATCHDOG_SYSTEMD_SCOPE=user
+Environment=HERMES_WATCHDOG_BACKUP_JOB_NAME=
+Environment=HERMES_WATCHDOG_BACKUP_REPO=/home/uma/repos/uma_hostinger_hermes_vm
+Environment=HERMES_WATCHDOG_LOG_PATH=/home/uma/.hermes/logs/hermes-health-watchdog.log
+Environment=HERMES_WATCHDOG_TELEGRAM_CONFIG=/home/uma/.config/hermes/telegram
+Environment=HERMES_DASHBOARD_ALERT_LOG=/var/log/hermes-dashboard-warnings.log
+Environment=HERMES_DASHBOARD_ALERT_STATE=/home/uma/.hermes/logs/dashboard-alerts.offset
+Environment=HERMES_WATCHDOG_ALERT_STATE=/home/uma/.hermes/logs/watchdog-alert-active
+Environment=HERMES_WATCHDOG_DOCKER_CONTAINERS=
+ExecStart=/home/uma/.hermes/scripts/hermes_health_watchdog.py
--- a/systemd/uma-hermes-health-watchdog.timer
+++ b/systemd/uma-hermes-health-watchdog.timer
@ -0,0 +1,11 @@
+[Unit]
+Description=Run Bheem/Uma Hermes health watchdog every 5 minutes
+
+[Timer]
+OnBootSec=2min
+OnUnitActiveSec=5min
+AccuracySec=30s
+Unit=uma-hermes-health-watchdog.service
+
+[Install]
+WantedBy=timers.target
--- a/systemd/uma-hermes-ops-exporter.service
+++ b/systemd/uma-hermes-ops-exporter.service
@ -0,0 +1,12 @@
+[Unit]
+Description=Export sanitized Uma Hermes ops state for Mission Control
+
+[Service]
+Type=oneshot
+Environment=HERMES_HOME=/home/uma/.hermes
+Environment=HERMES_OPS_EXPORT_PATH=/home/uma/.hermes/ops-export.json
+Environment=HERMES_GATEWAY_SERVICE=uma-hermes-gateway.service
+Environment=HERMES_DASHBOARD_SERVICE=uma-hermes-dashboard.service
+Environment=HERMES_BACKUP_TIMER=uma-hermes-backup.timer
+Environment=HERMES_BACKUP_REPO=/home/uma/repos/uma_hostinger_hermes_vm
+ExecStart=/opt/bytelyst/learning_ai_devops_tools/scripts/hermes-ops-exporter.py
--- a/systemd/uma-hermes-ops-exporter.timer
+++ b/systemd/uma-hermes-ops-exporter.timer
@ -0,0 +1,11 @@
+[Unit]
+Description=Refresh sanitized Uma Hermes ops export every minute
+
+[Timer]
+OnBootSec=1min
+OnUnitActiveSec=1min
+AccuracySec=15s
+Unit=uma-hermes-ops-exporter.service
+
+[Install]
+WantedBy=timers.target