feat(dashboard): Phase 1 — harden hermes-ops backend + tests
- Short-TTL (30s) snapshot cache + in-flight coalescing so the panel poll and
concurrent refreshes don't fan out ~20 systemctl/git/ps/du subprocesses each
time; snapshot carries a `cached` flag and `getHermesOpsSnapshot({force})`.
- Distinguish "unit inactive" (down) from "probe couldn't run" (unknown): a new
exec() wrapper reports whether the command actually ran (ENOENT/timeout =
unknown) vs exited non-zero with output (e.g. systemctl is-active -> inactive).
Per-field ProbeStatus on gateway/dashboard/timer/repo; warnings differentiate
"is not active" from "status could not be determined".
- Robust Bheem/Uma checks: `runuser -u uma -- systemctl --user is-active/
is-enabled` with a ps / existsSync fallback so a failed probe degrades to the
legacy check instead of a false "down".
- Zod schema (HermesOpsSnapshotSchema) as the stable typed contract; the route
validates output before sending. New status fields are additive (active/
enabled/url/etc. preserved) so the existing web client is unaffected.
- Unit tests (mock execFile/fs): healthy snapshot, down vs unknown mapping,
runuser->ps fallback, unreadable repo, cache hit + force bypass, request
coalescing. Backend: 16 tests green.
Roadmap: check off Phase 1 items and Phase 5 P0 in hermes_dashboard_v2_roadmap.md.
Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
parent
3ee4e7104e
commit
cf5428acd1
186
dashboard/backend/src/modules/hermes-ops/hermes-ops.test.ts
Normal file
186
dashboard/backend/src/modules/hermes-ops/hermes-ops.test.ts
Normal file
@ -0,0 +1,186 @@
|
|||||||
|
import { describe, it, expect, beforeEach, vi } from 'vitest';
|
||||||
|
import { HermesOpsSnapshotSchema } from './types.js';
|
||||||
|
|
||||||
|
// --- Mocks for all I/O the repository performs ---------------------------------
|
||||||
|
const execFileMock = vi.hoisted(() => vi.fn());
|
||||||
|
vi.mock('child_process', () => ({ execFile: execFileMock }));
|
||||||
|
|
||||||
|
const readFileMock = vi.hoisted(() => vi.fn());
|
||||||
|
const statMock = vi.hoisted(() => vi.fn());
|
||||||
|
vi.mock('fs/promises', () => ({ readFile: readFileMock, stat: statMock }));
|
||||||
|
|
||||||
|
const existsSyncMock = vi.hoisted(() => vi.fn());
|
||||||
|
vi.mock('fs', () => ({ existsSync: existsSyncMock }));
|
||||||
|
|
||||||
|
const { getHermesOpsSnapshot, clearHermesOpsCache } = await import('./repository.js');
|
||||||
|
|
||||||
|
type CmdResult = { stdout?: string; error?: Error & { code?: string | number; killed?: boolean; stdout?: string } };
|
||||||
|
type Handler = (command: string, args: string[]) => CmdResult;
|
||||||
|
|
||||||
|
// promisify(execFile) calls execFile(cmd, args, options, callback) and resolves
|
||||||
|
// with the callback's second arg, or rejects with the first.
|
||||||
|
/**
 * Routes every call to the mocked `execFile` through `handler`.
 *
 * The mock follows the Node callback convention used by the code under test:
 * it is invoked as `(command, args, options, callback)` and calls back with
 * the error alone on failure, or with `null` and a `{ stdout }` result on
 * success (a missing `stdout` becomes the empty string).
 */
function setExec(handler: Handler) {
  type ExecCallback = (err: unknown, result?: { stdout: string }) => void;

  execFileMock.mockImplementation(
    (command: string, args: string[], _opts: unknown, cb: ExecCallback) => {
      const { error, stdout } = handler(command, args);
      if (error) {
        cb(error);
        return;
      }
      cb(null, { stdout: stdout ?? '' });
    },
  );
}
|
||||||
|
|
||||||
|
// A fully-healthy fleet: every probe succeeds and reports good state.
|
||||||
|
/**
 * Builds a handler that simulates a fully healthy fleet: every recognised
 * probe succeeds with a "good" answer, and anything unrecognised succeeds with
 * empty output.
 */
function healthyHandler(): Handler {
  const ok = (stdout: string) => ({ stdout });

  return (command, args) => {
    switch (command) {
      case 'systemctl':
        switch (args[0]) {
          case 'is-active':
            return ok('active\n');
          case 'is-enabled':
            return ok('enabled\n');
          case 'show':
            return ok('NextElapseUSecRealtime=Sat 2026-05-31 02:00:00\nLastTriggerUSec=Fri 2026-05-30 02:00:00\n');
          default:
            return ok('');
        }

      case 'runuser':
        // Invoked as: -u uma -- systemctl --user is-active|is-enabled ...
        if (args.includes('is-active')) return ok('active\n');
        if (args.includes('is-enabled')) return ok('enabled\n');
        return ok('');

      case 'git':
        switch (args[0]) {
          case 'branch':
            return ok('main\n');
          case 'status':
            return ok('');
          case 'rev-parse':
            return ok('abc1234\n');
          case 'log':
            return ok('2026-05-30T02:00:00+00:00\n');
          default:
            return ok('');
        }

      case 'du':
        return ok('12M\t.git\n');

      case 'tailscale':
        return ok('100.87.53.10\n');

      case 'ps':
        return ok('');

      default:
        return ok('');
    }
  };
}
|
||||||
|
|
||||||
|
/**
 * Simulates a command that ran but exited with status 3 while still printing
 * `stdout` — the shape `systemctl is-active` produces for an inactive unit.
 */
function inactiveError(stdout: string): CmdResult {
  const failure = new Error('exit 3');
  return { error: Object.assign(failure, { code: 3, stdout }) };
}
|
||||||
|
|
||||||
|
/** Simulates a command that could not be spawned: an error whose code is 'ENOENT'. */
function enoentError(): CmdResult {
  const error = Object.assign(new Error('not found'), { code: 'ENOENT' });
  return { error };
}
|
||||||
|
|
||||||
|
beforeEach(() => {
  // Start every case from a clean slate: no recorded calls, no cached snapshot.
  vi.clearAllMocks();
  clearHermesOpsCache();

  // Default fs: manifests/jobs readable, google token present, emergency token present.
  const readableFiles: ReadonlyArray<readonly [suffix: string, content: unknown]> = [
    ['MANIFEST.json', { files: [1, 2, 3] }],
    ['jobs.json', { jobs: [{ id: 'a' }, { id: 'b' }] }],
  ];
  readFileMock.mockImplementation(async (p: string) => {
    const match = readableFiles.find(([suffix]) => p.endsWith(suffix));
    if (!match) throw new Error('no such file');
    return JSON.stringify(match[1]);
  });

  statMock.mockResolvedValue({ isFile: () => true, size: 500 });
  existsSyncMock.mockReturnValue(true);
});
|
||||||
|
|
||||||
|
// End-to-end behaviour of the snapshot repository with all I/O mocked:
// health mapping (up / down / unknown), fallbacks, caching and coalescing.
describe('hermes-ops repository', () => {
  it('produces a schema-valid, fully-healthy snapshot with no warnings', async () => {
    setExec(healthyHandler());
    const snapshot = await getHermesOpsSnapshot({ force: true });

    expect(() => HermesOpsSnapshotSchema.parse(snapshot)).not.toThrow();
    expect(snapshot.cached).toBe(false);
    expect(snapshot.instances).toHaveLength(2);
    for (const inst of snapshot.instances) {
      expect(inst.gateway.status).toBe('up');
      expect(inst.gateway.active).toBe(true);
      expect(inst.gateway.enabled).toBe(true);
      expect(inst.dashboard.status).toBe('up');
      expect(inst.backup.timer.status).toBe('up');
      expect(inst.backup.repo.status).toBe('up');
      expect(inst.backup.repo.clean).toBe(true);
    }
    expect(snapshot.warnings).toHaveLength(0);
  });

  it('maps a confirmed-inactive unit to status "down" with a warning', async () => {
    setExec((command, args) => {
      if (command === 'systemctl' && args[0] === 'is-active' && args[1] === 'hermes-gateway.service') {
        return inactiveError('inactive\n');
      }
      return healthyHandler()(command, args);
    });

    const snapshot = await getHermesOpsSnapshot({ force: true });
    const vijay = snapshot.instances.find((i) => i.id === 'vijay')!;
    expect(vijay.gateway.status).toBe('down');
    expect(vijay.gateway.active).toBe(false);
    expect(snapshot.warnings).toContain('Vijay / root gateway is not active');
  });

  it('maps an un-runnable probe to status "unknown" (not a false "down")', async () => {
    setExec((command, args) => {
      if (command === 'systemctl' && args[0] === 'is-active' && args[1] === 'hermes-root-dashboard.service') {
        return enoentError();
      }
      return healthyHandler()(command, args);
    });

    const snapshot = await getHermesOpsSnapshot({ force: true });
    const vijay = snapshot.instances.find((i) => i.id === 'vijay')!;
    expect(vijay.dashboard.status).toBe('unknown');
    expect(snapshot.warnings).toContain('Vijay / root private dashboard status could not be determined');
    expect(snapshot.warnings).not.toContain('Vijay / root private dashboard is not active');
  });

  it('uses runuser --user for Bheem and falls back to ps when it cannot run', async () => {
    setExec((command, args) => {
      // runuser probe unavailable in this environment.
      if (command === 'runuser') return enoentError();
      // Legacy ps fallback shows uma's gateway process running.
      if (command === 'ps' && args[0] === '-eo') {
        return { stdout: 'uma /usr/bin/python -m hermes_cli.main gateway\nroot /usr/sbin/sshd\n' };
      }
      return healthyHandler()(command, args);
    });

    const snapshot = await getHermesOpsSnapshot({ force: true });
    const bheem = snapshot.instances.find((i) => i.id === 'bheem')!;
    expect(bheem.gateway.active).toBe(true);
    expect(bheem.gateway.status).toBe('up');
  });

  it('reports unknown repo status when git cannot be read', async () => {
    setExec((command, args) => {
      if (command === 'git') return enoentError();
      return healthyHandler()(command, args);
    });

    const snapshot = await getHermesOpsSnapshot({ force: true });
    const vijay = snapshot.instances.find((i) => i.id === 'vijay')!;
    expect(vijay.backup.repo.status).toBe('unknown');
    expect(vijay.backup.repo.head).toBeNull();
    expect(snapshot.warnings).toContain('Vijay / root backup repo HEAD could not be read');
  });

  it('serves a cached snapshot within the TTL without re-probing', async () => {
    setExec(healthyHandler());

    const first = await getHermesOpsSnapshot();
    const callsAfterFirst = execFileMock.mock.calls.length;
    expect(callsAfterFirst).toBeGreaterThan(0);
    expect(first.cached).toBe(false);

    const second = await getHermesOpsSnapshot();
    expect(second.cached).toBe(true);
    // No additional subprocesses were spawned for the cached read.
    expect(execFileMock.mock.calls.length).toBe(callsAfterFirst);

    // force: true bypasses the cache and re-probes.
    const third = await getHermesOpsSnapshot({ force: true });
    expect(third.cached).toBe(false);
    expect(execFileMock.mock.calls.length).toBeGreaterThan(callsAfterFirst);
  });

  it('coalesces concurrent requests onto one computation', async () => {
    setExec(healthyHandler());

    // Baseline: how many subprocesses a single healthy build spawns.
    await getHermesOpsSnapshot({ force: true });
    const callsPerBuild = execFileMock.mock.calls.length;
    expect(callsPerBuild).toBeGreaterThan(0);

    // Reset cache and call history (mockClear keeps the implementation).
    clearHermesOpsCache();
    execFileMock.mockClear();

    const [a, b] = await Promise.all([getHermesOpsSnapshot(), getHermesOpsSnapshot()]);
    expect(a.generatedAt).toBe(b.generatedAt);
    // Matching timestamps alone could be a same-millisecond coincidence between
    // two separate builds; identity and the call count prove there was one.
    expect(b).toBe(a);
    expect(execFileMock.mock.calls.length).toBe(callsPerBuild);
  });
});
|
||||||
@ -2,10 +2,24 @@ import { execFile } from 'child_process';
|
|||||||
import { promisify } from 'util';
|
import { promisify } from 'util';
|
||||||
import { readFile, stat } from 'fs/promises';
|
import { readFile, stat } from 'fs/promises';
|
||||||
import { existsSync } from 'fs';
|
import { existsSync } from 'fs';
|
||||||
import type { HermesOpsCronJob, HermesOpsInstance, HermesOpsRepo, HermesOpsSnapshot, HermesOpsTimer } from './types.js';
|
import type {
|
||||||
|
HermesOpsCronJob,
|
||||||
|
HermesOpsInstance,
|
||||||
|
HermesOpsRepo,
|
||||||
|
HermesOpsSnapshot,
|
||||||
|
HermesOpsTimer,
|
||||||
|
ProbeStatus,
|
||||||
|
} from './types.js';
|
||||||
|
|
||||||
const execFileAsync = promisify(execFile);
|
const execFileAsync = promisify(execFile);
|
||||||
|
|
||||||
|
// Serve the snapshot from a short-TTL cache so the panel poll (~60s) and any
|
||||||
|
// concurrent refreshes don't fan out ~20 systemctl/git/ps/du subprocesses each
|
||||||
|
// time. Mirrors the health module's caching approach.
|
||||||
|
const CACHE_TTL = 30000; // 30 seconds
|
||||||
|
let cache: { snapshot: HermesOpsSnapshot; at: number } | null = null;
|
||||||
|
let inflight: Promise<HermesOpsSnapshot> | null = null;
|
||||||
|
|
||||||
const instances = [
|
const instances = [
|
||||||
{
|
{
|
||||||
id: 'vijay' as const,
|
id: 'vijay' as const,
|
||||||
@ -33,30 +47,101 @@ const instances = [
|
|||||||
},
|
},
|
||||||
];
|
];
|
||||||
|
|
||||||
/** Outcome of running one external probe command. */
interface ExecResult {
  // Trimmed stdout. Present even when the command exited non-zero (e.g.
  // `systemctl is-active` prints "inactive" and exits 3).
  stdout: string;
  // Whether the command actually executed and exited on its own. False when it
  // could not be spawned at all (ENOENT, EACCES, ...), was killed by the timeout
  // or a signal, or failed in any other way that left no exit status. A
  // non-zero exit with output still counts as `ran: true` so callers can read
  // the output.
  ran: boolean;
}

/**
 * Runs `command` with `args` (optionally in `cwd`) under a 5 s timeout and a
 * 1 MiB output cap, and never throws.
 *
 * @returns `{ stdout, ran: true }` when the process exited (zero or non-zero)
 *   and its output is available; `{ stdout: '', ran: false }` when no verdict
 *   can be drawn because the process never produced an exit status.
 */
async function exec(command: string, args: string[], cwd?: string): Promise<ExecResult> {
  try {
    const { stdout } = await execFileAsync(command, args, {
      cwd,
      timeout: 5000,
      maxBuffer: 1024 * 1024,
    });
    return { stdout: stdout.trim(), ran: true };
  } catch (error) {
    const err = error as { code?: unknown; killed?: unknown; stdout?: unknown } | null | undefined;
    // Only a numeric `code` is a real exit status. Spawn failures carry a string
    // code ('ENOENT', 'EACCES', ...) and signal/timeout kills carry none, yet the
    // promisified execFile still attaches a (possibly empty) `stdout` to those
    // errors — so the presence of `stdout` alone must not be read as "it ran".
    const exitedOnItsOwn = typeof err?.code === 'number' && !err.killed;
    const output = err?.stdout;
    if (exitedOnItsOwn && typeof output === 'string') {
      // Ran but exited non-zero; the output is still meaningful.
      return { stdout: output.trim(), ran: true };
    }
    return { stdout: '', ran: false };
  }
}
|
||||||
|
|
||||||
/**
 * Translates the result of an `is-active` style probe into an active flag and
 * a three-state status.
 *
 * - probe could not run, or ran without printing any state → `unknown`
 * - printed exactly "active" → `up`
 * - printed any other state → `down`
 */
function activeFromResult(result: ExecResult): { active: boolean; status: ProbeStatus } {
  // Empty stdout means the command never reported a state (its failure, if any,
  // went elsewhere), so there is nothing to base a "down" verdict on.
  if (!result.ran || result.stdout === '') return { active: false, status: 'unknown' };

  const active = result.stdout === 'active';
  return { active, status: active ? 'up' : 'down' };
}
|
||||||
|
|
||||||
/** Probes a system-level unit with `systemctl is-active` and classifies the outcome. */
async function probeSystemActive(unit: string): Promise<{ active: boolean; status: ProbeStatus }> {
  const result = await exec('systemctl', ['is-active', unit]);
  return activeFromResult(result);
}
|
||||||
|
|
||||||
|
/**
 * Reports whether `systemctl is-enabled` printed exactly "enabled" for `unit`.
 * A probe that could not run counts as not enabled.
 */
async function probeSystemEnabled(unit: string): Promise<boolean> {
  const { ran, stdout } = await exec('systemctl', ['is-enabled', unit]);
  if (!ran) return false;
  return stdout === 'enabled';
}
|
||||||
|
|
||||||
|
// Bheem's gateway runs under Uma's *user* systemd. Use the authoritative
// `systemctl --user` check via `runuser`, but only trust it when it actually
// printed a unit state. If that probe can't run (no root, no user runtime dir,
// etc.) — or runs and fails without printing any state on stdout — fall back
// to the legacy process-table scan so we degrade to the previous behaviour
// rather than reporting a false "down".
async function probeUmaGatewayActive(): Promise<{ active: boolean; status: ProbeStatus }> {
  const userCheck = await exec('runuser', [
    '-u',
    'uma',
    '--',
    'systemctl',
    '--user',
    'is-active',
    'uma-hermes-gateway.service',
  ]);
  // A non-empty stdout is the unit state ("active", "inactive", ...). An empty
  // one means the wrapper or systemctl failed before reporting anything.
  if (userCheck.ran && userCheck.stdout !== '') {
    const active = userCheck.stdout === 'active';
    return { active, status: active ? 'up' : 'down' };
  }

  // Legacy check: look for a gateway process owned by `uma` in the process table.
  const ps = await exec('ps', ['-eo', 'user=,args=']);
  if (ps.ran) {
    const active = ps.stdout.split('\n').some((line) => {
      const trimmed = line.trimStart();
      return trimmed.startsWith('uma ') && trimmed.includes('hermes_cli.main gateway');
    });
    return { active, status: active ? 'up' : 'down' };
  }

  // Neither probe produced a verdict.
  return { active: false, status: 'unknown' };
}
|
||||||
|
|
||||||
|
/**
 * Reports whether Uma's user-level gateway unit is enabled.
 *
 * Prefers `systemctl --user is-enabled` via `runuser`. When that probe cannot
 * run, or runs without printing any state on stdout, falls back to the legacy
 * check for the unit's symlink under `default.target.wants`.
 */
async function probeUmaGatewayEnabled(): Promise<boolean> {
  const userCheck = await exec('runuser', [
    '-u',
    'uma',
    '--',
    'systemctl',
    '--user',
    'is-enabled',
    'uma-hermes-gateway.service',
  ]);
  // Only a printed state is authoritative; an empty stdout means the probe
  // failed before it could answer, so use the legacy check instead.
  if (userCheck.ran && userCheck.stdout !== '') return userCheck.stdout === 'enabled';

  return existsSync('/home/uma/.config/systemd/user/default.target.wants/uma-hermes-gateway.service');
}
|
||||||
|
|
||||||
async function getTimer(name: string): Promise<HermesOpsTimer> {
|
async function getTimer(name: string): Promise<HermesOpsTimer> {
|
||||||
const active = await isActive(name);
|
const { active, status } = await probeSystemActive(name);
|
||||||
const show = await run('systemctl', [
|
const show = await exec('systemctl', [
|
||||||
'show',
|
'show',
|
||||||
name,
|
name,
|
||||||
'-p',
|
'-p',
|
||||||
@ -66,7 +151,7 @@ async function getTimer(name: string): Promise<HermesOpsTimer> {
|
|||||||
'--no-pager',
|
'--no-pager',
|
||||||
]);
|
]);
|
||||||
const properties = Object.fromEntries(
|
const properties = Object.fromEntries(
|
||||||
(show ?? '')
|
(show.ran ? show.stdout : '')
|
||||||
.split('\n')
|
.split('\n')
|
||||||
.map((line) => {
|
.map((line) => {
|
||||||
const [key, ...value] = line.split('=');
|
const [key, ...value] = line.split('=');
|
||||||
@ -78,43 +163,38 @@ async function getTimer(name: string): Promise<HermesOpsTimer> {
|
|||||||
return {
|
return {
|
||||||
name,
|
name,
|
||||||
active,
|
active,
|
||||||
|
status,
|
||||||
nextRun: properties.NextElapseUSecRealtime ?? null,
|
nextRun: properties.NextElapseUSecRealtime ?? null,
|
||||||
lastRun: properties.LastTriggerUSec ?? null,
|
lastRun: properties.LastTriggerUSec ?? null,
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
async function isUmaGatewayActive(): Promise<boolean> {
|
|
||||||
const output = await run('ps', ['-eo', 'user=,args=']);
|
|
||||||
return Boolean(
|
|
||||||
output?.split('\n').some((line) => {
|
|
||||||
const trimmed = line.trimStart();
|
|
||||||
return trimmed.startsWith('uma ') && trimmed.includes('hermes_cli.main gateway');
|
|
||||||
}),
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
async function isUmaGatewayEnabled(): Promise<boolean> {
|
|
||||||
return existsSync('/home/uma/.config/systemd/user/default.target.wants/uma-hermes-gateway.service');
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
 * Inspects the backup repository at `path` with git and `du`.
 *
 * All probes run in parallel. The repo counts as readable (`status: 'up'`)
 * only when `git rev-parse` actually returned a HEAD hash; otherwise the
 * status is `unknown` and `clean` is reported as `false`, because an empty
 * `git status` from a failed command says nothing about the working tree.
 */
async function getRepo(path: string): Promise<HermesOpsRepo> {
  const [branch, statusOut, head, lastCommitAt, gitSize, backupSize] = await Promise.all([
    exec('git', ['branch', '--show-current'], path),
    exec('git', ['status', '--porcelain'], path),
    exec('git', ['rev-parse', '--short', 'HEAD'], path),
    exec('git', ['log', '-1', '--format=%cI'], path),
    exec('du', ['-sh', '.git'], path),
    exec('du', ['-sh', 'hermes_persistent_backup'], path),
  ]);

  const size = [gitSize, backupSize]
    .filter((r) => r.ran && r.stdout)
    .map((r) => r.stdout)
    .join(' / ');

  // A git command that ran but failed (e.g. `path` is not a repository) exits
  // non-zero with empty stdout. Requiring a non-empty HEAD keeps that case from
  // being reported as a healthy, clean repo.
  const headHash = head.ran ? head.stdout : '';
  const status: ProbeStatus = headHash ? 'up' : 'unknown';
  const readable = status === 'up';

  return {
    path,
    branch: branch.ran ? branch.stdout || null : null,
    // `clean` only means something when we could actually read the repo.
    clean: readable && statusOut.ran && statusOut.stdout === '',
    head: headHash || null,
    lastCommitAt: lastCommitAt.ran ? lastCommitAt.stdout || null : null,
    size: size ? size.replace(/\n/g, ' / ') : null,
    status,
  };
}
|
||||||
|
|
||||||
@ -144,20 +224,21 @@ async function tokenExists(path: string): Promise<boolean> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/**
 * Returns the first line printed by `tailscale ip -4`, or `null` when the
 * command could not run or printed nothing.
 */
async function getTailscaleIp(): Promise<string | null> {
  const { ran, stdout } = await exec('tailscale', ['ip', '-4']);
  if (!ran) return null;

  const [firstLine] = stdout.split('\n');
  return firstLine || null;
}
|
||||||
|
|
||||||
/**
 * Counts `ps -ef` lines that mention `hermes_cli.main` but neither `gateway`
 * nor `grep`. Returns 0 when `ps` could not run.
 */
async function getActiveHermesSessionCount(): Promise<number> {
  const { ran, stdout } = await exec('ps', ['-ef']);
  if (!ran) return 0;

  let sessions = 0;
  for (const line of stdout.split('\n')) {
    const isSession =
      line.includes('hermes_cli.main') && !line.includes('gateway') && !line.includes('grep');
    if (isSession) sessions += 1;
  }
  return sessions;
}
|
||||||
|
|
||||||
export async function getHermesOpsSnapshot(): Promise<HermesOpsSnapshot> {
|
async function buildSnapshot(): Promise<HermesOpsSnapshot> {
|
||||||
const tailscaleIp = await getTailscaleIp();
|
const tailscaleIp = await getTailscaleIp();
|
||||||
const warnings: string[] = [];
|
const warnings: string[] = [];
|
||||||
const emergencyDriveUpload = await getTimer('hermes-emergency-drive-upload.timer');
|
const emergencyDriveUpload = await getTimer('hermes-emergency-drive-upload.timer');
|
||||||
@ -166,13 +247,13 @@ export async function getHermesOpsSnapshot(): Promise<HermesOpsSnapshot> {
|
|||||||
const results: HermesOpsInstance[] = [];
|
const results: HermesOpsInstance[] = [];
|
||||||
for (const item of instances) {
|
for (const item of instances) {
|
||||||
const gatewayActiveCheck =
|
const gatewayActiveCheck =
|
||||||
item.gatewayKind === 'uma-user' ? isUmaGatewayActive() : isActive(item.gatewayService);
|
item.gatewayKind === 'uma-user' ? probeUmaGatewayActive() : probeSystemActive(item.gatewayService);
|
||||||
const gatewayEnabledCheck =
|
const gatewayEnabledCheck =
|
||||||
item.gatewayKind === 'uma-user' ? isUmaGatewayEnabled() : isEnabled(item.gatewayService);
|
item.gatewayKind === 'uma-user' ? probeUmaGatewayEnabled() : probeSystemEnabled(item.gatewayService);
|
||||||
const [gatewayActive, gatewayEnabled, dashboardActive, backupTimer, repo, stats, googleToken] = await Promise.all([
|
const [gateway, gatewayEnabled, dashboard, backupTimer, repo, stats, googleToken] = await Promise.all([
|
||||||
gatewayActiveCheck,
|
gatewayActiveCheck,
|
||||||
gatewayEnabledCheck,
|
gatewayEnabledCheck,
|
||||||
isActive(item.dashboardService),
|
probeSystemActive(item.dashboardService),
|
||||||
getTimer(item.backupTimer),
|
getTimer(item.backupTimer),
|
||||||
getRepo(item.repoPath),
|
getRepo(item.repoPath),
|
||||||
manifestStats(`${item.repoPath}/hermes_persistent_backup`),
|
manifestStats(`${item.repoPath}/hermes_persistent_backup`),
|
||||||
@ -180,12 +261,22 @@ export async function getHermesOpsSnapshot(): Promise<HermesOpsSnapshot> {
|
|||||||
]);
|
]);
|
||||||
|
|
||||||
const dashboardUrl = tailscaleIp ? `http://${tailscaleIp}:${item.dashboardPort}/` : `:${item.dashboardPort}`;
|
const dashboardUrl = tailscaleIp ? `http://${tailscaleIp}:${item.dashboardPort}/` : `:${item.dashboardPort}`;
|
||||||
if (!gatewayActive) warnings.push(`${item.label} gateway is not active`);
|
|
||||||
if (!gatewayEnabled) warnings.push(`${item.label} gateway auto-start is not enabled`);
|
if (gateway.status === 'down') warnings.push(`${item.label} gateway is not active`);
|
||||||
if (!dashboardActive) warnings.push(`${item.label} private dashboard is not active`);
|
else if (gateway.status === 'unknown') warnings.push(`${item.label} gateway status could not be determined`);
|
||||||
if (!backupTimer.active) warnings.push(`${item.label} backup timer is not active`);
|
if (gateway.status !== 'unknown' && !gatewayEnabled) {
|
||||||
if (!repo.head) warnings.push(`${item.label} backup repo HEAD could not be read`);
|
warnings.push(`${item.label} gateway auto-start is not enabled`);
|
||||||
if (!repo.clean) warnings.push(`${item.label} backup repo has uncommitted changes`);
|
}
|
||||||
|
if (dashboard.status === 'down') warnings.push(`${item.label} private dashboard is not active`);
|
||||||
|
else if (dashboard.status === 'unknown') {
|
||||||
|
warnings.push(`${item.label} private dashboard status could not be determined`);
|
||||||
|
}
|
||||||
|
if (backupTimer.status === 'down') warnings.push(`${item.label} backup timer is not active`);
|
||||||
|
else if (backupTimer.status === 'unknown') {
|
||||||
|
warnings.push(`${item.label} backup timer status could not be determined`);
|
||||||
|
}
|
||||||
|
if (repo.status === 'unknown') warnings.push(`${item.label} backup repo HEAD could not be read`);
|
||||||
|
else if (!repo.clean) warnings.push(`${item.label} backup repo has uncommitted changes`);
|
||||||
if (!googleToken) warnings.push(`${item.label} Google Workspace token is missing`);
|
if (!googleToken) warnings.push(`${item.label} Google Workspace token is missing`);
|
||||||
|
|
||||||
results.push({
|
results.push({
|
||||||
@ -194,13 +285,15 @@ export async function getHermesOpsSnapshot(): Promise<HermesOpsSnapshot> {
|
|||||||
hermesHome: item.hermesHome,
|
hermesHome: item.hermesHome,
|
||||||
gateway: {
|
gateway: {
|
||||||
service: item.gatewayService,
|
service: item.gatewayService,
|
||||||
active: gatewayActive,
|
active: gateway.active,
|
||||||
enabled: gatewayEnabled,
|
enabled: gatewayEnabled,
|
||||||
|
status: gateway.status,
|
||||||
},
|
},
|
||||||
dashboard: {
|
dashboard: {
|
||||||
service: item.dashboardService,
|
service: item.dashboardService,
|
||||||
active: dashboardActive,
|
active: dashboard.active,
|
||||||
url: dashboardUrl,
|
url: dashboardUrl,
|
||||||
|
status: dashboard.status,
|
||||||
},
|
},
|
||||||
backup: {
|
backup: {
|
||||||
timer: backupTimer,
|
timer: backupTimer,
|
||||||
@ -215,7 +308,10 @@ export async function getHermesOpsSnapshot(): Promise<HermesOpsSnapshot> {
|
|||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!emergencyDriveUpload.active) warnings.push('Emergency Google Drive upload timer is not active');
|
if (emergencyDriveUpload.status === 'down') warnings.push('Emergency Google Drive upload timer is not active');
|
||||||
|
else if (emergencyDriveUpload.status === 'unknown') {
|
||||||
|
warnings.push('Emergency Google Drive upload timer status could not be determined');
|
||||||
|
}
|
||||||
if (!existsSync('/root/.config/hermes-google-drive/user-token.json')) {
|
if (!existsSync('/root/.config/hermes-google-drive/user-token.json')) {
|
||||||
warnings.push('Emergency Drive OAuth token is missing');
|
warnings.push('Emergency Drive OAuth token is missing');
|
||||||
}
|
}
|
||||||
@ -225,6 +321,7 @@ export async function getHermesOpsSnapshot(): Promise<HermesOpsSnapshot> {
|
|||||||
name: emergencyDriveUpload.name,
|
name: emergencyDriveUpload.name,
|
||||||
label: 'Emergency Drive upload',
|
label: 'Emergency Drive upload',
|
||||||
active: emergencyDriveUpload.active,
|
active: emergencyDriveUpload.active,
|
||||||
|
status: emergencyDriveUpload.status,
|
||||||
nextRun: emergencyDriveUpload.nextRun,
|
nextRun: emergencyDriveUpload.nextRun,
|
||||||
lastRun: emergencyDriveUpload.lastRun,
|
lastRun: emergencyDriveUpload.lastRun,
|
||||||
},
|
},
|
||||||
@ -232,18 +329,21 @@ export async function getHermesOpsSnapshot(): Promise<HermesOpsSnapshot> {
|
|||||||
name: instance.backup.timer.name,
|
name: instance.backup.timer.name,
|
||||||
label: `${instance.label} backup`,
|
label: `${instance.label} backup`,
|
||||||
active: instance.backup.timer.active,
|
active: instance.backup.timer.active,
|
||||||
|
status: instance.backup.timer.status,
|
||||||
nextRun: instance.backup.timer.nextRun,
|
nextRun: instance.backup.timer.nextRun,
|
||||||
lastRun: instance.backup.timer.lastRun,
|
lastRun: instance.backup.timer.lastRun,
|
||||||
})),
|
})),
|
||||||
];
|
];
|
||||||
|
|
||||||
|
const now = new Date().toISOString();
|
||||||
return {
|
return {
|
||||||
generatedAt: new Date().toISOString(),
|
generatedAt: now,
|
||||||
|
cached: false,
|
||||||
tailscaleIp,
|
tailscaleIp,
|
||||||
emergencyDriveUpload,
|
emergencyDriveUpload,
|
||||||
activeSessions: {
|
activeSessions: {
|
||||||
active: activeSessions,
|
active: activeSessions,
|
||||||
updatedAt: new Date().toISOString(),
|
updatedAt: now,
|
||||||
},
|
},
|
||||||
cronJobs,
|
cronJobs,
|
||||||
recentAlerts: warnings.slice(0, 6),
|
recentAlerts: warnings.slice(0, 6),
|
||||||
@ -268,3 +368,32 @@ export async function getHermesOpsSnapshot(): Promise<HermesOpsSnapshot> {
|
|||||||
warnings,
|
warnings,
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Returns the Hermes operations snapshot, served from a short-TTL cache.
 *
 * - Without `force`: a cached snapshot younger than `CACHE_TTL` is returned
 *   with `cached: true`; otherwise the caller joins the build already in
 *   flight, or starts one.
 * - With `force: true`: the cache and any in-flight build are bypassed and a
 *   fresh build always starts. That build is still registered as in flight so
 *   later non-forced callers join it instead of probing again.
 *
 * Cache age is measured from when a build started probing, and a build never
 * replaces a cache entry produced by a build that started after it.
 *
 * @param options.force Skip the cache and start a new build.
 * @returns The snapshot; rejects if the underlying build rejects.
 */
export async function getHermesOpsSnapshot(options?: { force?: boolean }): Promise<HermesOpsSnapshot> {
  const force = options?.force ?? false;

  if (!force) {
    if (cache && Date.now() - cache.at < CACHE_TTL) {
      return { ...cache.snapshot, cached: true };
    }
    // Coalesce concurrent requests onto a single in-flight computation.
    if (inflight) return inflight;
  }

  const startedAt = Date.now();
  const promise = buildSnapshot()
    .then((snapshot) => {
      // An older build that finishes late must not clobber fresher data.
      if (!cache || startedAt >= cache.at) cache = { snapshot, at: startedAt };
      return snapshot;
    })
    .finally(() => {
      // Only the most recently registered build may clear the slot.
      if (inflight === promise) inflight = null;
    });

  inflight = promise;
  return promise;
}
|
||||||
|
|
||||||
|
/**
 * Test hook: forgets both the cached snapshot and any registered in-flight
 * build so each test case starts from empty module state.
 */
export function clearHermesOpsCache(): void {
  inflight = null;
  cache = null;
}
|
||||||
|
|||||||
@ -1,10 +1,15 @@
|
|||||||
import type { FastifyInstance } from 'fastify';
|
import type { FastifyInstance } from 'fastify';
|
||||||
import { getHermesOpsSnapshot } from './repository.js';
|
import { getHermesOpsSnapshot } from './repository.js';
|
||||||
|
import { HermesOpsSnapshotSchema } from './types.js';
|
||||||
|
|
||||||
export async function hermesOpsRoutes(fastify: FastifyInstance) {
|
export async function hermesOpsRoutes(fastify: FastifyInstance) {
|
||||||
fastify.get('/hermes/ops', async (req, reply) => {
|
fastify.get('/hermes/ops', async (req, reply) => {
|
||||||
try {
|
try {
|
||||||
return reply.send(await getHermesOpsSnapshot());
|
const snapshot = await getHermesOpsSnapshot();
|
||||||
|
// Validate our own output against the stable contract before sending, so a
|
||||||
|
// shape regression surfaces as a 500 here rather than corrupt UI state.
|
||||||
|
const validated = HermesOpsSnapshotSchema.parse(snapshot);
|
||||||
|
return reply.send(validated);
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
fastify.log.error(error, 'Failed to get Hermes operations snapshot');
|
fastify.log.error(error, 'Failed to get Hermes operations snapshot');
|
||||||
return reply.code(500).send({ error: 'Failed to get Hermes operations snapshot' });
|
return reply.code(500).send({ error: 'Failed to get Hermes operations snapshot' });
|
||||||
|
|||||||
@ -1,74 +1,102 @@
|
|||||||
export interface HermesOpsTimer {
|
import { z } from 'zod';
|
||||||
name: string;
|
|
||||||
active: boolean;
|
|
||||||
nextRun: string | null;
|
|
||||||
lastRun: string | null;
|
|
||||||
}
|
|
||||||
|
|
||||||
export interface HermesOpsRepo {
|
// A probed value is `up` (confirmed healthy), `down` (confirmed unhealthy/inactive),
|
||||||
path: string;
|
// or `unknown` (the probe itself could not run — command missing, timed out, or no
|
||||||
branch: string | null;
|
// permission). This lets the UI distinguish "definitely down" from "couldn't tell".
|
||||||
clean: boolean;
|
export const ProbeStatusSchema = z.enum(['up', 'down', 'unknown']);
|
||||||
head: string | null;
|
export type ProbeStatus = z.infer<typeof ProbeStatusSchema>;
|
||||||
lastCommitAt: string | null;
|
|
||||||
size: string | null;
|
|
||||||
}
|
|
||||||
|
|
||||||
export interface HermesOpsGoogle {
|
export const HermesOpsTimerSchema = z.object({
|
||||||
workspaceToken: boolean;
|
name: z.string(),
|
||||||
driveFolder: string;
|
active: z.boolean(),
|
||||||
}
|
status: ProbeStatusSchema,
|
||||||
|
nextRun: z.string().nullable(),
|
||||||
|
lastRun: z.string().nullable(),
|
||||||
|
});
|
||||||
|
export type HermesOpsTimer = z.infer<typeof HermesOpsTimerSchema>;
|
||||||
|
|
||||||
export interface HermesOpsInstance {
|
export const HermesOpsRepoSchema = z.object({
|
||||||
id: 'vijay' | 'bheem';
|
path: z.string(),
|
||||||
label: string;
|
branch: z.string().nullable(),
|
||||||
hermesHome: string;
|
clean: z.boolean(),
|
||||||
gateway: {
|
head: z.string().nullable(),
|
||||||
service: string;
|
lastCommitAt: z.string().nullable(),
|
||||||
active: boolean;
|
size: z.string().nullable(),
|
||||||
enabled: boolean;
|
// `up` = HEAD was readable; `unknown` = git could not be read (path/permission).
|
||||||
};
|
status: ProbeStatusSchema,
|
||||||
dashboard: {
|
});
|
||||||
service: string;
|
export type HermesOpsRepo = z.infer<typeof HermesOpsRepoSchema>;
|
||||||
active: boolean;
|
|
||||||
url: string;
|
|
||||||
};
|
|
||||||
backup: {
|
|
||||||
timer: HermesOpsTimer;
|
|
||||||
repo: HermesOpsRepo;
|
|
||||||
restoredFileCount: number | null;
|
|
||||||
restoredCronJobs: number | null;
|
|
||||||
};
|
|
||||||
google: HermesOpsGoogle;
|
|
||||||
}
|
|
||||||
|
|
||||||
export interface HermesOpsSessionSummary {
|
export const HermesOpsGoogleSchema = z.object({
|
||||||
active: number;
|
workspaceToken: z.boolean(),
|
||||||
updatedAt: string | null;
|
driveFolder: z.string(),
|
||||||
}
|
});
|
||||||
|
export type HermesOpsGoogle = z.infer<typeof HermesOpsGoogleSchema>;
|
||||||
|
|
||||||
export interface HermesOpsCronJob {
|
export const HermesOpsGatewaySchema = z.object({
|
||||||
name: string;
|
service: z.string(),
|
||||||
label: string;
|
active: z.boolean(),
|
||||||
active: boolean;
|
enabled: z.boolean(),
|
||||||
nextRun: string | null;
|
status: ProbeStatusSchema,
|
||||||
lastRun: string | null;
|
});
|
||||||
}
|
|
||||||
|
|
||||||
export interface HermesOpsLink {
|
export const HermesOpsDashboardSchema = z.object({
|
||||||
label: string;
|
service: z.string(),
|
||||||
href: string;
|
active: z.boolean(),
|
||||||
description: string;
|
url: z.string(),
|
||||||
}
|
status: ProbeStatusSchema,
|
||||||
|
});
|
||||||
|
|
||||||
export interface HermesOpsSnapshot {
|
export const HermesOpsInstanceSchema = z.object({
|
||||||
generatedAt: string;
|
id: z.enum(['vijay', 'bheem']),
|
||||||
tailscaleIp: string | null;
|
label: z.string(),
|
||||||
emergencyDriveUpload: HermesOpsTimer;
|
hermesHome: z.string(),
|
||||||
activeSessions: HermesOpsSessionSummary;
|
gateway: HermesOpsGatewaySchema,
|
||||||
cronJobs: HermesOpsCronJob[];
|
dashboard: HermesOpsDashboardSchema,
|
||||||
recentAlerts: string[];
|
backup: z.object({
|
||||||
quickLinks: HermesOpsLink[];
|
timer: HermesOpsTimerSchema,
|
||||||
instances: HermesOpsInstance[];
|
repo: HermesOpsRepoSchema,
|
||||||
warnings: string[];
|
restoredFileCount: z.number().nullable(),
|
||||||
}
|
restoredCronJobs: z.number().nullable(),
|
||||||
|
}),
|
||||||
|
google: HermesOpsGoogleSchema,
|
||||||
|
});
|
||||||
|
export type HermesOpsInstance = z.infer<typeof HermesOpsInstanceSchema>;
|
||||||
|
|
||||||
|
/**
 * Summary of active sessions carried on the snapshot: a numeric `active`
 * count plus a nullable `updatedAt` string.
 */
export const HermesOpsSessionSummarySchema = z.object({
  active: z.number(),
  updatedAt: z.nullable(z.string()),
});

/** Static type derived from {@link HermesOpsSessionSummarySchema}. */
export type HermesOpsSessionSummary = z.infer<typeof HermesOpsSessionSummarySchema>;
|
||||||
|
|
||||||
|
/**
 * One cron job entry in the snapshot.
 *
 * `active` is the plain boolean flag; `status` carries the probe result using
 * `ProbeStatusSchema`. `nextRun` and `lastRun` are strings or `null`.
 */
export const HermesOpsCronJobSchema = z.object({
  name: z.string(),
  label: z.string(),
  active: z.boolean(),
  status: ProbeStatusSchema,
  nextRun: z.nullable(z.string()),
  lastRun: z.nullable(z.string()),
});

/** Static type derived from {@link HermesOpsCronJobSchema}. */
export type HermesOpsCronJob = z.infer<typeof HermesOpsCronJobSchema>;
|
||||||
|
|
||||||
|
/** A quick link shown with the snapshot: display label, target href and a description. */
export const HermesOpsLinkSchema = z.object({
  label: z.string(),
  href: z.string(),
  description: z.string(),
});

/** Static type derived from {@link HermesOpsLinkSchema}. */
export type HermesOpsLink = z.infer<typeof HermesOpsLinkSchema>;
|
||||||
|
|
||||||
|
/**
 * Top-level shape of the Hermes operations snapshot. The `/hermes/ops` route
 * parses its response against this schema before sending it.
 */
export const HermesOpsSnapshotSchema = z.object({
  generatedAt: z.string(),
  // `true` when the payload came from the short-TTL cache instead of a fresh probe run.
  cached: z.boolean(),
  tailscaleIp: z.nullable(z.string()),
  emergencyDriveUpload: HermesOpsTimerSchema,
  activeSessions: HermesOpsSessionSummarySchema,
  cronJobs: HermesOpsCronJobSchema.array(),
  recentAlerts: z.string().array(),
  quickLinks: HermesOpsLinkSchema.array(),
  instances: HermesOpsInstanceSchema.array(),
  warnings: z.string().array(),
});

/** Static type derived from {@link HermesOpsSnapshotSchema}. */
export type HermesOpsSnapshot = z.infer<typeof HermesOpsSnapshotSchema>;
|
||||||
|
|||||||
@ -76,13 +76,13 @@ A single private dashboard where, for **both Vijay and Bheem**, S can see at a g
|
|||||||
|
|
||||||
The `hermes-ops` snapshot becomes the single source of truth for live status. Before building UI on it, harden it.
|
The `hermes-ops` snapshot becomes the single source of truth for live status. Before building UI on it, harden it.
|
||||||
|
|
||||||
- [ ] Add a short-TTL cache (mirror the health module's 30s cache) so the 60s panel poll doesn't fan out ~20 `systemctl`/`git`/`ps`/`du` subprocesses every refresh; serve cached snapshot with `generatedAt`.
|
- [x] Add a short-TTL cache (mirror the health module's 30s cache) so the 60s panel poll doesn't fan out ~20 `systemctl`/`git`/`ps`/`du` subprocesses every refresh; serve cached snapshot with `generatedAt`.
|
||||||
- [ ] Replace brittle Bheem/Uma checks in `repository.ts`:
|
- [x] Replace brittle Bheem/Uma checks in `repository.ts` *(runuser `systemctl --user` with ps/existsSync fallback so a failed probe degrades to the legacy check, not a false "down")*:
|
||||||
- `isUmaGatewayActive()` (currently `ps -eo` string match) → `runuser -u uma -- systemctl --user is-active uma-hermes-gateway.service` (or `--machine=uma@.host`).
|
- `isUmaGatewayActive()` (currently `ps -eo` string match) → `runuser -u uma -- systemctl --user is-active uma-hermes-gateway.service` (or `--machine=uma@.host`).
|
||||||
- `isUmaGatewayEnabled()` (currently hardcoded `existsSync` of a wants-symlink) → `systemctl --user is-enabled` via the same path.
|
- `isUmaGatewayEnabled()` (currently hardcoded `existsSync` of a wants-symlink) → `systemctl --user is-enabled` via the same path.
|
||||||
- [ ] Stop swallowing every failure to `null` indiscriminately: distinguish "unit inactive" from "probe failed/timed out" and surface per-field status so the UI can show *unknown* vs *down*.
|
- [x] Stop swallowing every failure to `null` indiscriminately: distinguish "unit inactive" from "probe failed/timed out" and surface per-field status so the UI can show *unknown* vs *down*.
|
||||||
- [ ] Add Zod validation + a stable typed contract for `HermesOpsSnapshot` on the route.
|
- [x] Add Zod validation + a stable typed contract for `HermesOpsSnapshot` on the route.
|
||||||
- [ ] **Add unit tests for the `hermes-ops` repository** (mock `execFile`/fs) — closes the REVIEW_ACTIONS "only `services` has tests" gap for this module.
|
- [x] **Add unit tests for the `hermes-ops` repository** (mock `execFile`/fs) — closes the REVIEW_ACTIONS "only `services` has tests" gap for this module.
|
||||||
- [ ] Read Bheem/Uma state via a **self-reporting ops exporter** (Decision #2): a read-only `uma` user-systemd timer writes a sanitized JSON snapshot to a known path; the root backend reads + aggregates it (Vijay gets a symmetric exporter). Interim stopgap until it ships: `runuser -u uma -- systemctl --user is-active/is-enabled` instead of the `ps`/`existsSync` checks.
|
- [ ] Read Bheem/Uma state via a **self-reporting ops exporter** (Decision #2): a read-only `uma` user-systemd timer writes a sanitized JSON snapshot to a known path; the root backend reads + aggregates it (Vijay gets a symmetric exporter). Interim stopgap until it ships: `runuser -u uma -- systemctl --user is-active/is-enabled` instead of the `ps`/`existsSync` checks.
|
||||||
|
|
||||||
## Phase 2 — Instance dimension across Mission Control (G2)
|
## Phase 2 — Instance dimension across Mission Control (G2)
|
||||||
@ -120,8 +120,8 @@ This is the biggest operational asymmetry and the reason half the ops-panel warn
|
|||||||
|
|
||||||
## Phase 5 — Dashboard app hardening (G5)
|
## Phase 5 — Dashboard app hardening (G5)
|
||||||
|
|
||||||
- [ ] **P0:** Fix the CI workspace path (`${{ gitea.workspace }}`) in `.gitea/workflows/ci.yml`, `DEPLOYMENT.md`, `scripts/deploy-hotcopy.sh` (currently point at non-existent `/opt/bytelyst/bytelyst-devops-tools/...`).
|
- [x] **P0:** Fix the CI workspace path (`${{ gitea.workspace }}`) in `.gitea/workflows/ci.yml`, `DEPLOYMENT.md`, `scripts/deploy-hotcopy.sh` (currently point at non-existent `/opt/bytelyst/bytelyst-devops-tools/...`).
|
||||||
- [ ] **P0:** Replace the no-op `lint` echo with real linting (`next lint` for web, minimal ESLint for backend); make `pnpm lint` fail on bad code.
|
- [x] **P0:** Replace the no-op `lint` echo with real linting (`next lint` for web, minimal ESLint for backend); make `pnpm lint` fail on bad code.
|
||||||
- [ ] **P1:** Add tests for `auth`, `csrf`, `deployments/orchestrator`, `health`, **and `hermes-ops`**; add `pnpm test:coverage` gate.
|
- [ ] **P1:** Add tests for `auth`, `csrf`, `deployments/orchestrator`, `health`, **and `hermes-ops`**; add `pnpm test:coverage` gate.
|
||||||
- [ ] **P1:** Resolve the SSE TODO — either ship a Fastify-5-compatible log-stream or remove the SSE claim from docs/UI.
|
- [ ] **P1:** Resolve the SSE TODO — either ship a Fastify-5-compatible log-stream or remove the SSE claim from docs/UI.
|
||||||
- [ ] **P1:** Fix doc drift (web port 3000 vs 3049; endpoint URLs; merge duplicate deployment docs).
|
- [ ] **P1:** Fix doc drift (web port 3000 vs 3049; endpoint URLs; merge duplicate deployment docs).
|
||||||
@ -185,11 +185,11 @@ This roadmap is complete when:
|
|||||||
Update only with evidence (source review, tests, build output, or browser/VM verification).
|
Update only with evidence (source review, tests, build output, or browser/VM verification).
|
||||||
|
|
||||||
- [ ] Phase 0 — Guardrails reconfirmed
|
- [ ] Phase 0 — Guardrails reconfirmed
|
||||||
- [ ] Phase 1 — `hermes-ops` hardened + tested
|
- [x] Phase 1 — `hermes-ops` hardened + tested
|
||||||
- [ ] Phase 2 — Instance dimension + switcher
|
- [ ] Phase 2 — Instance dimension + switcher
|
||||||
- [ ] Phase 3 — Real telemetry ingestion + panes converted
|
- [ ] Phase 3 — Real telemetry ingestion + panes converted
|
||||||
- [ ] Phase 4 — Bheem/Uma parity (backup, watchdog, restore drill)
|
- [ ] Phase 4 — Bheem/Uma parity (backup, watchdog, restore drill)
|
||||||
- [ ] Phase 5 — App/CI hardening (P0 → P2)
|
- [ ] Phase 5 — App/CI hardening (P0 done; P1/P2 pending)
|
||||||
- [ ] Phase 6 — UX polish
|
- [ ] Phase 6 — UX polish
|
||||||
- [ ] Phase 7 — Security & access
|
- [ ] Phase 7 — Security & access
|
||||||
- [ ] Phase 8 — Notifications & Telegram
|
- [ ] Phase 8 — Notifications & Telegram
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user