Compare commits
3 Commits
9210a8890f
...
5a2d92f519
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
5a2d92f519 | ||
| e2db92f3b1 | |||
| 8f522e3505 |
@ -1,8 +1,8 @@
|
||||
'use client';
|
||||
|
||||
import { useEffect, useMemo, useState } from 'react';
|
||||
import { useEffect, useMemo, useRef, useState } from 'react';
|
||||
import Link from 'next/link';
|
||||
import { AlertTriangle, CheckCircle2, Cloud, DatabaseBackup, ExternalLink, Gauge, HardDrive, RefreshCw, ShieldCheck, Timer, Wifi, Activity, CalendarClock, Link2 } from 'lucide-react';
|
||||
import { AlertTriangle, CheckCircle2, Cloud, Copy, DatabaseBackup, ExternalLink, Gauge, HardDrive, RefreshCw, ShieldCheck, Timer, Wifi, Activity, CalendarClock, Link2 } from 'lucide-react';
|
||||
import { Badge, Button } from '@/components/ui/Primitives';
|
||||
import { SectionCard } from '@/components/hermes-shell';
|
||||
import { api, type HermesOpsInstance, type HermesOpsSnapshot } from '@/lib/api';
|
||||
@ -94,6 +94,10 @@ function InstanceCard({ instance }: { instance: HermesOpsInstance }) {
|
||||
Open dashboard <ExternalLink className="ml-2 h-4 w-4" />
|
||||
</a>
|
||||
</Button>
|
||||
<Button variant="ghost" size="sm" onClick={() => void navigator.clipboard.writeText(instance.dashboard.url)}>
|
||||
<Copy className="mr-2 h-4 w-4" />
|
||||
Copy URL
|
||||
</Button>
|
||||
</div>
|
||||
</article>
|
||||
);
|
||||
@ -101,14 +105,19 @@ function InstanceCard({ instance }: { instance: HermesOpsInstance }) {
|
||||
|
||||
export function HermesOpsPanel() {
|
||||
const [snapshot, setSnapshot] = useState<HermesOpsSnapshot | null>(null);
|
||||
const [previousSnapshot, setPreviousSnapshot] = useState<HermesOpsSnapshot | null>(null);
|
||||
const [loading, setLoading] = useState(true);
|
||||
const [error, setError] = useState<string | null>(null);
|
||||
const latestSnapshotRef = useRef<HermesOpsSnapshot | null>(null);
|
||||
|
||||
const load = async () => {
|
||||
setLoading(true);
|
||||
setError(null);
|
||||
try {
|
||||
setSnapshot(await api.getHermesOps());
|
||||
const nextSnapshot = await api.getHermesOps();
|
||||
setPreviousSnapshot(latestSnapshotRef.current);
|
||||
latestSnapshotRef.current = nextSnapshot;
|
||||
setSnapshot(nextSnapshot);
|
||||
} catch (err) {
|
||||
setError(err instanceof Error ? err.message : 'Unable to load Hermes operations status');
|
||||
} finally {
|
||||
@ -123,6 +132,44 @@ export function HermesOpsPanel() {
|
||||
}, []);
|
||||
|
||||
const allHealthy = useMemo(() => snapshot ? snapshot.warnings.length === 0 : false, [snapshot]);
|
||||
const snapshotDiff = useMemo(() => {
|
||||
if (!snapshot || !previousSnapshot) return null;
|
||||
|
||||
const previousHealthyInstances = previousSnapshot.instances.filter((instance) =>
|
||||
instance.gateway.active &&
|
||||
instance.dashboard.active &&
|
||||
instance.backup.timer.active &&
|
||||
instance.backup.repo.clean &&
|
||||
instance.google.workspaceToken
|
||||
).length;
|
||||
|
||||
const currentHealthyInstances = snapshot.instances.filter((instance) =>
|
||||
instance.gateway.active &&
|
||||
instance.dashboard.active &&
|
||||
instance.backup.timer.active &&
|
||||
instance.backup.repo.clean &&
|
||||
instance.google.workspaceToken
|
||||
).length;
|
||||
|
||||
return {
|
||||
healthyInstances: currentHealthyInstances - previousHealthyInstances,
|
||||
warnings: snapshot.warnings.length - previousSnapshot.warnings.length,
|
||||
activeSessions: snapshot.activeSessions.active - previousSnapshot.activeSessions.active,
|
||||
activeDashboards: snapshot.instances.filter((instance) => instance.dashboard.active).length - previousSnapshot.instances.filter((instance) => instance.dashboard.active).length,
|
||||
activeBackupTimers: snapshot.instances.filter((instance) => instance.backup.timer.active).length - previousSnapshot.instances.filter((instance) => instance.backup.timer.active).length,
|
||||
};
|
||||
}, [previousSnapshot, snapshot]);
|
||||
const healthyInstances = snapshot
|
||||
? snapshot.instances.filter((instance) =>
|
||||
instance.gateway.active &&
|
||||
instance.dashboard.active &&
|
||||
instance.backup.timer.active &&
|
||||
instance.backup.repo.clean &&
|
||||
instance.google.workspaceToken
|
||||
).length
|
||||
: 0;
|
||||
const activeDashboards = snapshot ? snapshot.instances.filter((instance) => instance.dashboard.active).length : 0;
|
||||
const activeBackupTimers = snapshot ? snapshot.instances.filter((instance) => instance.backup.timer.active).length : 0;
|
||||
|
||||
return (
|
||||
<SectionCard
|
||||
@ -146,6 +193,65 @@ export function HermesOpsPanel() {
|
||||
|
||||
{snapshot ? (
|
||||
<div className="space-y-5">
|
||||
{snapshotDiff ? (
|
||||
<div className="rounded-2xl border border-[var(--bl-border)] bg-[var(--bl-surface-muted)] p-4">
|
||||
<div className="flex items-center justify-between gap-3">
|
||||
<div>
|
||||
<p className="text-sm font-medium text-[var(--bl-text-primary)]">Since previous refresh</p>
|
||||
<p className="text-xs text-[var(--bl-text-secondary)]">Snapshot movement compared with the last poll.</p>
|
||||
</div>
|
||||
<Badge variant="neutral">Delta view</Badge>
|
||||
</div>
|
||||
<div className="mt-3 grid gap-3 md:grid-cols-5">
|
||||
{[
|
||||
{ label: 'Healthy instances', value: snapshotDiff.healthyInstances },
|
||||
{ label: 'Active dashboards', value: snapshotDiff.activeDashboards },
|
||||
{ label: 'Active backups', value: snapshotDiff.activeBackupTimers },
|
||||
{ label: 'Active sessions', value: snapshotDiff.activeSessions },
|
||||
{ label: 'Warnings', value: snapshotDiff.warnings },
|
||||
].map((item) => (
|
||||
<div key={item.label} className="rounded-xl border border-[var(--bl-border)] bg-[var(--bl-surface-card)] p-3">
|
||||
<p className="text-xs uppercase tracking-[0.2em] text-[var(--bl-text-tertiary)]">{item.label}</p>
|
||||
<p className={`mt-2 text-2xl font-semibold ${item.value > 0 ? 'text-[var(--bl-success)]' : item.value < 0 ? 'text-[var(--bl-danger)]' : 'text-[var(--bl-text-primary)]'}`}>
|
||||
{item.value > 0 ? '+' : ''}{item.value}
|
||||
</p>
|
||||
</div>
|
||||
))}
|
||||
</div>
|
||||
</div>
|
||||
) : null}
|
||||
|
||||
<div className="grid gap-3 md:grid-cols-4">
|
||||
<div className="rounded-2xl border border-[var(--bl-border)] bg-[var(--bl-surface-muted)] p-4">
|
||||
<div className="flex items-center gap-2 text-sm text-[var(--bl-text-secondary)]">
|
||||
<ShieldCheck className="h-4 w-4" />
|
||||
Healthy instances
|
||||
</div>
|
||||
<p className="mt-2 text-2xl font-semibold text-[var(--bl-text-primary)]">{healthyInstances}/2</p>
|
||||
</div>
|
||||
<div className="rounded-2xl border border-[var(--bl-border)] bg-[var(--bl-surface-muted)] p-4">
|
||||
<div className="flex items-center gap-2 text-sm text-[var(--bl-text-secondary)]">
|
||||
<Activity className="h-4 w-4" />
|
||||
Active dashboards
|
||||
</div>
|
||||
<p className="mt-2 text-2xl font-semibold text-[var(--bl-text-primary)]">{activeDashboards}/2</p>
|
||||
</div>
|
||||
<div className="rounded-2xl border border-[var(--bl-border)] bg-[var(--bl-surface-muted)] p-4">
|
||||
<div className="flex items-center gap-2 text-sm text-[var(--bl-text-secondary)]">
|
||||
<CalendarClock className="h-4 w-4" />
|
||||
Active backup timers
|
||||
</div>
|
||||
<p className="mt-2 text-2xl font-semibold text-[var(--bl-text-primary)]">{activeBackupTimers}/2</p>
|
||||
</div>
|
||||
<div className="rounded-2xl border border-[var(--bl-border)] bg-[var(--bl-surface-muted)] p-4">
|
||||
<div className="flex items-center gap-2 text-sm text-[var(--bl-text-secondary)]">
|
||||
<AlertTriangle className="h-4 w-4" />
|
||||
Open warnings
|
||||
</div>
|
||||
<p className="mt-2 text-2xl font-semibold text-[var(--bl-text-primary)]">{snapshot.warnings.length}</p>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div className="grid gap-3 md:grid-cols-4">
|
||||
<div className="rounded-2xl border border-[var(--bl-border)] bg-[var(--bl-surface-muted)] p-4">
|
||||
<div className="flex items-center gap-2 text-sm text-[var(--bl-text-secondary)]">
|
||||
|
||||
@ -31,7 +31,7 @@ Observed on 2026-05-27:
|
||||
- Private dashboards:
|
||||
- Root: `http://100.87.53.10:9119/`, `hermes-root-dashboard.service`
|
||||
- Uma: `http://100.87.53.10:9120/`, `uma-hermes-dashboard.service`
|
||||
- Live ops panel shows gateway state, active sessions, cron state, backup freshness, sanitized alerts, and runbook links for both instances.
|
||||
- Live ops panel shows gateway state, active sessions, refresh delta, cron state, backup freshness, sanitized alerts, and runbook links for both instances.
|
||||
|
||||
## Safety guardrail: no public Hermes dashboard/API
|
||||
|
||||
|
||||
@ -21,6 +21,16 @@
|
||||
- **Needs manual UX validation:** dashboard feature-by-feature checks, Telegram approval prompt flow, and Telegram media/file delivery.
|
||||
- **Needs future workflow adoption:** practicing `delegate_task`, spawned/tmux sessions, worktrees, and Kanban on real tasks before checking them as completed.
|
||||
|
||||
## Next To-Dos
|
||||
|
||||
The remaining work is now mostly hardening rather than feature delivery:
|
||||
|
||||
- finish the GitHub/Gitea least-privilege audit for the root-managed push path
|
||||
- decide whether `security.redact_secrets` should be enabled by default
|
||||
- document the gateway-session `privacy.redact_pii` policy
|
||||
- rotate any credentials that were migrated or exposed during the setup work
|
||||
- tighten least-privilege token scopes for GitHub/Gitea, web APIs, and provider keys
|
||||
|
||||
## Purpose
|
||||
|
||||
Turn the Hermes setup ideas from the referenced video into a practical ByteLyst upgrade checklist for this VM-backed, Telegram-driven Hermes installation.
|
||||
|
||||
@ -665,6 +665,18 @@ Known roadmap assumptions to handle safely during implementation:
|
||||
|
||||
---
|
||||
|
||||
## Next Dashboard Improvements
|
||||
|
||||
Potential follow-up work for Hermes Mission Control:
|
||||
|
||||
- warning severity filters for the live ops panel
|
||||
- compact trend cards for recent alert volume and backup freshness over several refreshes
|
||||
- task-ledger deep links from the ops panel into the most recent Hermes work
|
||||
- per-instance action row improvements beyond copy-link/open-dashboard, such as open-runbook shortcuts
|
||||
- optional dark/light theme toggle if the broader dashboard shell eventually supports it
|
||||
|
||||
---
|
||||
|
||||
# Git workflow
|
||||
|
||||
Commit incrementally:
|
||||
|
||||
@ -64,7 +64,7 @@ These listeners were bound on `0.0.0.0` and/or `[::]` during review.
|
||||
| `3040` | `flowmonk-web` | `/opt/bytelyst/learning_ai_common_plat/docker-compose.ecosystem.yml` | none found in Caddy | `needs-decision` | Unhealthy; classify as private/admin or retire |
|
||||
| `3049` | `devops-web` | `/opt/bytelyst/bytelyst-devops-tools/dashboard/docker-compose.yml` | `devops.bytelyst.com` | `private-admin` with direct bypass | Fix old repo path drift, then bind loopback/private |
|
||||
| `3050` | `mindlyst-web` | `/opt/bytelyst/learning_ai_common_plat/docker-compose.ecosystem.yml` | none found in Caddy | `needs-decision` | Unhealthy; classify as private/admin or retire |
|
||||
| `3055` | `nomgap-web` | `/opt/bytelyst/learning_ai_common_plat/docker-compose.ecosystem.yml` | none found in Caddy | `needs-decision` | Unhealthy; classify as private/admin or retire |
|
||||
| `3055` | `nomgap-web` | orphan from older `/opt/bytelyst/learning_ai_common_plat/docker-compose.ecosystem.yml` | none found in Caddy | `retire` | Retired on 2026-05-27; current Compose says Nomgap web is deployed to Vercel |
|
||||
| `3060` | `actiontrail-web` | `/opt/bytelyst/learning_ai_common_plat/docker-compose.ecosystem.yml` | none found in Caddy | `needs-decision` | Unhealthy; classify as private/admin or retire |
|
||||
| `3070` | `localmemgpt-web` | `/opt/bytelyst/learning_ai_common_plat/docker-compose.ecosystem.yml` | none found in Caddy | `needs-decision` | Unhealthy; classify as private/admin or retire |
|
||||
| `3075` | `llmlab-dashboard` | `/opt/bytelyst/learning_ai_common_plat/docker-compose.ecosystem.yml` | `llmlab.bytelyst.com` | `private-admin` with direct bypass | Dashboard unhealthy; gate or retire |
|
||||
@ -113,6 +113,7 @@ These listeners were bound on `0.0.0.0` and/or `[::]` during review.
|
||||
|
||||
## Drift / Follow-Up Findings
|
||||
|
||||
- `nomgap-web` was an orphan from an older Compose revision, had no Caddy route, and was retired on 2026-05-27.
|
||||
- `devops-backend` runs from `/opt/bytelyst/learning_ai_devops_tools/dashboard/docker-compose.yml`.
|
||||
- `devops-web` runs from `/opt/bytelyst/bytelyst-devops-tools/dashboard/docker-compose.yml`, an older path. Align this before changing devops dashboard port bindings.
|
||||
- `gitea-npm-registry` has no Compose labels in Docker inspect output. Find its systemd/compose owner before changing `3300`.
|
||||
|
||||
@ -397,7 +397,7 @@ Effective `sshd -T` settings showed:
|
||||
|
||||
### Phase 2 — Operational correctness
|
||||
|
||||
- [ ] Fix/retire unhealthy containers.
|
||||
- [x] Fix/retire unhealthy containers.
|
||||
- [x] Resolve `hermes-root-backup.service` failed state.
|
||||
- [x] Decide and document Gitea runner active/disabled state.
|
||||
- [x] Add missing-script checks. Stale root cron path was fixed on 2026-05-27.
|
||||
@ -515,6 +515,31 @@ Minimum post-checks for Phase 1:
|
||||
|
||||
- The detector currently covers root crontab and failed systemd units. Full ownership inventory still needs `/etc/cron.d`, user crontabs, Hermes cron, Gitea schedules, owners, outputs, and alert channels.
|
||||
|
||||
### 2026-05-27 — Phase 2 unhealthy containers
|
||||
|
||||
**Changed:**
|
||||
|
||||
- Added `HOSTNAME=0.0.0.0` to six managed Next.js web services in `/opt/bytelyst/learning_ai_common_plat/docker-compose.ecosystem.yml`: `jarvisjr-web`, `flowmonk-web`, `mindlyst-web`, `actiontrail-web`, `localmemgpt-web`, and `llmlab-dashboard`.
|
||||
- Recreated those six services from existing images with `docker compose ... up -d --no-build`.
|
||||
- Retired the orphan `learning_ai_common_plat-nomgap-web-1` container. Current Compose already documents `nomgap-web` as deployed to Vercel and not part of the Docker stack.
|
||||
|
||||
**Verified:**
|
||||
|
||||
- `docker compose -f docker-compose.ecosystem.yml --env-file .env.ecosystem config --quiet` passed.
|
||||
- The six recreated web containers report Docker health `healthy`.
|
||||
- `docker ps --filter health=unhealthy` returns no containers.
|
||||
- Host-level smoke checks returned HTTP `200` for `3035`, `3040`, `3050`, `3060`, `3070`, and `3075`; retired orphan port `3055` is closed.
|
||||
- `vm-health-check.sh --json`, run with host permissions, reports `container_health=OK`, `container_loops=OK`, `failed_units=OK`, and `cron_missing_paths=OK`.
|
||||
|
||||
**Committed/pushed:**
|
||||
|
||||
- `learning_ai_common_plat`: `af035e7d` (`fix: bind ecosystem Next apps on all interfaces`) pushed to GitHub.
|
||||
|
||||
**Residual risk:**
|
||||
|
||||
- The local Gitea mirror push for `learning_ai_common_plat` failed at the Git HTTP transport layer even though fetch and health checks work; retry or fix the mirror push separately.
|
||||
- This fixed container health state only; it did not reduce public exposure. Several directly published ports still need to be loopback-bound or blocked in Phase 1.
|
||||
|
||||
## Do Not Start With
|
||||
|
||||
- Rootless Docker migration.
|
||||
|
||||
Loading…
Reference in New Issue
Block a user