Compare commits
3 Commits
9210a8890f
...
5a2d92f519
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
5a2d92f519 | ||
| e2db92f3b1 | |||
| 8f522e3505 |
@ -1,8 +1,8 @@
|
|||||||
'use client';
|
'use client';
|
||||||
|
|
||||||
import { useEffect, useMemo, useState } from 'react';
|
import { useEffect, useMemo, useRef, useState } from 'react';
|
||||||
import Link from 'next/link';
|
import Link from 'next/link';
|
||||||
import { AlertTriangle, CheckCircle2, Cloud, DatabaseBackup, ExternalLink, Gauge, HardDrive, RefreshCw, ShieldCheck, Timer, Wifi, Activity, CalendarClock, Link2 } from 'lucide-react';
|
import { AlertTriangle, CheckCircle2, Cloud, Copy, DatabaseBackup, ExternalLink, Gauge, HardDrive, RefreshCw, ShieldCheck, Timer, Wifi, Activity, CalendarClock, Link2 } from 'lucide-react';
|
||||||
import { Badge, Button } from '@/components/ui/Primitives';
|
import { Badge, Button } from '@/components/ui/Primitives';
|
||||||
import { SectionCard } from '@/components/hermes-shell';
|
import { SectionCard } from '@/components/hermes-shell';
|
||||||
import { api, type HermesOpsInstance, type HermesOpsSnapshot } from '@/lib/api';
|
import { api, type HermesOpsInstance, type HermesOpsSnapshot } from '@/lib/api';
|
||||||
@ -94,6 +94,10 @@ function InstanceCard({ instance }: { instance: HermesOpsInstance }) {
|
|||||||
Open dashboard <ExternalLink className="ml-2 h-4 w-4" />
|
Open dashboard <ExternalLink className="ml-2 h-4 w-4" />
|
||||||
</a>
|
</a>
|
||||||
</Button>
|
</Button>
|
||||||
|
<Button variant="ghost" size="sm" onClick={() => void navigator.clipboard.writeText(instance.dashboard.url)}>
|
||||||
|
<Copy className="mr-2 h-4 w-4" />
|
||||||
|
Copy URL
|
||||||
|
</Button>
|
||||||
</div>
|
</div>
|
||||||
</article>
|
</article>
|
||||||
);
|
);
|
||||||
@ -101,14 +105,19 @@ function InstanceCard({ instance }: { instance: HermesOpsInstance }) {
|
|||||||
|
|
||||||
export function HermesOpsPanel() {
|
export function HermesOpsPanel() {
|
||||||
const [snapshot, setSnapshot] = useState<HermesOpsSnapshot | null>(null);
|
const [snapshot, setSnapshot] = useState<HermesOpsSnapshot | null>(null);
|
||||||
|
const [previousSnapshot, setPreviousSnapshot] = useState<HermesOpsSnapshot | null>(null);
|
||||||
const [loading, setLoading] = useState(true);
|
const [loading, setLoading] = useState(true);
|
||||||
const [error, setError] = useState<string | null>(null);
|
const [error, setError] = useState<string | null>(null);
|
||||||
|
const latestSnapshotRef = useRef<HermesOpsSnapshot | null>(null);
|
||||||
|
|
||||||
const load = async () => {
|
const load = async () => {
|
||||||
setLoading(true);
|
setLoading(true);
|
||||||
setError(null);
|
setError(null);
|
||||||
try {
|
try {
|
||||||
setSnapshot(await api.getHermesOps());
|
const nextSnapshot = await api.getHermesOps();
|
||||||
|
setPreviousSnapshot(latestSnapshotRef.current);
|
||||||
|
latestSnapshotRef.current = nextSnapshot;
|
||||||
|
setSnapshot(nextSnapshot);
|
||||||
} catch (err) {
|
} catch (err) {
|
||||||
setError(err instanceof Error ? err.message : 'Unable to load Hermes operations status');
|
setError(err instanceof Error ? err.message : 'Unable to load Hermes operations status');
|
||||||
} finally {
|
} finally {
|
||||||
@ -123,6 +132,44 @@ export function HermesOpsPanel() {
|
|||||||
}, []);
|
}, []);
|
||||||
|
|
||||||
const allHealthy = useMemo(() => snapshot ? snapshot.warnings.length === 0 : false, [snapshot]);
|
const allHealthy = useMemo(() => snapshot ? snapshot.warnings.length === 0 : false, [snapshot]);
|
||||||
|
const snapshotDiff = useMemo(() => {
|
||||||
|
if (!snapshot || !previousSnapshot) return null;
|
||||||
|
|
||||||
|
const previousHealthyInstances = previousSnapshot.instances.filter((instance) =>
|
||||||
|
instance.gateway.active &&
|
||||||
|
instance.dashboard.active &&
|
||||||
|
instance.backup.timer.active &&
|
||||||
|
instance.backup.repo.clean &&
|
||||||
|
instance.google.workspaceToken
|
||||||
|
).length;
|
||||||
|
|
||||||
|
const currentHealthyInstances = snapshot.instances.filter((instance) =>
|
||||||
|
instance.gateway.active &&
|
||||||
|
instance.dashboard.active &&
|
||||||
|
instance.backup.timer.active &&
|
||||||
|
instance.backup.repo.clean &&
|
||||||
|
instance.google.workspaceToken
|
||||||
|
).length;
|
||||||
|
|
||||||
|
return {
|
||||||
|
healthyInstances: currentHealthyInstances - previousHealthyInstances,
|
||||||
|
warnings: snapshot.warnings.length - previousSnapshot.warnings.length,
|
||||||
|
activeSessions: snapshot.activeSessions.active - previousSnapshot.activeSessions.active,
|
||||||
|
activeDashboards: snapshot.instances.filter((instance) => instance.dashboard.active).length - previousSnapshot.instances.filter((instance) => instance.dashboard.active).length,
|
||||||
|
activeBackupTimers: snapshot.instances.filter((instance) => instance.backup.timer.active).length - previousSnapshot.instances.filter((instance) => instance.backup.timer.active).length,
|
||||||
|
};
|
||||||
|
}, [previousSnapshot, snapshot]);
|
||||||
|
const healthyInstances = snapshot
|
||||||
|
? snapshot.instances.filter((instance) =>
|
||||||
|
instance.gateway.active &&
|
||||||
|
instance.dashboard.active &&
|
||||||
|
instance.backup.timer.active &&
|
||||||
|
instance.backup.repo.clean &&
|
||||||
|
instance.google.workspaceToken
|
||||||
|
).length
|
||||||
|
: 0;
|
||||||
|
const activeDashboards = snapshot ? snapshot.instances.filter((instance) => instance.dashboard.active).length : 0;
|
||||||
|
const activeBackupTimers = snapshot ? snapshot.instances.filter((instance) => instance.backup.timer.active).length : 0;
|
||||||
|
|
||||||
return (
|
return (
|
||||||
<SectionCard
|
<SectionCard
|
||||||
@ -146,6 +193,65 @@ export function HermesOpsPanel() {
|
|||||||
|
|
||||||
{snapshot ? (
|
{snapshot ? (
|
||||||
<div className="space-y-5">
|
<div className="space-y-5">
|
||||||
|
{snapshotDiff ? (
|
||||||
|
<div className="rounded-2xl border border-[var(--bl-border)] bg-[var(--bl-surface-muted)] p-4">
|
||||||
|
<div className="flex items-center justify-between gap-3">
|
||||||
|
<div>
|
||||||
|
<p className="text-sm font-medium text-[var(--bl-text-primary)]">Since previous refresh</p>
|
||||||
|
<p className="text-xs text-[var(--bl-text-secondary)]">Snapshot movement compared with the last poll.</p>
|
||||||
|
</div>
|
||||||
|
<Badge variant="neutral">Delta view</Badge>
|
||||||
|
</div>
|
||||||
|
<div className="mt-3 grid gap-3 md:grid-cols-5">
|
||||||
|
{[
|
||||||
|
{ label: 'Healthy instances', value: snapshotDiff.healthyInstances },
|
||||||
|
{ label: 'Active dashboards', value: snapshotDiff.activeDashboards },
|
||||||
|
{ label: 'Active backups', value: snapshotDiff.activeBackupTimers },
|
||||||
|
{ label: 'Active sessions', value: snapshotDiff.activeSessions },
|
||||||
|
{ label: 'Warnings', value: snapshotDiff.warnings },
|
||||||
|
].map((item) => (
|
||||||
|
<div key={item.label} className="rounded-xl border border-[var(--bl-border)] bg-[var(--bl-surface-card)] p-3">
|
||||||
|
<p className="text-xs uppercase tracking-[0.2em] text-[var(--bl-text-tertiary)]">{item.label}</p>
|
||||||
|
<p className={`mt-2 text-2xl font-semibold ${item.value > 0 ? 'text-[var(--bl-success)]' : item.value < 0 ? 'text-[var(--bl-danger)]' : 'text-[var(--bl-text-primary)]'}`}>
|
||||||
|
{item.value > 0 ? '+' : ''}{item.value}
|
||||||
|
</p>
|
||||||
|
</div>
|
||||||
|
))}
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
) : null}
|
||||||
|
|
||||||
|
<div className="grid gap-3 md:grid-cols-4">
|
||||||
|
<div className="rounded-2xl border border-[var(--bl-border)] bg-[var(--bl-surface-muted)] p-4">
|
||||||
|
<div className="flex items-center gap-2 text-sm text-[var(--bl-text-secondary)]">
|
||||||
|
<ShieldCheck className="h-4 w-4" />
|
||||||
|
Healthy instances
|
||||||
|
</div>
|
||||||
|
<p className="mt-2 text-2xl font-semibold text-[var(--bl-text-primary)]">{healthyInstances}/2</p>
|
||||||
|
</div>
|
||||||
|
<div className="rounded-2xl border border-[var(--bl-border)] bg-[var(--bl-surface-muted)] p-4">
|
||||||
|
<div className="flex items-center gap-2 text-sm text-[var(--bl-text-secondary)]">
|
||||||
|
<Activity className="h-4 w-4" />
|
||||||
|
Active dashboards
|
||||||
|
</div>
|
||||||
|
<p className="mt-2 text-2xl font-semibold text-[var(--bl-text-primary)]">{activeDashboards}/2</p>
|
||||||
|
</div>
|
||||||
|
<div className="rounded-2xl border border-[var(--bl-border)] bg-[var(--bl-surface-muted)] p-4">
|
||||||
|
<div className="flex items-center gap-2 text-sm text-[var(--bl-text-secondary)]">
|
||||||
|
<CalendarClock className="h-4 w-4" />
|
||||||
|
Active backup timers
|
||||||
|
</div>
|
||||||
|
<p className="mt-2 text-2xl font-semibold text-[var(--bl-text-primary)]">{activeBackupTimers}/2</p>
|
||||||
|
</div>
|
||||||
|
<div className="rounded-2xl border border-[var(--bl-border)] bg-[var(--bl-surface-muted)] p-4">
|
||||||
|
<div className="flex items-center gap-2 text-sm text-[var(--bl-text-secondary)]">
|
||||||
|
<AlertTriangle className="h-4 w-4" />
|
||||||
|
Open warnings
|
||||||
|
</div>
|
||||||
|
<p className="mt-2 text-2xl font-semibold text-[var(--bl-text-primary)]">{snapshot.warnings.length}</p>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
<div className="grid gap-3 md:grid-cols-4">
|
<div className="grid gap-3 md:grid-cols-4">
|
||||||
<div className="rounded-2xl border border-[var(--bl-border)] bg-[var(--bl-surface-muted)] p-4">
|
<div className="rounded-2xl border border-[var(--bl-border)] bg-[var(--bl-surface-muted)] p-4">
|
||||||
<div className="flex items-center gap-2 text-sm text-[var(--bl-text-secondary)]">
|
<div className="flex items-center gap-2 text-sm text-[var(--bl-text-secondary)]">
|
||||||
|
|||||||
@ -31,7 +31,7 @@ Observed on 2026-05-27:
|
|||||||
- Private dashboards:
|
- Private dashboards:
|
||||||
- Root: `http://100.87.53.10:9119/`, `hermes-root-dashboard.service`
|
- Root: `http://100.87.53.10:9119/`, `hermes-root-dashboard.service`
|
||||||
- Uma: `http://100.87.53.10:9120/`, `uma-hermes-dashboard.service`
|
- Uma: `http://100.87.53.10:9120/`, `uma-hermes-dashboard.service`
|
||||||
- Live ops panel shows gateway state, active sessions, cron state, backup freshness, sanitized alerts, and runbook links for both instances.
|
- Live ops panel shows gateway state, active sessions, refresh delta, cron state, backup freshness, sanitized alerts, and runbook links for both instances.
|
||||||
|
|
||||||
## Safety guardrail: no public Hermes dashboard/API
|
## Safety guardrail: no public Hermes dashboard/API
|
||||||
|
|
||||||
|
|||||||
@ -21,6 +21,16 @@
|
|||||||
- **Needs manual UX validation:** dashboard feature-by-feature checks, Telegram approval prompt flow, and Telegram media/file delivery.
|
- **Needs manual UX validation:** dashboard feature-by-feature checks, Telegram approval prompt flow, and Telegram media/file delivery.
|
||||||
- **Needs future workflow adoption:** practicing `delegate_task`, spawned/tmux sessions, worktrees, and Kanban on real tasks before checking them as completed.
|
- **Needs future workflow adoption:** practicing `delegate_task`, spawned/tmux sessions, worktrees, and Kanban on real tasks before checking them as completed.
|
||||||
|
|
||||||
|
## Next To-Dos
|
||||||
|
|
||||||
|
The remaining work is now mostly hardening rather than feature delivery:
|
||||||
|
|
||||||
|
- finish the GitHub/Gitea least-privilege audit for the root-managed push path
|
||||||
|
- decide whether `security.redact_secrets` should be enabled by default
|
||||||
|
- document the gateway-session `privacy.redact_pii` policy
|
||||||
|
- rotate any credentials that were migrated or exposed during the setup work
|
||||||
|
- tighten least-privilege token scopes for GitHub/Gitea, web APIs, and provider keys
|
||||||
|
|
||||||
## Purpose
|
## Purpose
|
||||||
|
|
||||||
Turn the Hermes setup ideas from the referenced video into a practical ByteLyst upgrade checklist for this VM-backed, Telegram-driven Hermes installation.
|
Turn the Hermes setup ideas from the referenced video into a practical ByteLyst upgrade checklist for this VM-backed, Telegram-driven Hermes installation.
|
||||||
|
|||||||
@ -665,6 +665,18 @@ Known roadmap assumptions to handle safely during implementation:
|
|||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
|
## Next Dashboard Improvements
|
||||||
|
|
||||||
|
Potential follow-up work for Hermes Mission Control:
|
||||||
|
|
||||||
|
- warning severity filters for the live ops panel
|
||||||
|
- compact trend cards for recent alert volume and backup freshness over several refreshes
|
||||||
|
- task-ledger deep links from the ops panel into the most recent Hermes work
|
||||||
|
- per-instance action row improvements beyond copy-link/open-dashboard, such as open-runbook shortcuts
|
||||||
|
- optional dark/light theme toggle if the broader dashboard shell eventually supports it
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
# Git workflow
|
# Git workflow
|
||||||
|
|
||||||
Commit incrementally:
|
Commit incrementally:
|
||||||
|
|||||||
@ -64,7 +64,7 @@ These listeners were bound on `0.0.0.0` and/or `[::]` during review.
|
|||||||
| `3040` | `flowmonk-web` | `/opt/bytelyst/learning_ai_common_plat/docker-compose.ecosystem.yml` | none found in Caddy | `needs-decision` | Unhealthy; classify as private/admin or retire |
|
| `3040` | `flowmonk-web` | `/opt/bytelyst/learning_ai_common_plat/docker-compose.ecosystem.yml` | none found in Caddy | `needs-decision` | Unhealthy; classify as private/admin or retire |
|
||||||
| `3049` | `devops-web` | `/opt/bytelyst/bytelyst-devops-tools/dashboard/docker-compose.yml` | `devops.bytelyst.com` | `private-admin` with direct bypass | Fix old repo path drift, then bind loopback/private |
|
| `3049` | `devops-web` | `/opt/bytelyst/bytelyst-devops-tools/dashboard/docker-compose.yml` | `devops.bytelyst.com` | `private-admin` with direct bypass | Fix old repo path drift, then bind loopback/private |
|
||||||
| `3050` | `mindlyst-web` | `/opt/bytelyst/learning_ai_common_plat/docker-compose.ecosystem.yml` | none found in Caddy | `needs-decision` | Unhealthy; classify as private/admin or retire |
|
| `3050` | `mindlyst-web` | `/opt/bytelyst/learning_ai_common_plat/docker-compose.ecosystem.yml` | none found in Caddy | `needs-decision` | Unhealthy; classify as private/admin or retire |
|
||||||
| `3055` | `nomgap-web` | `/opt/bytelyst/learning_ai_common_plat/docker-compose.ecosystem.yml` | none found in Caddy | `needs-decision` | Unhealthy; classify as private/admin or retire |
|
| `3055` | `nomgap-web` | orphan from older `/opt/bytelyst/learning_ai_common_plat/docker-compose.ecosystem.yml` | none found in Caddy | `retire` | Retired on 2026-05-27; current Compose says Nomgap web is deployed to Vercel |
|
||||||
| `3060` | `actiontrail-web` | `/opt/bytelyst/learning_ai_common_plat/docker-compose.ecosystem.yml` | none found in Caddy | `needs-decision` | Unhealthy; classify as private/admin or retire |
|
| `3060` | `actiontrail-web` | `/opt/bytelyst/learning_ai_common_plat/docker-compose.ecosystem.yml` | none found in Caddy | `needs-decision` | Unhealthy; classify as private/admin or retire |
|
||||||
| `3070` | `localmemgpt-web` | `/opt/bytelyst/learning_ai_common_plat/docker-compose.ecosystem.yml` | none found in Caddy | `needs-decision` | Unhealthy; classify as private/admin or retire |
|
| `3070` | `localmemgpt-web` | `/opt/bytelyst/learning_ai_common_plat/docker-compose.ecosystem.yml` | none found in Caddy | `needs-decision` | Unhealthy; classify as private/admin or retire |
|
||||||
| `3075` | `llmlab-dashboard` | `/opt/bytelyst/learning_ai_common_plat/docker-compose.ecosystem.yml` | `llmlab.bytelyst.com` | `private-admin` with direct bypass | Dashboard unhealthy; gate or retire |
|
| `3075` | `llmlab-dashboard` | `/opt/bytelyst/learning_ai_common_plat/docker-compose.ecosystem.yml` | `llmlab.bytelyst.com` | `private-admin` with direct bypass | Dashboard unhealthy; gate or retire |
|
||||||
@ -113,6 +113,7 @@ These listeners were bound on `0.0.0.0` and/or `[::]` during review.
|
|||||||
|
|
||||||
## Drift / Follow-Up Findings
|
## Drift / Follow-Up Findings
|
||||||
|
|
||||||
|
- `nomgap-web` was an orphan from an older Compose revision, had no Caddy route, and was retired on 2026-05-27.
|
||||||
- `devops-backend` runs from `/opt/bytelyst/learning_ai_devops_tools/dashboard/docker-compose.yml`.
|
- `devops-backend` runs from `/opt/bytelyst/learning_ai_devops_tools/dashboard/docker-compose.yml`.
|
||||||
- `devops-web` runs from `/opt/bytelyst/bytelyst-devops-tools/dashboard/docker-compose.yml`, an older path. Align this before changing devops dashboard port bindings.
|
- `devops-web` runs from `/opt/bytelyst/bytelyst-devops-tools/dashboard/docker-compose.yml`, an older path. Align this before changing devops dashboard port bindings.
|
||||||
- `gitea-npm-registry` has no Compose labels in Docker inspect output. Find its systemd/compose owner before changing `3300`.
|
- `gitea-npm-registry` has no Compose labels in Docker inspect output. Find its systemd/compose owner before changing `3300`.
|
||||||
|
|||||||
@ -397,7 +397,7 @@ Effective `sshd -T` settings showed:
|
|||||||
|
|
||||||
### Phase 2 — Operational correctness
|
### Phase 2 — Operational correctness
|
||||||
|
|
||||||
- [ ] Fix/retire unhealthy containers.
|
- [x] Fix/retire unhealthy containers.
|
||||||
- [x] Resolve `hermes-root-backup.service` failed state.
|
- [x] Resolve `hermes-root-backup.service` failed state.
|
||||||
- [x] Decide and document Gitea runner active/disabled state.
|
- [x] Decide and document Gitea runner active/disabled state.
|
||||||
- [x] Add missing-script checks. Stale root cron path was fixed on 2026-05-27.
|
- [x] Add missing-script checks. Stale root cron path was fixed on 2026-05-27.
|
||||||
@ -515,6 +515,31 @@ Minimum post-checks for Phase 1:
|
|||||||
|
|
||||||
- The detector currently covers root crontab and failed systemd units. Full ownership inventory still needs `/etc/cron.d`, user crontabs, Hermes cron, Gitea schedules, owners, outputs, and alert channels.
|
- The detector currently covers root crontab and failed systemd units. Full ownership inventory still needs `/etc/cron.d`, user crontabs, Hermes cron, Gitea schedules, owners, outputs, and alert channels.
|
||||||
|
|
||||||
|
### 2026-05-27 — Phase 2 unhealthy containers
|
||||||
|
|
||||||
|
**Changed:**
|
||||||
|
|
||||||
|
- Added `HOSTNAME=0.0.0.0` to six managed Next.js web services in `/opt/bytelyst/learning_ai_common_plat/docker-compose.ecosystem.yml`: `jarvisjr-web`, `flowmonk-web`, `mindlyst-web`, `actiontrail-web`, `localmemgpt-web`, and `llmlab-dashboard`.
|
||||||
|
- Recreated those six services from existing images with `docker compose ... up -d --no-build`.
|
||||||
|
- Retired the orphan `learning_ai_common_plat-nomgap-web-1` container. Current Compose already documents `nomgap-web` as deployed to Vercel and not part of the Docker stack.
|
||||||
|
|
||||||
|
**Verified:**
|
||||||
|
|
||||||
|
- `docker compose -f docker-compose.ecosystem.yml --env-file .env.ecosystem config --quiet` passed.
|
||||||
|
- The six recreated web containers report Docker health `healthy`.
|
||||||
|
- `docker ps --filter health=unhealthy` returns no containers.
|
||||||
|
- Host-level smoke checks returned HTTP `200` for `3035`, `3040`, `3050`, `3060`, `3070`, and `3075`; retired orphan port `3055` is closed.
|
||||||
|
- Running `vm-health-check.sh --json` with host-level permissions reports `container_health=OK`, `container_loops=OK`, `failed_units=OK`, and `cron_missing_paths=OK`.
|
||||||
|
|
||||||
|
**Committed/pushed:**
|
||||||
|
|
||||||
|
- `learning_ai_common_plat`: `af035e7d` (`fix: bind ecosystem Next apps on all interfaces`) pushed to GitHub.
|
||||||
|
|
||||||
|
**Residual risk:**
|
||||||
|
|
||||||
|
- The local Gitea mirror push for `learning_ai_common_plat` failed at the Git HTTP transport layer even though fetch and health checks succeed; retry or fix the mirror push separately.
|
||||||
|
- This change fixed container health state only; it did not reduce public exposure. Several directly published ports still need to be loopback-bound or blocked in Phase 1.
|
||||||
|
|
||||||
## Do Not Start With
|
## Do Not Start With
|
||||||
|
|
||||||
- Rootless Docker migration.
|
- Rootless Docker migration.
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user