From 2ed19464c5e8f566c713a69efc98a97d13f1a9b1 Mon Sep 17 00:00:00 2001 From: saravanakumardb1 Date: Mon, 1 Jun 2026 11:02:56 -0700 Subject: [PATCH] fix(tracker-web): exclude stale factories from the engine picker availableEnginesForProduct skipped only health:down factories, so an engine advertised solely by a host that had stopped heartbeating could still be offered in the picker. Also skip factories whose lastHeartbeatAt is older than 90s (mirrors the coordinator's DEFAULT_STALE_FACTORY_MS), and treat an unparseable timestamp as stale. Adds unit coverage for the engine-collection, down, stale, and graceful-degradation paths. Generated with [Devin](https://cli.devin.ai/docs) Co-Authored-By: Devin <158243242+devin-ai-integration[bot]@users.noreply.github.com> --- .../src/__tests__/fleet-client.test.ts | 59 +++++++++++++++++++ .../tracker-web/src/lib/fleet-client.ts | 18 +++++- 2 files changed, 75 insertions(+), 2 deletions(-) diff --git a/dashboards/tracker-web/src/__tests__/fleet-client.test.ts b/dashboards/tracker-web/src/__tests__/fleet-client.test.ts index a0d34414..1b64eb8e 100644 --- a/dashboards/tracker-web/src/__tests__/fleet-client.test.ts +++ b/dashboards/tracker-web/src/__tests__/fleet-client.test.ts @@ -24,6 +24,7 @@ import { getJobDag, getJobExplain, listFactories, + availableEnginesForProduct, getFleetMetrics, getBudget, getBudgetBurndown, @@ -289,6 +290,64 @@ describe('fleet-client', () => { }); }); + describe('availableEnginesForProduct', () => { + const fresh = () => new Date().toISOString(); + const stale = () => new Date(Date.now() - 120_000).toISOString(); // > 90s + + const factory = (over: Record) => ({ + id: 'f', + productId: 'lysnrai', + factoryId: 'f', + capabilities: [], + health: 'ok', + load: 0, + seatLimit: 1, + lastHeartbeatAt: fresh(), + ...over, + }); + + it('collects engine:* caps from healthy, live factories', async () => { + fetchSpy.mockResolvedValue({ + factories: [ + factory({ capabilities: ['os:mac', 'engine:devin', 'engine:claude'] }), + factory({ capabilities: ['engine:copilot'] }), + ], + }); + const engines = await availableEnginesForProduct('lysnrai'); + expect(engines.sort()).toEqual(['claude', 'copilot', 'devin']); + }); + + it('ignores non-engine caps and unknown engines', async () => { + fetchSpy.mockResolvedValue({ + factories: [factory({ capabilities: ['os:mac', 'has:git', 'engine:bogus'] })], + }); + expect(await availableEnginesForProduct()).toEqual([]); + }); + + it('skips down factories', async () => { + fetchSpy.mockResolvedValue({ + factories: [factory({ health: 'down', capabilities: ['engine:codex'] })], + }); + expect(await availableEnginesForProduct()).toEqual([]); + }); + + it('skips stale (missed-heartbeat) factories even when health says ok', async () => { + fetchSpy.mockResolvedValue({ + factories: [ + factory({ capabilities: ['engine:codex'], lastHeartbeatAt: stale() }), + factory({ capabilities: ['engine:devin'], lastHeartbeatAt: fresh() }), + ], + }); + // codex's only host is stale → excluded; devin's host is fresh → kept. + expect(await availableEnginesForProduct()).toEqual(['devin']); + }); + + it('returns empty (⇒ caller offers all) when the list call fails', async () => { + fetchSpy.mockRejectedValue(new Error('Network error')); + expect(await availableEnginesForProduct()).toEqual([]); + }); + }); + describe('budget operations', () => { it('getBudget returns budget or null', async () => { fetchSpy.mockResolvedValue({ id: 'lysnrai', ceilingUsd: 100, spentUsd: 25 }); diff --git a/dashboards/tracker-web/src/lib/fleet-client.ts b/dashboards/tracker-web/src/lib/fleet-client.ts index a8474059..848dbbce 100644 --- a/dashboards/tracker-web/src/lib/fleet-client.ts +++ b/dashboards/tracker-web/src/lib/fleet-client.ts @@ -529,13 +529,27 @@ export async function listFactories(productId?: string): Promise<{ factories: Fl } } +/** A factory missing a heartbeat for longer than this is treated as effectively + * down for engine advertisement. Mirrors the coordinator's `DEFAULT_STALE_FACTORY_MS` + * (90s) so the picker never offers an engine only a dead host advertised. */ +const STALE_FACTORY_MS = 90_000; + +/** True when the factory has missed heartbeats long enough to be considered dead. */ +function isFactoryStale(f: FleetFactory, nowMs: number): boolean { + const last = Date.parse(f.lastHeartbeatAt); + return Number.isNaN(last) || nowMs - last > STALE_FACTORY_MS; +} + /** Concrete engines a product's live factories advertise (`engine:*` capabilities), - * intersected with the known engine set. Empty ⇒ unknown (caller should not filter). */ + * intersected with the known engine set. Skips `down` and stale (missed-heartbeat) + * factories so the picker never offers an engine only a dead host had. + * Empty ⇒ unknown (caller should not filter). */ export async function availableEnginesForProduct(productId?: string): Promise { const { factories } = await listFactories(productId); + const nowMs = Date.now(); const seen = new Set(); for (const f of factories) { - if (f.health === 'down') continue; + if (f.health === 'down' || isFactoryStale(f, nowMs)) continue; for (const cap of f.capabilities ?? []) { if (cap.startsWith('engine:')) { const e = cap.slice('engine:'.length) as FleetEngine;