fix(tracker-web): exclude stale factories from the engine picker
availableEnginesForProduct previously skipped only factories with health 'down', so an engine advertised solely by a host that had stopped heartbeating could still be offered in the picker. It now also skips factories whose lastHeartbeatAt is older than 90s (mirroring the coordinator's DEFAULT_STALE_FACTORY_MS) and treats an unparseable timestamp as stale. Adds unit coverage for the engine-collection, down, stale, and graceful-degradation paths.

Generated with [Devin](https://cli.devin.ai/docs)
Co-Authored-By: Devin <158243242+devin-ai-integration[bot]@users.noreply.github.com>
This commit is contained in:
parent
f0a30b8356
commit
2ed19464c5
@ -24,6 +24,7 @@ import {
|
|||||||
getJobDag,
|
getJobDag,
|
||||||
getJobExplain,
|
getJobExplain,
|
||||||
listFactories,
|
listFactories,
|
||||||
|
availableEnginesForProduct,
|
||||||
getFleetMetrics,
|
getFleetMetrics,
|
||||||
getBudget,
|
getBudget,
|
||||||
getBudgetBurndown,
|
getBudgetBurndown,
|
||||||
@ -289,6 +290,64 @@ describe('fleet-client', () => {
|
|||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
|
describe('availableEnginesForProduct', () => {
  /** ISO timestamp for "now": a heartbeat inside the 90s liveness window. */
  const fresh = () => new Date().toISOString();
  /** ISO timestamp 120s in the past: older than the 90s staleness threshold. */
  const stale = () => new Date(Date.now() - 120_000).toISOString();

  /**
   * Builds a factory record that is healthy and freshly heartbeating by default.
   * Any field can be replaced through `over`.
   */
  const factory = (over: Record<string, unknown>) => ({
    id: 'f',
    productId: 'lysnrai',
    factoryId: 'f',
    capabilities: [],
    health: 'ok',
    load: 0,
    seatLimit: 1,
    lastHeartbeatAt: fresh(),
    ...over,
  });

  it('collects engine:* caps from healthy, live factories', async () => {
    fetchSpy.mockResolvedValue({
      factories: [
        factory({ capabilities: ['os:mac', 'engine:devin', 'engine:claude'] }),
        factory({ capabilities: ['engine:copilot'] }),
      ],
    });
    const engines = await availableEnginesForProduct('lysnrai');
    // Sort a copy so the assertion never mutates the array under test.
    expect([...engines].sort()).toEqual(['claude', 'copilot', 'devin']);
  });

  it('ignores non-engine caps and unknown engines', async () => {
    fetchSpy.mockResolvedValue({
      factories: [factory({ capabilities: ['os:mac', 'has:git', 'engine:bogus'] })],
    });
    expect(await availableEnginesForProduct()).toEqual([]);
  });

  it('skips down factories', async () => {
    fetchSpy.mockResolvedValue({
      factories: [factory({ health: 'down', capabilities: ['engine:codex'] })],
    });
    expect(await availableEnginesForProduct()).toEqual([]);
  });

  it('skips stale (missed-heartbeat) factories even when health says ok', async () => {
    fetchSpy.mockResolvedValue({
      factories: [
        factory({ capabilities: ['engine:codex'], lastHeartbeatAt: stale() }),
        factory({ capabilities: ['engine:devin'], lastHeartbeatAt: fresh() }),
      ],
    });
    // codex's only host is stale → excluded; devin's host is fresh → kept.
    expect(await availableEnginesForProduct()).toEqual(['devin']);
  });

  it('treats an unparseable heartbeat timestamp as stale', async () => {
    fetchSpy.mockResolvedValue({
      factories: [
        factory({ capabilities: ['engine:devin'], lastHeartbeatAt: 'not-a-timestamp' }),
        factory({ capabilities: ['engine:claude'], lastHeartbeatAt: fresh() }),
      ],
    });
    // devin's only host has a garbage timestamp → excluded; claude's host is fresh → kept.
    expect(await availableEnginesForProduct()).toEqual(['claude']);
  });

  it('returns empty (⇒ caller offers all) when the list call fails', async () => {
    fetchSpy.mockRejectedValue(new Error('Network error'));
    expect(await availableEnginesForProduct()).toEqual([]);
  });
});
|
||||||
|
|
||||||
describe('budget operations', () => {
|
describe('budget operations', () => {
|
||||||
it('getBudget returns budget or null', async () => {
|
it('getBudget returns budget or null', async () => {
|
||||||
fetchSpy.mockResolvedValue({ id: 'lysnrai', ceilingUsd: 100, spentUsd: 25 });
|
fetchSpy.mockResolvedValue({ id: 'lysnrai', ceilingUsd: 100, spentUsd: 25 });
|
||||||
|
|||||||
@ -529,13 +529,27 @@ export async function listFactories(productId?: string): Promise<{ factories: Fl
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Longest heartbeat silence, in milliseconds, before a factory is treated as
 * effectively down for engine advertisement. Mirrors the coordinator's
 * `DEFAULT_STALE_FACTORY_MS` (90s) so the picker never offers an engine that
 * only a dead host advertised.
 */
const STALE_FACTORY_MS = 90 * 1_000;

/**
 * Reports whether a factory has missed heartbeats long enough to be considered dead.
 *
 * @param f - Factory record whose `lastHeartbeatAt` timestamp is inspected.
 * @param nowMs - Reference time in epoch milliseconds, supplied by the caller.
 * @returns `true` when the timestamp cannot be parsed, or when more than
 *   `STALE_FACTORY_MS` milliseconds separate it from `nowMs`; `false` otherwise.
 */
function isFactoryStale(f: FleetFactory, nowMs: number): boolean {
  const heartbeatMs = Date.parse(f.lastHeartbeatAt);
  if (Number.isNaN(heartbeatMs)) {
    // An unreadable timestamp gives no evidence of liveness.
    return true;
  }
  const silentForMs = nowMs - heartbeatMs;
  return silentForMs > STALE_FACTORY_MS;
}
|
||||||
|
|
||||||
/** Concrete engines a product's live factories advertise (`engine:*` capabilities),
|
/** Concrete engines a product's live factories advertise (`engine:*` capabilities),
|
||||||
* intersected with the known engine set. Empty ⇒ unknown (caller should not filter). */
|
* intersected with the known engine set. Skips `down` and stale (missed-heartbeat)
|
||||||
|
* factories so the picker never offers an engine only a dead host had.
|
||||||
|
* Empty ⇒ unknown (caller should not filter). */
|
||||||
export async function availableEnginesForProduct(productId?: string): Promise<FleetEngine[]> {
|
export async function availableEnginesForProduct(productId?: string): Promise<FleetEngine[]> {
|
||||||
const { factories } = await listFactories(productId);
|
const { factories } = await listFactories(productId);
|
||||||
|
const nowMs = Date.now();
|
||||||
const seen = new Set<FleetEngine>();
|
const seen = new Set<FleetEngine>();
|
||||||
for (const f of factories) {
|
for (const f of factories) {
|
||||||
if (f.health === 'down') continue;
|
if (f.health === 'down' || isFactoryStale(f, nowMs)) continue;
|
||||||
for (const cap of f.capabilities ?? []) {
|
for (const cap of f.capabilities ?? []) {
|
||||||
if (cap.startsWith('engine:')) {
|
if (cap.startsWith('engine:')) {
|
||||||
const e = cap.slice('engine:'.length) as FleetEngine;
|
const e = cap.slice('engine:'.length) as FleetEngine;
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user