feat(fleet): factory-token expiry, prod-default enforcement, token GC
Hardens the factory credential lifecycle (§12): - Token expiry: tokens now carry an absolute expiresAt (FLEET_TOKEN_TTL_DAYS, default 90; 0 disables). verifyToken rejects an expired token regardless of status, bounding the blast radius of a leak. - Enforcement default: factoryTokenEnforcementEnabled now defaults ON in production and OFF in development/test (an explicit FLEET_REQUIRE_FACTORY_TOKEN still wins) — real deployments are secure by default while the local prototype and the test suite keep working without enrollment. - Token GC: pruneInvalidatedTokens deletes revoked, expired, and rotating-past-grace tokens; wired into the hourly fleet GC sweep (SweepResult.tokensDeleted) so the credential store stays bounded. Covered by new enrollment.test.ts cases (expiry, TTL=0, enforcement default matrix, prune) and the reaper/sweep accounting. Generated with [Devin](https://cli.devin.ai/docs) Co-Authored-By: Devin <158243242+devin-ai-integration[bot]@users.noreply.github.com>
This commit is contained in:
parent
42d27d8a4f
commit
493027fbad
@ -23,6 +23,7 @@ import { promisify } from 'node:util';
|
||||
import { BadRequestError, ConflictError } from '../../lib/errors.js';
|
||||
import * as repo from './repository.js';
|
||||
import { deleteArtifact as deleteArtifactWithBlob } from './artifacts-blob.js';
|
||||
import { pruneInvalidatedTokens } from './enrollment.js';
|
||||
import {
|
||||
selectJob,
|
||||
selectPreemptionVictim,
|
||||
@ -1964,6 +1965,7 @@ export interface SweepOptions {
|
||||
export interface SweepResult {
|
||||
leasesDeleted: number;
|
||||
factoriesDeleted: number;
|
||||
tokensDeleted: number;
|
||||
jobsDeleted: number;
|
||||
runsDeleted: number;
|
||||
eventsDeleted: number;
|
||||
@ -1996,6 +1998,7 @@ export async function sweepFleetGarbage(opts: SweepOptions): Promise<SweepResult
|
||||
const result: SweepResult = {
|
||||
leasesDeleted: 0,
|
||||
factoriesDeleted: 0,
|
||||
tokensDeleted: 0,
|
||||
jobsDeleted: 0,
|
||||
runsDeleted: 0,
|
||||
eventsDeleted: 0,
|
||||
@ -2025,6 +2028,9 @@ export async function sweepFleetGarbage(opts: SweepOptions): Promise<SweepResult
|
||||
}
|
||||
}
|
||||
|
||||
// 2b) Invalidated factory tokens (revoked / expired / rotating past grace).
|
||||
result.tokensDeleted = await pruneInvalidatedTokens(now);
|
||||
|
||||
// 3) Terminal-job retention cascade — OPT-IN only (destroys history).
|
||||
if (jobRetentionMs > 0) {
|
||||
const jobCutoff = new Date(now - jobRetentionMs).toISOString();
|
||||
|
||||
@ -33,15 +33,19 @@ async function buildApp(): Promise<FastifyInstance> {
|
||||
}
|
||||
|
||||
describe('fleet enrollment + scoped tokens', () => {
|
||||
const ORIGINAL_NODE_ENV = process.env.NODE_ENV;
|
||||
beforeEach(() => {
|
||||
setProvider(new MemoryDatastoreProvider());
|
||||
delete process.env.FLEET_REQUIRE_FACTORY_TOKEN;
|
||||
delete process.env.FLEET_TOKEN_ROTATION_OVERLAP_SEC;
|
||||
delete process.env.FLEET_TOKEN_TTL_DAYS;
|
||||
});
|
||||
afterEach(() => {
  _resetDatastoreProvider();
  delete process.env.FLEET_REQUIRE_FACTORY_TOKEN;
  delete process.env.FLEET_TOKEN_ROTATION_OVERLAP_SEC;
  delete process.env.FLEET_TOKEN_TTL_DAYS;
  // Restore NODE_ENV exactly as it was. `process.env` coerces assigned values
  // to strings, so assigning an originally-unset value would leave the literal
  // string "undefined" behind for later tests; delete the key instead.
  if (ORIGINAL_NODE_ENV === undefined) {
    delete process.env.NODE_ENV;
  } else {
    process.env.NODE_ENV = ORIGINAL_NODE_ENV;
  }
  vi.clearAllMocks();
});
|
||||
|
||||
@ -124,6 +128,69 @@ describe('fleet enrollment + scoped tokens', () => {
|
||||
expect(await enrollment.verifyToken(second.token)).not.toBeNull();
|
||||
});
|
||||
|
||||
// ── Token expiry (§12 — bound a leaked credential) ──
|
||||
it('issues tokens with an expiresAt (default 90d) and rejects an expired one', async () => {
|
||||
const res = await enrollment.enrollFactory({ productId: PID, factoryId: 'fac_1' });
|
||||
const stored = await repo.getFactoryToken(res.tokenId, PID);
|
||||
expect(stored?.expiresAt).toBeTruthy();
|
||||
// valid now…
|
||||
expect(await enrollment.verifyToken(res.token)).not.toBeNull();
|
||||
// …but once expiresAt is in the past, verify rejects it (even though active).
|
||||
await repo.updateFactoryToken(res.tokenId, PID, {
|
||||
expiresAt: new Date(Date.now() - 1000).toISOString(),
|
||||
});
|
||||
expect(await enrollment.verifyToken(res.token)).toBeNull();
|
||||
});
|
||||
|
||||
it('FLEET_TOKEN_TTL_DAYS=0 disables expiry (no expiresAt)', async () => {
|
||||
process.env.FLEET_TOKEN_TTL_DAYS = '0';
|
||||
const res = await enrollment.enrollFactory({ productId: PID, factoryId: 'fac_1' });
|
||||
const stored = await repo.getFactoryToken(res.tokenId, PID);
|
||||
expect(stored?.expiresAt).toBeUndefined();
|
||||
expect(await enrollment.verifyToken(res.token)).not.toBeNull();
|
||||
});
|
||||
|
||||
// ── Enforcement default (production-on, dev/test-off) ──
|
||||
it('factoryTokenEnforcementEnabled: defaults on in production, off otherwise; explicit wins', () => {
|
||||
process.env.NODE_ENV = 'production';
|
||||
delete process.env.FLEET_REQUIRE_FACTORY_TOKEN;
|
||||
expect(enrollment.factoryTokenEnforcementEnabled()).toBe(true);
|
||||
process.env.NODE_ENV = 'development';
|
||||
expect(enrollment.factoryTokenEnforcementEnabled()).toBe(false);
|
||||
process.env.NODE_ENV = 'test';
|
||||
expect(enrollment.factoryTokenEnforcementEnabled()).toBe(false);
|
||||
// explicit override beats the environment default either way
|
||||
process.env.NODE_ENV = 'development';
|
||||
process.env.FLEET_REQUIRE_FACTORY_TOKEN = '1';
|
||||
expect(enrollment.factoryTokenEnforcementEnabled()).toBe(true);
|
||||
process.env.NODE_ENV = 'production';
|
||||
process.env.FLEET_REQUIRE_FACTORY_TOKEN = 'off';
|
||||
expect(enrollment.factoryTokenEnforcementEnabled()).toBe(false);
|
||||
});
|
||||
|
||||
// ── Token GC ──
|
||||
it('pruneInvalidatedTokens removes revoked/expired/rotating-expired, keeps active', async () => {
|
||||
const active = await enrollment.enrollFactory({ productId: PID, factoryId: 'fac_active' });
|
||||
const revoked = await enrollment.enrollFactory({ productId: PID, factoryId: 'fac_revoked' });
|
||||
await enrollment.revokeToken({ productId: PID, factoryId: 'fac_revoked' });
|
||||
const expired = await enrollment.enrollFactory({ productId: PID, factoryId: 'fac_expired' });
|
||||
await repo.updateFactoryToken(expired.tokenId, PID, {
|
||||
expiresAt: new Date(Date.now() - 1000).toISOString(),
|
||||
});
|
||||
const rotating = await enrollment.enrollFactory({ productId: PID, factoryId: 'fac_rot' });
|
||||
await repo.updateFactoryToken(rotating.tokenId, PID, {
|
||||
status: 'rotating',
|
||||
rotatingUntil: new Date(Date.now() - 1000).toISOString(),
|
||||
});
|
||||
|
||||
const deleted = await enrollment.pruneInvalidatedTokens();
|
||||
expect(deleted).toBe(3);
|
||||
expect(await repo.getFactoryToken(active.tokenId, PID)).not.toBeNull();
|
||||
expect(await repo.getFactoryToken(revoked.tokenId, PID)).toBeNull();
|
||||
expect(await repo.getFactoryToken(expired.tokenId, PID)).toBeNull();
|
||||
expect(await repo.getFactoryToken(rotating.tokenId, PID)).toBeNull();
|
||||
});
|
||||
|
||||
// ── Gated enforcement on heartbeat/claim ──
|
||||
it('enforcement OFF (default): claim + heartbeat work with NO token', async () => {
|
||||
const app = await buildApp();
|
||||
|
||||
@ -26,6 +26,7 @@ import {
|
||||
export const FACTORY_TOKEN_HEADER = 'x-factory-token';
|
||||
const TOKEN_PREFIX = 'flt_';
|
||||
const DEFAULT_ROTATION_OVERLAP_SEC = 300;
|
||||
const DEFAULT_TOKEN_TTL_DAYS = 90;
|
||||
|
||||
// ── Crypto helpers (reuse the auth module's sha256 token pattern; no new schemes) ──
|
||||
|
||||
@ -48,10 +49,29 @@ function rotationOverlapSeconds(): number {
|
||||
return Number.isFinite(v) && v >= 0 ? v : DEFAULT_ROTATION_OVERLAP_SEC;
|
||||
}
|
||||
|
||||
/**
 * Token lifetime in milliseconds, read from `FLEET_TOKEN_TTL_DAYS` on each call.
 *
 * - unset or blank                    ⇒ the default (`DEFAULT_TOKEN_TTL_DAYS` days)
 * - `0`                               ⇒ `null` (expiry explicitly disabled)
 * - negative, non-numeric, non-finite ⇒ the default
 * - any other value                   ⇒ that many days, clamped to `MAX_TTL_DAYS`
 *
 * The clamp keeps `Date.now() + ttl` inside the range a `Date` can represent
 * (±8.64e15 ms); beyond it `toISOString()` throws a RangeError, so an absurdly
 * large setting would otherwise break token issuance instead of meaning
 * "effectively never".
 *
 * @returns the lifetime in ms, or `null` when tokens should carry no expiry.
 */
function tokenTtlMs(): number | null {
  const MS_PER_DAY = 86_400_000;
  // 10 million days (~27,000 years): far beyond any practical TTL, yet small
  // enough that adding it to the current time still yields a valid Date.
  const MAX_TTL_DAYS = 10_000_000;
  const defaultMs = DEFAULT_TOKEN_TTL_DAYS * MS_PER_DAY;

  const raw = process.env.FLEET_TOKEN_TTL_DAYS;
  if (raw === undefined || raw.trim() === '') return defaultMs;

  const days = Number(raw);
  if (!Number.isFinite(days) || days < 0) return defaultMs;
  if (days === 0) return null; // explicitly disabled

  return Math.min(days, MAX_TTL_DAYS) * MS_PER_DAY;
}
|
||||
|
||||
/**
 * Whether factory-token auth is enforced on heartbeat/claim.
 *
 * Resolution order:
 *  1. An explicit `FLEET_REQUIRE_FACTORY_TOKEN` always wins (trimmed,
 *     case-insensitive): `1`/`true`/`yes`/`on` ⇒ enforced,
 *     `0`/`false`/`no`/`off` ⇒ not enforced.
 *  2. With no recognised explicit value, the default is ON when `NODE_ENV` is
 *     `production` and OFF elsewhere (development / test) — so real deployments
 *     are secure by default while the local prototype and the test suite keep
 *     working without enrollment.
 *
 * The environment is read on every call so tests can toggle it.
 *
 * @returns `true` when requests must present a valid factory token.
 */
export function factoryTokenEnforcementEnabled(): boolean {
  const explicit = (process.env.FLEET_REQUIRE_FACTORY_TOKEN ?? '').trim().toLowerCase();
  if (explicit === '1' || explicit === 'true' || explicit === 'yes' || explicit === 'on') {
    return true;
  }
  if (explicit === '0' || explicit === 'false' || explicit === 'no' || explicit === 'off') {
    return false;
  }
  // Unset or unrecognised: fall back to the environment-based default.
  return (process.env.NODE_ENV ?? '').toLowerCase() === 'production';
}
|
||||
|
||||
// ── Scope + result shapes ───────────────────────────────────────────────────
|
||||
@ -123,6 +143,7 @@ interface IssueTokenArgs {
|
||||
async function issueToken(args: IssueTokenArgs): Promise<IssuedToken> {
|
||||
const tokenId = `fltk_${randomUUID()}`;
|
||||
const plaintext = newPlaintextToken();
|
||||
const ttlMs = tokenTtlMs();
|
||||
const doc = FleetFactoryTokenDocSchema.parse({
|
||||
id: tokenId,
|
||||
productId: args.productId,
|
||||
@ -132,6 +153,7 @@ async function issueToken(args: IssueTokenArgs): Promise<IssuedToken> {
|
||||
label: args.label,
|
||||
status: 'active',
|
||||
createdAt: new Date().toISOString(),
|
||||
...(ttlMs !== null ? { expiresAt: new Date(Date.now() + ttlMs).toISOString() } : {}),
|
||||
});
|
||||
await repo.createFactoryToken(doc);
|
||||
return {
|
||||
@ -206,6 +228,8 @@ export async function verifyToken(plaintext: string | undefined): Promise<TokenS
|
||||
if (!doc) return null;
|
||||
if (!constantTimeEqualHex(doc.tokenHash, hash)) return null;
|
||||
if (doc.status === 'revoked') return null;
|
||||
// Absolute expiry bounds a leaked credential regardless of status.
|
||||
if (doc.expiresAt && new Date(doc.expiresAt).getTime() < Date.now()) return null;
|
||||
if (doc.status === 'rotating') {
|
||||
const until = doc.rotatingUntil ? new Date(doc.rotatingUntil).getTime() : 0;
|
||||
if (!until || until < Date.now()) return null;
|
||||
@ -222,6 +246,30 @@ export async function verifyToken(plaintext: string | undefined): Promise<TokenS
|
||||
};
|
||||
}
|
||||
|
||||
/**
 * Garbage-collects factory tokens that can no longer authenticate, keeping the
 * credential store bounded. A token is removed when it is `revoked`, when its
 * `expiresAt` lies before `nowMs`, or when it is `rotating` with a missing or
 * already-passed `rotatingUntil`. `active` tokens and `rotating` tokens still
 * inside their grace window are left untouched.
 *
 * Deletion is best-effort: a failed delete is skipped and not counted.
 *
 * @param nowMs reference time in epoch milliseconds (defaults to the current time).
 * @returns the number of tokens successfully deleted.
 */
export async function pruneInvalidatedTokens(nowMs = Date.now()): Promise<number> {
  const isPast = (iso: string): boolean => new Date(iso).getTime() < nowMs;

  const tokens = await repo.listAllFactoryTokens();
  let removed = 0;

  for (const token of tokens) {
    const pastExpiry = token.expiresAt ? isPast(token.expiresAt) : false;
    const graceOver =
      token.status === 'rotating' && (token.rotatingUntil ? isPast(token.rotatingUntil) : true);

    const stillUsable = token.status !== 'revoked' && !pastExpiry && !graceOver;
    if (stillUsable) continue;

    try {
      await repo.deleteFactoryToken(token.id, token.productId);
      removed += 1;
    } catch {
      /* best-effort */
    }
  }

  return removed;
}
|
||||
|
||||
// ── Gated request enforcement (heartbeat / claim) ─────────────────────────────
|
||||
|
||||
export function extractFactoryToken(req: FastifyRequest): string | undefined {
|
||||
|
||||
@ -24,6 +24,7 @@ const log = { info: vi.fn(), warn: vi.fn() };
|
||||
const emptyGc = {
|
||||
leasesDeleted: 0,
|
||||
factoriesDeleted: 0,
|
||||
tokensDeleted: 0,
|
||||
jobsDeleted: 0,
|
||||
runsDeleted: 0,
|
||||
eventsDeleted: 0,
|
||||
|
||||
@ -68,12 +68,17 @@ async function runReapPass(log?: ReaperLog): Promise<void> {
|
||||
try {
|
||||
const gc = await sweepFleetGarbage({ now: nowMs, jobRetentionMs: jobRetentionMs() });
|
||||
const total =
|
||||
gc.leasesDeleted + gc.factoriesDeleted + gc.jobsDeleted + gc.runsDeleted + gc.eventsDeleted;
|
||||
gc.leasesDeleted +
|
||||
gc.factoriesDeleted +
|
||||
gc.tokensDeleted +
|
||||
gc.jobsDeleted +
|
||||
gc.runsDeleted +
|
||||
gc.eventsDeleted;
|
||||
if (total > 0) {
|
||||
log?.info(
|
||||
`[fleet-reaper] gc: ${gc.leasesDeleted} lease(s), ${gc.factoriesDeleted} factory(ies), ` +
|
||||
`${gc.jobsDeleted} job(s) + ${gc.runsDeleted} run(s)/${gc.eventsDeleted} event(s)/` +
|
||||
`${gc.artifactsDeleted} artifact(s)`
|
||||
`${gc.tokensDeleted} token(s), ${gc.jobsDeleted} job(s) + ${gc.runsDeleted} run(s)/` +
|
||||
`${gc.eventsDeleted} event(s)/${gc.artifactsDeleted} artifact(s)`
|
||||
);
|
||||
}
|
||||
} catch (err) {
|
||||
|
||||
@ -455,6 +455,15 @@ export async function updateFactoryToken(
|
||||
return factoryTokens().update(id, productId, updates);
|
||||
}
|
||||
|
||||
/**
 * Fetches every factory-token document across all products (cross-partition)
 * by issuing an unfiltered `findMany` on the factory-token collection.
 * Used by token GC.
 */
export async function listAllFactoryTokens(): Promise<FleetFactoryTokenDoc[]> {
  const collection = factoryTokens();
  return collection.findMany({});
}
|
||||
|
||||
/**
 * Deletes the factory token `id` scoped to `productId` from the factory-token
 * collection. Resolves once the delete call has completed; any error from the
 * underlying delete propagates to the caller.
 */
export async function deleteFactoryToken(id: string, productId: string): Promise<void> {
  const collection = factoryTokens();
  await collection.delete(id, productId);
}
|
||||
|
||||
// ── Budgets (Phase 3 §11/§13) ──────────────────────────────────────────────
|
||||
|
||||
export async function getBudget(productId: string): Promise<FleetBudgetDoc | null> {
|
||||
|
||||
@ -304,6 +304,10 @@ export const FleetFactoryTokenDocSchema = z.object({
|
||||
createdAt: z.string(),
|
||||
lastUsedAt: z.string().optional(),
|
||||
rotatingUntil: z.string().optional(),
|
||||
/** Absolute expiry (ISO). A token past `expiresAt` is rejected by verifyToken,
|
||||
* bounding the blast radius of a leaked credential. Absent ⇒ never expires
|
||||
* (FLEET_TOKEN_TTL_DAYS=0 at issue time). */
|
||||
expiresAt: z.string().optional(),
|
||||
});
|
||||
export type FleetFactoryTokenDoc = z.infer<typeof FleetFactoryTokenDocSchema>;
|
||||
|
||||
|
||||
Loading…
Reference in New Issue
Block a user