From 721d3fcb4879605d2ac78c20339bc1813459c131 Mon Sep 17 00:00:00 2001 From: saravanakumardb1 Date: Fri, 29 May 2026 20:19:59 -0700 Subject: [PATCH] feat(platform-service): fleet data model + container registration (P2 foundation) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds the agent-gigafactory fleet data model (modules/fleet/types.ts): Zod schemas as the source of truth with inferred types (no `any`) for the 7 durable containers — FleetJobDoc, FleetRunDoc, FleetLeaseDoc, FleetFactoryDoc, FleetProfileDoc, FleetEventDoc, FleetArtifactDoc — each carrying productId. Lifecycle stages mirror the agent-queue gigafactory spec (queued|blocked|assigned|building|review|testing| shipped|failed|dead_letter). Registers fleet_* containers with their partition keys (/productId for jobs/factories/profiles, /jobId for runs/leases/events/artifacts). --- .../platform-service/src/lib/cosmos-init.ts | 8 + .../src/modules/fleet/types.test.ts | 219 ++++++++++++ .../src/modules/fleet/types.ts | 317 ++++++++++++++++++ 3 files changed, 544 insertions(+) create mode 100644 services/platform-service/src/modules/fleet/types.test.ts create mode 100644 services/platform-service/src/modules/fleet/types.ts diff --git a/services/platform-service/src/lib/cosmos-init.ts b/services/platform-service/src/lib/cosmos-init.ts index 84d2a98a..a0e93928 100644 --- a/services/platform-service/src/lib/cosmos-init.ts +++ b/services/platform-service/src/lib/cosmos-init.ts @@ -187,6 +187,14 @@ const CONTAINER_DEFS: Record = { // i18n (P3) translations: { partitionKeyPath: '/locale' }, i18n_locales: { partitionKeyPath: '/locale' }, + // Agent Gigafactory — fleet coordinator (see modules/fleet/README.md) + fleet_jobs: { partitionKeyPath: '/productId' }, + fleet_runs: { partitionKeyPath: '/jobId' }, + fleet_leases: { partitionKeyPath: '/jobId' }, + fleet_factories: { partitionKeyPath: '/productId' }, + fleet_profiles: { partitionKeyPath: '/productId' }, + fleet_events: { partitionKeyPath: '/jobId' }, + fleet_artifacts: { partitionKeyPath: '/jobId' }, }; export async function initCosmosIfNeeded(): Promise { diff --git a/services/platform-service/src/modules/fleet/types.test.ts b/services/platform-service/src/modules/fleet/types.test.ts new file mode 100644 index 00000000..5cdb93ff --- /dev/null +++ b/services/platform-service/src/modules/fleet/types.test.ts @@ -0,0 +1,219 @@ +/** + * Fleet schema validation — valid docs pass; missing productId / bad enum fail. + */ + +import { describe, it, expect } from 'vitest'; +import { + FleetJobDocSchema, + FleetRunDocSchema, + FleetLeaseDocSchema, + FleetFactoryDocSchema, + FleetProfileDocSchema, + FleetEventDocSchema, + FleetArtifactDocSchema, + SubmitJobSchema, + PatchJobSchema, + ClaimSchema, + FLEET_STAGES, + FLEET_PRIORITIES, +} from './types.js'; + +const now = '2026-05-30T00:00:00.000Z'; + +const validJob = { + id: 'fjob_1', + productId: 'lysnrai', + stage: 'queued', + idempotencyKey: 'task-1', + contentHash: 'abc', + bodyMd: '# task', + manifestSnapshot: { + priority: 'medium', + capabilities: [], + prefersEngine: [], + allowedScope: [], + deps: [], + }, + priority: 'medium', + priorityOrder: 2, + capabilities: [], + deps: [], + kind: 'leaf', + attempts: 0, + leaseEpoch: 0, + rev: 0, + createdAt: now, + updatedAt: now, +}; + +describe('FleetJobDocSchema', () => { + it('accepts a valid job', () => { + expect(FleetJobDocSchema.safeParse(validJob).success).toBe(true); + }); + it('rejects a missing productId', () => { + const { productId: _omit, ...bad } = validJob; + expect(FleetJobDocSchema.safeParse(bad).success).toBe(false); + }); + it('rejects an empty productId', () => { + expect(FleetJobDocSchema.safeParse({ ...validJob, productId: '' }).success).toBe(false); + }); + it('rejects an invalid stage enum', () => { + expect(FleetJobDocSchema.safeParse({ ...validJob, stage: 'done' }).success).toBe(false); + }); + it('rejects an invalid priority enum', () => { + expect(FleetJobDocSchema.safeParse({ ...validJob, priority: 'urgent' }).success).toBe(false); + }); +}); + +describe('FleetRunDocSchema', () => { + const validRun = { + id: 'fjob_1:run:1', + productId: 'lysnrai', + jobId: 'fjob_1', + attempt: 1, + engine: 'devin', + startedAt: now, + insights: { tokensIn: 10, tokensOut: 5 }, + }; + it('accepts a valid run', () => { + expect(FleetRunDocSchema.safeParse(validRun).success).toBe(true); + }); + it('rejects missing productId', () => { + const { productId: _o, ...bad } = validRun; + expect(FleetRunDocSchema.safeParse(bad).success).toBe(false); + }); + it('rejects attempt <= 0', () => { + expect(FleetRunDocSchema.safeParse({ ...validRun, attempt: 0 }).success).toBe(false); + }); +}); + +describe('FleetLeaseDocSchema', () => { + const validLease = { + id: 'fjob_1', + productId: 'lysnrai', + jobId: 'fjob_1', + leaseEpoch: 1, + renewals: 0, + status: 'held', + rev: 0, + updatedAt: now, + }; + it('accepts a valid lease', () => { + expect(FleetLeaseDocSchema.safeParse(validLease).success).toBe(true); + }); + it('rejects an invalid status', () => { + expect(FleetLeaseDocSchema.safeParse({ ...validLease, status: 'open' }).success).toBe(false); + }); + it('rejects missing productId', () => { + const { productId: _o, ...bad } = validLease; + expect(FleetLeaseDocSchema.safeParse(bad).success).toBe(false); + }); +}); + +describe('FleetFactoryDocSchema', () => { + const validFactory = { + id: 'fac_1', + productId: 'lysnrai', + factoryId: 'fac_1', + descriptor: { os: 'mac' }, + capabilities: ['os:mac'], + health: 'ok', + load: 0, + seatLimit: 2, + lastHeartbeatAt: now, + }; + it('accepts a valid factory', () => { + expect(FleetFactoryDocSchema.safeParse(validFactory).success).toBe(true); + }); + it('rejects invalid health enum', () => { + expect(FleetFactoryDocSchema.safeParse({ ...validFactory, health: 'sick' }).success).toBe( + false + ); + }); +}); + +describe('FleetProfileDocSchema / FleetEventDocSchema / FleetArtifactDocSchema', () => { + it('accepts a valid profile and rejects missing productId', () => { + const valid = { + id: 'prof_1', + productId: 'lysnrai', + name: 'backend', + version: 1, + snapshot: {}, + createdAt: now, + }; + expect(FleetProfileDocSchema.safeParse(valid).success).toBe(true); + const { productId: _o, ...bad } = valid; + expect(FleetProfileDocSchema.safeParse(bad).success).toBe(false); + }); + it('accepts a valid event and rejects missing type', () => { + const valid = { + id: 'fjob_1:evt:0', + productId: 'lysnrai', + jobId: 'fjob_1', + seq: 0, + type: 'submitted', + at: now, + data: {}, + }; + expect(FleetEventDocSchema.safeParse(valid).success).toBe(true); + const { type: _t, ...bad } = valid; + expect(FleetEventDocSchema.safeParse(bad).success).toBe(false); + }); + it('accepts a valid artifact and rejects missing blobUrl', () => { + const valid = { + id: 'art_1', + productId: 'lysnrai', + jobId: 'fjob_1', + kind: 'coverage', + blobUrl: 'https://b/x', + createdAt: now, + }; + expect(FleetArtifactDocSchema.safeParse(valid).success).toBe(true); + const { blobUrl: _b, ...bad } = valid; + expect(FleetArtifactDocSchema.safeParse(bad).success).toBe(false); + }); +}); + +describe('request schemas', () => { + it('SubmitJobSchema applies defaults', () => { + const parsed = SubmitJobSchema.safeParse({ idempotencyKey: 'k', bodyMd: '# do' }); + expect(parsed.success).toBe(true); + if (parsed.success) { + expect(parsed.data.priority).toBe('medium'); + expect(parsed.data.kind).toBe('leaf'); + expect(parsed.data.deps).toEqual([]); + } + }); + it('SubmitJobSchema rejects empty bodyMd / idempotencyKey', () => { + expect(SubmitJobSchema.safeParse({ idempotencyKey: '', bodyMd: 'x' }).success).toBe(false); + expect(SubmitJobSchema.safeParse({ idempotencyKey: 'k', bodyMd: '' }).success).toBe(false); + }); + it('PatchJobSchema requires leaseEpoch', () => { + expect(PatchJobSchema.safeParse({ stage: 'building' }).success).toBe(false); + expect(PatchJobSchema.safeParse({ leaseEpoch: 1, stage: 'building' }).success).toBe(true); + }); + it('ClaimSchema requires factoryId', () => { + expect(ClaimSchema.safeParse({ capabilities: [] }).success).toBe(false); + expect(ClaimSchema.safeParse({ factoryId: 'fac_1' }).success).toBe(true); + }); +}); + +describe('enum constants', () => { + it('stages match the agent-queue lifecycle', () => { + expect(FLEET_STAGES).toEqual([ + 'queued', + 'blocked', + 'assigned', + 'building', + 'review', + 'testing', + 'shipped', + 'failed', + 'dead_letter', + ]); + }); + it('priorities', () => { + expect(FLEET_PRIORITIES).toEqual(['critical', 'high', 'medium', 'low']); + }); +}); diff --git a/services/platform-service/src/modules/fleet/types.ts b/services/platform-service/src/modules/fleet/types.ts new file mode 100644 index 00000000..f20c859f --- /dev/null +++ b/services/platform-service/src/modules/fleet/types.ts @@ -0,0 +1,317 @@ +/** + * Fleet module — types for the agent-gigafactory coordinator (Phase 2 foundation). + * + * Durable data model for distributed agent jobs: jobs, runs, leases, factories, + * profiles, events, and artifacts. Product-agnostic — every document carries a + * `productId`. Zod schemas are the source of truth; doc/input types are inferred + * from them (no `any`). + * + * Field names + lifecycle mirror the agent-queue gigafactory spec + * (../learning_ai_devops_tools/agent-queue/docs/GIGAFACTORY_ROADMAP.md §4/§7/§8/§13). + */ + +import { z } from 'zod'; + +// ── Enums ─────────────────────────────────────────────────────────────────── + +/** Canonical job lifecycle (§11). `blocked` = unmet deps; `dead_letter` = retries exhausted. */ +export const FLEET_STAGES = [ + 'queued', + 'blocked', + 'assigned', + 'building', + 'review', + 'testing', + 'shipped', + 'failed', + 'dead_letter', +] as const; +export type FleetStage = (typeof FLEET_STAGES)[number]; + +/** Stages from which a worker may still progress / that count as "in flight". */ +export const ACTIVE_STAGES: readonly FleetStage[] = ['assigned', 'building', 'review', 'testing']; +/** Stages that satisfy a hard dependency / a soft dependency. */ +export const DEP_DONE_HARD: readonly FleetStage[] = ['shipped']; +export const DEP_DONE_SOFT: readonly FleetStage[] = ['shipped', 'testing']; + +export const FLEET_PRIORITIES = ['critical', 'high', 'medium', 'low'] as const; +export type FleetPriority = (typeof FLEET_PRIORITIES)[number]; +export const PRIORITY_ORDER: Record = { + critical: 0, + high: 1, + medium: 2, + low: 3, +}; + +export const FLEET_ENGINE_CLASSES = ['agentic-coder', 'chat-coder', 'review-only'] as const; +export type FleetEngineClass = (typeof FLEET_ENGINE_CLASSES)[number]; + +export const DEPS_MODES = ['hard', 'soft'] as const; +export type DepsMode = (typeof DEPS_MODES)[number]; + +/** Failure classes a `retry.on` may name (mirrors agent-queue). */ +export const RETRY_CLASSES = ['timeout', 'verify_failed', 'crash', 'agent_error'] as const; +export type RetryClass = (typeof RETRY_CLASSES)[number]; + +/** Terminal run results recorded on a FleetRunDoc. */ +export const RUN_RESULTS = [ + 'review', + 'testing', + 'shipped', + 'failed', + 'timeout', + 'verify_failed', + 'capability_mismatch', + 'no_engine', + 'retries_exhausted', +] as const; +export type RunResult = (typeof RUN_RESULTS)[number]; + +export const FACTORY_HEALTH = ['ok', 'degraded', 'down'] as const; +export type FactoryHealth = (typeof FACTORY_HEALTH)[number]; + +export const LEASE_STATUS = ['held', 'expired', 'released'] as const; +export type LeaseStatus = (typeof LEASE_STATUS)[number]; + +export const JOB_KINDS = ['leaf', 'composite'] as const; +export type JobKind = (typeof JOB_KINDS)[number]; + +// ── Shared value objects ───────────────────────────────────────────────────── + +export const CheckpointSchema = z.object({ + wipBranch: z.string(), + wipBase: z.string().optional(), + wipCommit: z.string().optional(), +}); + +export const BudgetSchema = z.object({ + usd: z.number().nonnegative().optional(), + tokens: z.number().nonnegative().optional(), + wall: z.number().nonnegative().optional(), +}); + +export const RetryPolicySchema = z.object({ + max: z.number().int().nonnegative(), + backoff: z.string().optional(), + on: z.array(z.enum(RETRY_CLASSES)).default([]), +}); + +export const ManifestSnapshotSchema = z.object({ + priority: z.enum(FLEET_PRIORITIES).default('medium'), + capabilities: z.array(z.string()).default([]), + engineClass: z.enum(FLEET_ENGINE_CLASSES).optional(), + profile: z.string().optional(), + prefersEngine: z.array(z.string()).default([]), + allowedScope: z.array(z.string()).default([]), + deps: z.array(z.string()).default([]), + depsMode: z.enum(DEPS_MODES).optional(), + budget: BudgetSchema.optional(), + retry: RetryPolicySchema.optional(), +}); + +export const InsightsSchema = z.object({ + model: z.string().optional(), + tokensIn: z.number().optional(), + tokensOut: z.number().optional(), + tokensCached: z.number().optional(), + costUsd: z.number().optional(), + estimated: z.boolean().optional(), + turns: z.number().optional(), + toolCalls: z.number().optional(), + filesChanged: z.number().optional(), + linesAdded: z.number().optional(), + linesDeleted: z.number().optional(), +}); + +// ── Container documents ─────────────────────────────────────────────────────── + +/** + * FleetJobDoc — the durable job (pk `/productId`). + * `rev` is the optimistic-concurrency token used by the coordinator's atomic + * claim + fenced transitions (maps to Cosmos `_etag` / If-Match in production; + * see repository.revUpdate). + */ +export const FleetJobDocSchema = z.object({ + id: z.string(), + productId: z.string().min(1), + stage: z.enum(FLEET_STAGES), + idempotencyKey: z.string().min(1), + contentHash: z.string(), + bodyMd: z.string(), + manifestSnapshot: ManifestSnapshotSchema, + priority: z.enum(FLEET_PRIORITIES), + priorityOrder: z.number().int(), + capabilities: z.array(z.string()).default([]), + engineClass: z.enum(FLEET_ENGINE_CLASSES).optional(), + profile: z.string().optional(), + deps: z.array(z.string()).default([]), + depsMode: z.enum(DEPS_MODES).optional(), + budget: BudgetSchema.optional(), + retry: RetryPolicySchema.optional(), + kind: z.enum(JOB_KINDS).default('leaf'), + parentId: z.string().optional(), + trackerItemId: z.string().optional(), + checkpoint: CheckpointSchema.optional(), + attempts: z.number().int().nonnegative().default(0), + leaseEpoch: z.number().int().nonnegative().default(0), + rev: z.number().int().nonnegative().default(0), + blockedReason: z.string().optional(), + createdAt: z.string(), + updatedAt: z.string(), +}); +export type FleetJobDoc = z.infer; + +/** FleetRunDoc — one execution attempt of a job (pk `/jobId`). */ +export const FleetRunDocSchema = z.object({ + id: z.string(), + productId: z.string().min(1), + jobId: z.string().min(1), + attempt: z.number().int().positive(), + factoryId: z.string().optional(), + engine: z.string(), + profileSnapshot: z.record(z.string(), z.unknown()).optional(), + startedAt: z.string(), + endedAt: z.string().optional(), + exit: z.number().int().optional(), + verifyResult: z.enum(['pass', 'fail']).optional(), + result: z.enum(RUN_RESULTS).optional(), + insights: InsightsSchema.default({}), +}); +export type FleetRunDoc = z.infer; + +/** FleetLeaseDoc — the single-holder lease for a job (pk `/jobId`). */ +export const FleetLeaseDocSchema = z.object({ + id: z.string(), + productId: z.string().min(1), + jobId: z.string().min(1), + holderFactoryId: z.string().optional(), + expiresAt: z.string().optional(), + leaseEpoch: z.number().int().nonnegative().default(0), + renewals: z.number().int().nonnegative().default(0), + status: z.enum(LEASE_STATUS).default('held'), + rev: z.number().int().nonnegative().default(0), + updatedAt: z.string(), +}); +export type FleetLeaseDoc = z.infer; + +/** FleetFactoryDoc — a registered worker host (pk `/productId`). */ +export const FleetFactoryDocSchema = z.object({ + id: z.string(), + productId: z.string().min(1), + factoryId: z.string().min(1), + descriptor: z.record(z.string(), z.unknown()).default({}), + capabilities: z.array(z.string()).default([]), + health: z.enum(FACTORY_HEALTH).default('ok'), + load: z.number().nonnegative().default(0), + seatLimit: z.number().int().positive().default(1), + lastHeartbeatAt: z.string(), +}); +export type FleetFactoryDoc = z.infer; + +/** FleetProfileDoc — an immutable, versioned profile snapshot (pk `/productId`). */ +export const FleetProfileDocSchema = z.object({ + id: z.string(), + productId: z.string().min(1), + name: z.string().min(1), + version: z.number().int().positive(), + snapshot: z.record(z.string(), z.unknown()), + createdAt: z.string(), +}); +export type FleetProfileDoc = z.infer; + +/** FleetEventDoc — append-only audit/event stream entry (pk `/jobId`). */ +export const FleetEventDocSchema = z.object({ + id: z.string(), + productId: z.string().min(1), + jobId: z.string().min(1), + seq: z.number().int().nonnegative(), + type: z.string().min(1), + at: z.string(), + actor: z.string().optional(), + data: z.record(z.string(), z.unknown()).default({}), +}); +export type FleetEventDoc = z.infer; + +/** FleetArtifactDoc — pointer to a blob-stored artifact (pk `/jobId`). No inline logs. */ +export const FleetArtifactDocSchema = z.object({ + id: z.string(), + productId: z.string().min(1), + jobId: z.string().min(1), + runId: z.string().optional(), + kind: z.string().min(1), + blobUrl: z.string().min(1), + sizeBytes: z.number().int().nonnegative().optional(), + createdAt: z.string(), +}); +export type FleetArtifactDoc = z.infer; + +// ── API request schemas (routes) ────────────────────────────────────────────── + +export const SubmitJobSchema = z.object({ + productId: z.string().min(1).optional(), + idempotencyKey: z.string().min(1), + bodyMd: z.string().min(1), + priority: z.enum(FLEET_PRIORITIES).default('medium'), + capabilities: z.array(z.string()).default([]), + engineClass: z.enum(FLEET_ENGINE_CLASSES).optional(), + profile: z.string().optional(), + prefersEngine: z.array(z.string()).default([]), + allowedScope: z.array(z.string()).default([]), + deps: z.array(z.string()).default([]), + depsMode: z.enum(DEPS_MODES).optional(), + budget: BudgetSchema.optional(), + retry: RetryPolicySchema.optional(), + kind: z.enum(JOB_KINDS).default('leaf'), + parentId: z.string().optional(), + trackerItemId: z.string().optional(), +}); +export type SubmitJobInput = z.infer; + +export const ListJobsQuerySchema = z.object({ + productId: z.string().optional(), + stage: z.enum(FLEET_STAGES).optional(), + idempotencyKey: z.string().optional(), + limit: z.coerce.number().int().min(1).max(200).default(50), + offset: z.coerce.number().int().min(0).default(0), +}); +export type ListJobsQuery = z.infer; + +/** Fenced state transition — worker MUST carry its leaseEpoch. */ +export const PatchJobSchema = z.object({ + leaseEpoch: z.number().int().nonnegative(), + stage: z.enum(FLEET_STAGES).optional(), + checkpoint: CheckpointSchema.optional(), + blockedReason: z.string().optional(), +}); +export type PatchJobInput = z.infer; + +export const ClaimSchema = z.object({ + productId: z.string().min(1).optional(), + factoryId: z.string().min(1), + capabilities: z.array(z.string()).default([]), + leaseSeconds: z.number().int().positive().max(86400).default(900), +}); +export type ClaimInput = z.infer; + +export const RenewLeaseSchema = z.object({ + leaseEpoch: z.number().int().nonnegative(), + leaseSeconds: z.number().int().positive().max(86400).default(900), +}); +export type RenewLeaseInput = z.infer; + +export const ReleaseLeaseSchema = z.object({ + leaseEpoch: z.number().int().nonnegative(), + stage: z.enum(FLEET_STAGES).optional(), +}); +export type ReleaseLeaseInput = z.infer; + +export const HeartbeatSchema = z.object({ + productId: z.string().min(1).optional(), + factoryId: z.string().min(1), + descriptor: z.record(z.string(), z.unknown()).optional(), + capabilities: z.array(z.string()).optional(), + health: z.enum(FACTORY_HEALTH).optional(), + load: z.number().nonnegative().optional(), + seatLimit: z.number().int().positive().optional(), +}); +export type HeartbeatInput = z.infer;