feat(cowork-service): H.7 — wire @bytelyst/llm-router for multi-model routing

Added LLM routing module to cowork-service:
- lib/llm-router.ts — singleton LlmRouter with cloud + local Ollama support
- modules/llm/types.ts — Zod request schemas
- modules/llm/routes.ts — POST /api/llm/chat, GET /api/llm/providers, GET /api/llm/health
- All endpoints gated by llm_multi_model_enabled feature flag
- Best-effort init: service works without API keys (router stays uninitialized)
- 8 new tests (routes), server test updated for 3 route modules
- 57 total tests passing, typecheck clean
2026-04-02 23:10:07 -07:00 · 2026-04-02 23:10:07 -07:00 · f542160784
commit f542160784
parent 53c3565874
8 changed files with 362 additions and 24 deletions
--- a/pnpm-lock.yaml
+++ b/pnpm-lock.yaml
@ -195,7 +195,7 @@ importers:
        version: 9.39.2(jiti@2.6.1)
      eslint-config-next:
        specifier: 16.1.6
-        version: 16.1.6(@typescript-eslint/parser@8.55.0(eslint@9.39.2(jiti@2.6.1))(typescript@5.9.3))(eslint@9.39.2(jiti@2.6.1))(typescript@5.9.3)
+        version: 16.1.6(@typescript-eslint/parser@8.56.0(eslint@9.39.2(jiti@2.6.1))(typescript@5.9.3))(eslint@9.39.2(jiti@2.6.1))(typescript@5.9.3)
      husky:
        specifier: ^9.0.0
        version: 9.1.7
@ -292,7 +292,7 @@ importers:
        version: 9.39.2(jiti@2.6.1)
      eslint-config-next:
        specifier: 16.1.6
-        version: 16.1.6(@typescript-eslint/parser@8.56.0(eslint@9.39.2(jiti@2.6.1))(typescript@5.9.3))(eslint@9.39.2(jiti@2.6.1))(typescript@5.9.3)
+        version: 16.1.6(@typescript-eslint/parser@8.55.0(eslint@9.39.2(jiti@2.6.1))(typescript@5.9.3))(eslint@9.39.2(jiti@2.6.1))(typescript@5.9.3)
      husky:
        specifier: ^9.0.0
        version: 9.1.7
@ -991,6 +991,9 @@ importers:
      '@bytelyst/fastify-core':
        specifier: workspace:*
        version: link:../../packages/fastify-core
+      '@bytelyst/llm-router':
+        specifier: workspace:*
+        version: link:../../packages/llm-router
      '@bytelyst/logger':
        specifier: workspace:*
        version: link:../../packages/logger
@ -15280,7 +15283,7 @@ snapshots:
      '@next/eslint-plugin-next': 16.1.6
      eslint: 9.39.2(jiti@2.6.1)
      eslint-import-resolver-node: 0.3.9
-      eslint-import-resolver-typescript: 3.10.1(eslint-plugin-import@2.32.0(@typescript-eslint/parser@8.56.0(eslint@9.39.2(jiti@2.6.1))(typescript@5.9.3))(eslint@9.39.2(jiti@2.6.1)))(eslint@9.39.2(jiti@2.6.1))
+      eslint-import-resolver-typescript: 3.10.1(eslint-plugin-import@2.32.0)(eslint@9.39.2(jiti@2.6.1))
      eslint-plugin-import: 2.32.0(@typescript-eslint/parser@8.56.0(eslint@9.39.2(jiti@2.6.1))(typescript@5.9.3))(eslint-import-resolver-typescript@3.10.1)(eslint@9.39.2(jiti@2.6.1))
      eslint-plugin-jsx-a11y: 6.10.2(eslint@9.39.2(jiti@2.6.1))
      eslint-plugin-react: 7.37.5(eslint@9.39.2(jiti@2.6.1))
@ -15303,7 +15306,7 @@ snapshots:
    transitivePeerDependencies:
      - supports-color

-  eslint-import-resolver-typescript@3.10.1(eslint-plugin-import@2.32.0(@typescript-eslint/parser@8.56.0(eslint@9.39.2(jiti@2.6.1))(typescript@5.9.3))(eslint@9.39.2(jiti@2.6.1)))(eslint@9.39.2(jiti@2.6.1)):
+  eslint-import-resolver-typescript@3.10.1(eslint-plugin-import@2.32.0)(eslint@9.39.2(jiti@2.6.1)):
    dependencies:
      '@nolyfill/is-core-module': 1.0.39
      debug: 4.4.3
@ -15318,21 +15321,6 @@ snapshots:
    transitivePeerDependencies:
      - supports-color

-  eslint-import-resolver-typescript@3.10.1(eslint-plugin-import@2.32.0)(eslint@9.39.2(jiti@2.6.1)):
-    dependencies:
-      '@nolyfill/is-core-module': 1.0.39
-      debug: 4.4.3
-      eslint: 9.39.2(jiti@2.6.1)
-      get-tsconfig: 4.13.6
-      is-bun-module: 2.0.0
-      stable-hash: 0.0.5
-      tinyglobby: 0.2.15
-      unrs-resolver: 1.11.1
-    optionalDependencies:
-      eslint-plugin-import: 2.32.0(@typescript-eslint/parser@8.55.0(eslint@9.39.2(jiti@2.6.1))(typescript@5.9.3))(eslint-import-resolver-typescript@3.10.1)(eslint@9.39.2(jiti@2.6.1))
-    transitivePeerDependencies:
-      - supports-color
-
  eslint-module-utils@2.12.1(@typescript-eslint/parser@8.55.0(eslint@9.39.2(jiti@2.6.1))(typescript@5.9.3))(eslint-import-resolver-node@0.3.9)(eslint-import-resolver-typescript@3.10.1)(eslint@9.39.2(jiti@2.6.1)):
    dependencies:
      debug: 3.2.7
@ -15344,14 +15332,14 @@ snapshots:
    transitivePeerDependencies:
      - supports-color

-  eslint-module-utils@2.12.1(@typescript-eslint/parser@8.56.0(eslint@9.39.2(jiti@2.6.1))(typescript@5.9.3))(eslint-import-resolver-node@0.3.9)(eslint-import-resolver-typescript@3.10.1(eslint-plugin-import@2.32.0(@typescript-eslint/parser@8.56.0(eslint@9.39.2(jiti@2.6.1))(typescript@5.9.3))(eslint@9.39.2(jiti@2.6.1)))(eslint@9.39.2(jiti@2.6.1)))(eslint@9.39.2(jiti@2.6.1)):
+  eslint-module-utils@2.12.1(@typescript-eslint/parser@8.56.0(eslint@9.39.2(jiti@2.6.1))(typescript@5.9.3))(eslint-import-resolver-node@0.3.9)(eslint-import-resolver-typescript@3.10.1)(eslint@9.39.2(jiti@2.6.1)):
    dependencies:
      debug: 3.2.7
    optionalDependencies:
      '@typescript-eslint/parser': 8.56.0(eslint@9.39.2(jiti@2.6.1))(typescript@5.9.3)
      eslint: 9.39.2(jiti@2.6.1)
      eslint-import-resolver-node: 0.3.9
-      eslint-import-resolver-typescript: 3.10.1(eslint-plugin-import@2.32.0(@typescript-eslint/parser@8.56.0(eslint@9.39.2(jiti@2.6.1))(typescript@5.9.3))(eslint@9.39.2(jiti@2.6.1)))(eslint@9.39.2(jiti@2.6.1))
+      eslint-import-resolver-typescript: 3.10.1(eslint-plugin-import@2.32.0)(eslint@9.39.2(jiti@2.6.1))
    transitivePeerDependencies:
      - supports-color

@ -15395,7 +15383,7 @@ snapshots:
      doctrine: 2.1.0
      eslint: 9.39.2(jiti@2.6.1)
      eslint-import-resolver-node: 0.3.9
-      eslint-module-utils: 2.12.1(@typescript-eslint/parser@8.56.0(eslint@9.39.2(jiti@2.6.1))(typescript@5.9.3))(eslint-import-resolver-node@0.3.9)(eslint-import-resolver-typescript@3.10.1(eslint-plugin-import@2.32.0(@typescript-eslint/parser@8.56.0(eslint@9.39.2(jiti@2.6.1))(typescript@5.9.3))(eslint@9.39.2(jiti@2.6.1)))(eslint@9.39.2(jiti@2.6.1)))(eslint@9.39.2(jiti@2.6.1))
+      eslint-module-utils: 2.12.1(@typescript-eslint/parser@8.56.0(eslint@9.39.2(jiti@2.6.1))(typescript@5.9.3))(eslint-import-resolver-node@0.3.9)(eslint-import-resolver-typescript@3.10.1)(eslint@9.39.2(jiti@2.6.1))
      hasown: 2.0.2
      is-core-module: 2.16.1
      is-glob: 4.0.3
--- a/services/cowork-service/package.json
+++ b/services/cowork-service/package.json
@ -21,6 +21,7 @@
    "@bytelyst/errors": "workspace:*",
    "@bytelyst/fastify-auth": "workspace:*",
    "@bytelyst/fastify-core": "workspace:*",
+    "@bytelyst/llm-router": "workspace:*",
    "@bytelyst/logger": "workspace:*",
    "@fastify/cors": "^10.0.2",
    "fastify": "^5.2.1",
--- a/services/cowork-service/src/lib/llm-router.ts
+++ b/services/cowork-service/src/lib/llm-router.ts
@ -0,0 +1,79 @@
+/**
+ * LLM Router singleton for cowork-service.
+ *
+ * Provides multi-model LLM routing via @bytelyst/llm-router.
+ * Supports cloud providers (Groq, OpenRouter, Together, Cerebras) and local Ollama.
+ * Gated by the `llm_multi_model_enabled` feature flag.
+ *
+ * The Rust runtime still handles primary Anthropic calls — this router adds
+ * alternative model access for multi-model comparison, fallback, and cost optimization.
+ */
+
+import { LlmRouter, createLocalOllamaProvider, type ProviderConfig, type TelemetryEntry } from '@bytelyst/llm-router';
+import { config } from './config.js';
+
+let _router: LlmRouter | null = null;
+
+export interface LlmRouterOptions {
+  /** Providers to register explicitly. When any provider is configured, initLlmRouter passes an explicit list instead of leaving the router on its defaults. */
+  extraProviders?: ProviderConfig[];
+  /** Local Ollama model IDs; a non-empty list makes initLlmRouter add a provider built by createLocalOllamaProvider. */
+  ollamaModels?: string[];
+  /** Ollama base URL, forwarded to createLocalOllamaProvider. Default: http://localhost:11434/v1 (NOTE(review): the default is not applied in this file — confirm the library supplies it). */
+  ollamaBaseUrl?: string;
+  /** Telemetry callback, forwarded to the router as its `onTelemetry` option. */
+  onTelemetry?: (entry: TelemetryEntry) => void;
+}
+
+/**
+ * Build a new LlmRouter and install it as the module singleton, replacing any
+ * router created by an earlier call. Meant to run once while the server boots.
+ *
+ * @param opts Optional provider list, Ollama settings and telemetry hook.
+ * @returns The router instance that later getLlmRouter() calls hand out.
+ */
+export function initLlmRouter(opts?: LlmRouterOptions): LlmRouter {
+  const localModels = opts?.ollamaModels ?? [];
+  // Explicit providers, in order: local Ollama (only when models were named),
+  // then whatever the caller supplied via extraProviders.
+  const explicitProviders: ProviderConfig[] = [
+    ...(localModels.length > 0
+      ? [createLocalOllamaProvider(localModels, opts?.ollamaBaseUrl)]
+      : []),
+    ...(opts?.extraProviders ?? []),
+  ];
+
+  // Leave `providers` out entirely when nothing was configured so the router
+  // falls back to its own defaults.
+  const providerOverride = explicitProviders.length > 0 ? { providers: explicitProviders } : {};
+
+  const router = new LlmRouter({
+    ...providerOverride,
+    timeoutMs: config.RUST_RUNTIME_TIMEOUT_MS,
+    onTelemetry: opts?.onTelemetry,
+  });
+  _router = router;
+  return router;
+}
+
+/**
+ * Return the router singleton.
+ * @throws Error if no router is installed (initLlmRouter() not called yet, or resetLlmRouter() ran since).
+ */
+export function getLlmRouter(): LlmRouter {
+  if (_router !== null) return _router;
+
+  throw new Error('LLM router not initialized — call initLlmRouter() first');
+}
+
+/**
+ * Non-throwing probe: true while a router singleton is installed, false before init or after reset.
+ */
+export function isLlmRouterReady(): boolean {
+  return Boolean(_router);
+}
+
+/** Drop the singleton so isLlmRouterReady() reports false and getLlmRouter() throws again (for testing). */
+export function resetLlmRouter(): void {
+  _router = null;
+}
--- a/services/cowork-service/src/modules/llm/routes.test.ts
+++ b/services/cowork-service/src/modules/llm/routes.test.ts
@ -0,0 +1,147 @@
+import { describe, expect, it, vi, beforeEach } from 'vitest';
+import Fastify from 'fastify';
+import { llmRoutes } from './routes.js';
+
+// Mock feature flags: by default only 'llm_multi_model_enabled' reports enabled
+vi.mock('../../lib/feature-flags.js', () => ({
+  isFeatureEnabled: vi.fn((flag: string) => flag === 'llm_multi_model_enabled'),
+}));
+
+// Mock LLM router: shared spies so each test can script chat() and inspect its calls
+const mockChat = vi.fn();
+const mockGetProviders = vi.fn(() => ['groq', 'openrouter']);
+const mockGetHealth = vi.fn(() => [
+  { provider: 'groq', model: 'llama-3.3-70b-versatile', healthy: true, totalRequests: 5, successes: 5, rateLimits: 0, errors: 0, avgLatencyMs: 200, p95LatencyMs: 300 },
+]);
+
+vi.mock('../../lib/llm-router.js', () => ({
+  getLlmRouter: vi.fn(() => ({ // spies are looked up lazily, when a route calls getLlmRouter()
+    chat: mockChat,
+    getProviders: mockGetProviders,
+    getHealth: mockGetHealth,
+  })),
+  isLlmRouterReady: vi.fn(() => true), // individual tests flip this to false to simulate an uninitialized router
+}));
+
+import { isFeatureEnabled } from '../../lib/feature-flags.js';
+import { isLlmRouterReady } from '../../lib/llm-router.js';
+
+async function buildApp() { // fresh Fastify per test: logging off, only the LLM routes registered
+  const server = Fastify({ logger: false });
+  await server.register(llmRoutes);
+  return server;
+}
+
+beforeEach(() => {
+  vi.clearAllMocks(); // wipe recorded calls left over from the previous test
+  vi.mocked(isFeatureEnabled).mockReturnValue(true); // default: every flag lookup reports enabled
+  vi.mocked(isLlmRouterReady).mockReturnValue(true); // default: router reported ready
+});
+
+describe('POST /api/llm/chat', () => {
+  it('routes a valid chat request', async () => {
+    mockChat.mockResolvedValue({ // carries the fields the route copies into its reply
+      response: { id: 'r1', object: 'chat.completion', created: 1, model: 'llama-3.3-70b', choices: [{ index: 0, message: { role: 'assistant', content: 'Hello' }, finish_reason: 'stop' }] },
+      provider: 'groq',
+      model: 'llama-3.3-70b-versatile',
+      totalLatencyMs: 150,
+      attempts: 1,
+    });
+
+    const app = await buildApp();
+    const res = await app.inject({ // simulated request via Fastify's inject()
+      method: 'POST',
+      url: '/api/llm/chat',
+      payload: { messages: [{ role: 'user', content: 'Hi' }] },
+    });
+
+    expect(res.statusCode).toBe(200);
+    const body = JSON.parse(res.body);
+    expect(body.provider).toBe('groq');
+    expect(body.model).toBe('llama-3.3-70b-versatile');
+    expect(mockChat).toHaveBeenCalledOnce();
+  });
+
+  it('returns 400 for invalid request body', async () => {
+    const app = await buildApp();
+    const res = await app.inject({
+      method: 'POST',
+      url: '/api/llm/chat',
+      payload: { messages: [] }, // empty array violates the schema's min(1)
+    });
+
+    expect(res.statusCode).toBe(400);
+    expect(JSON.parse(res.body).error).toBe('Invalid request');
+  });
+
+  it('returns 403 when flag is disabled', async () => {
+    vi.mocked(isFeatureEnabled).mockReturnValue(false); // flag off: the handler must stop before touching the router
+    const app = await buildApp();
+    const res = await app.inject({
+      method: 'POST',
+      url: '/api/llm/chat',
+      payload: { messages: [{ role: 'user', content: 'Hi' }] },
+    });
+
+    expect(res.statusCode).toBe(403);
+  });
+
+  it('returns 503 when router not ready', async () => {
+    vi.mocked(isLlmRouterReady).mockReturnValue(false); // simulate a router that was never initialized
+    const app = await buildApp();
+    const res = await app.inject({
+      method: 'POST',
+      url: '/api/llm/chat',
+      payload: { messages: [{ role: 'user', content: 'Hi' }] },
+    });
+
+    expect(res.statusCode).toBe(503);
+  });
+
+  it('returns 502 on router error', async () => {
+    mockChat.mockRejectedValue(new Error('All providers exhausted')); // router failure path
+    const app = await buildApp();
+    const res = await app.inject({
+      method: 'POST',
+      url: '/api/llm/chat',
+      payload: { messages: [{ role: 'user', content: 'Hi' }] },
+    });
+
+    expect(res.statusCode).toBe(502);
+    expect(JSON.parse(res.body).error).toContain('All providers exhausted'); // the route echoes the Error message to the client
+  });
+});
+
+describe('GET /api/llm/providers', () => {
+  it('returns provider list', async () => {
+    const app = await buildApp();
+    const res = await app.inject({ method: 'GET', url: '/api/llm/providers' });
+
+    expect(res.statusCode).toBe(200);
+    const body = JSON.parse(res.body);
+    expect(body.providers).toEqual(['groq', 'openrouter']); // values come from mockGetProviders
+    expect(body.ready).toBe(true);
+  });
+
+  it('returns empty when router not ready', async () => {
+    vi.mocked(isLlmRouterReady).mockReturnValue(false); // unlike POST /chat, a missing router is not an error here
+    const app = await buildApp();
+    const res = await app.inject({ method: 'GET', url: '/api/llm/providers' });
+
+    expect(res.statusCode).toBe(200);
+    expect(JSON.parse(res.body)).toEqual({ providers: [], ready: false });
+  });
+});
+
+describe('GET /api/llm/health', () => {
+  it('returns health snapshots', async () => {
+    const app = await buildApp();
+    const res = await app.inject({ method: 'GET', url: '/api/llm/health' });
+
+    expect(res.statusCode).toBe(200);
+    const body = JSON.parse(res.body);
+    expect(body.snapshots).toHaveLength(1); // mockGetHealth returns a single groq snapshot
+    expect(body.snapshots[0].provider).toBe('groq');
+    expect(body.ready).toBe(true);
+  });
+});
--- a/services/cowork-service/src/modules/llm/routes.ts
+++ b/services/cowork-service/src/modules/llm/routes.ts
@ -0,0 +1,84 @@
+/**
+ * LLM routing endpoints for cowork-service.
+ *
+ * POST /api/llm/chat     — Route a chat completion through the LLM router
+ * GET  /api/llm/providers — List available LLM providers
+ * GET  /api/llm/health    — Health snapshots for all provider+model pairs
+ *
+ * All endpoints are gated by the `llm_multi_model_enabled` feature flag.
+ */
+
+import type { FastifyInstance } from 'fastify';
+import { isFeatureEnabled } from '../../lib/feature-flags.js';
+import { getLlmRouter, isLlmRouterReady } from '../../lib/llm-router.js';
+import { ChatRequestSchema } from './types.js';
+
+/**
+ * Fastify plugin registering the three LLM endpoints. Every handler answers 403
+ * while `llm_multi_model_enabled` is off; only POST /api/llm/chat treats a
+ * missing router as an error (503), the GET endpoints report `ready: false`.
+ */
+export async function llmRoutes(app: FastifyInstance) {
+  // Shared gate: marks the reply 403 and yields the error body while the flag
+  // is off; yields null when the request may proceed.
+  const rejectIfDisabled = (reply: { code(statusCode: number): unknown }) => {
+    if (isFeatureEnabled('llm_multi_model_enabled')) return null;
+    reply.code(403);
+    return { error: 'LLM multi-model routing is disabled' };
+  };
+
+  // ── POST /api/llm/chat ───────────────────────────────────────
+  // Check order fixes the status a caller sees: flag → router → body.
+  app.post('/api/llm/chat', async (req, reply) => {
+    const disabled = rejectIfDisabled(reply);
+    if (disabled) return disabled;
+
+    if (!isLlmRouterReady()) {
+      reply.code(503);
+      return { error: 'LLM router not initialized — no providers configured' };
+    }
+
+    const parsed = ChatRequestSchema.safeParse(req.body);
+    if (!parsed.success) {
+      reply.code(400);
+      return { error: 'Invalid request', details: parsed.error.issues };
+    }
+
+    try {
+      const routed = await getLlmRouter().chat(parsed.data);
+      return {
+        response: routed.response,
+        provider: routed.provider,
+        model: routed.model,
+        totalLatencyMs: routed.totalLatencyMs,
+        attempts: routed.attempts,
+      };
+    } catch (err) {
+      req.log.error(err, 'LLM chat routing failed');
+      reply.code(502);
+      // Error instances expose their message to the caller; anything else gets a generic one.
+      const message = err instanceof Error ? err.message : 'LLM routing failed';
+      return { error: message };
+    }
+  });
+
+  // ── GET /api/llm/providers ──────────────────────────────────
+  app.get('/api/llm/providers', async (_req, reply) => {
+    const disabled = rejectIfDisabled(reply);
+    if (disabled) return disabled;
+
+    // Without a router there is nothing to list; still a 200, flagged not ready.
+    const ready = isLlmRouterReady();
+    return { providers: ready ? getLlmRouter().getProviders() : [], ready };
+  });
+
+  // ── GET /api/llm/health ─────────────────────────────────────
+  app.get('/api/llm/health', async (_req, reply) => {
+    const disabled = rejectIfDisabled(reply);
+    if (disabled) return disabled;
+
+    // Same not-ready convention as /providers: empty snapshots, `ready: false`.
+    const ready = isLlmRouterReady();
+    return { snapshots: ready ? getLlmRouter().getHealth() : [], ready };
+  });
+}
--- a/services/cowork-service/src/modules/llm/types.ts
+++ b/services/cowork-service/src/modules/llm/types.ts
@ -0,0 +1,20 @@
+/**
+ * Zod schemas for LLM routing endpoints.
+ */
+
+import { z } from 'zod';
+
+export const ChatMessageSchema = z.object({ // one message of the conversation
+  role: z.enum(['system', 'user', 'assistant']), // any other role value fails validation
+  content: z.string(), // no length constraint is declared here
+});
+
+export const ChatRequestSchema = z.object({ // request body validated by POST /api/llm/chat
+  messages: z.array(ChatMessageSchema).min(1), // at least one message is required
+  model: z.string().optional(), // optional model ID; NOTE(review): presumably the router picks one when omitted — confirm
+  temperature: z.number().min(0).max(2).optional(), // inclusive range 0–2
+  max_tokens: z.number().int().positive().optional(), // positive integer; no upper bound declared
+  top_p: z.number().min(0).max(1).optional(), // inclusive range 0–1
+});
+
+export type ChatRequest = z.infer<typeof ChatRequestSchema>; // static type derived from the schema above
--- a/services/cowork-service/src/server.test.ts
+++ b/services/cowork-service/src/server.test.ts
@ -62,6 +62,12 @@ vi.mock('./lib/flush-scheduler.js', () => ({
    finalFlush: vi.fn(async () => undefined),
  })),
 }));
+vi.mock('./lib/llm-router.js', () => ({ // stub the router module for the bootstrap test
+  initLlmRouter: vi.fn(() => ({ getProviders: vi.fn(() => []) })), // server.ts logs getProviders() right after init
+  getLlmRouter: vi.fn(() => ({ getProviders: vi.fn(() => []) })),
+  isLlmRouterReady: vi.fn(() => false),
+}));
+vi.mock('./modules/llm/routes.js', () => ({ llmRoutes: vi.fn() })); // route plugin replaced by a bare mock; only its registration is counted

 describe('cowork-service bootstrap', () => {
  beforeEach(() => {
@ -81,9 +87,9 @@ describe('cowork-service bootstrap', () => {
    expect(opts.version).toBe('0.1.0');
    expect(opts.readiness).toBe(true);

-    // JWT context + health + task routes = 2 register calls + 1 JWT
+    // JWT context + health + task + llm routes = 3 register calls + 1 JWT
    expect(registerOptionalJwtContextMock).toHaveBeenCalledOnce();
-    expect(appMock.register).toHaveBeenCalledTimes(2);
+    expect(appMock.register).toHaveBeenCalledTimes(3);
    expect(startServiceMock).toHaveBeenCalledWith(appMock, { port: 4009, host: '0.0.0.0' });
  });
 });
--- a/services/cowork-service/src/server.ts
+++ b/services/cowork-service/src/server.ts
@ -23,6 +23,8 @@ import { config } from './lib/config.js';
 import { productConfig, PRODUCT_ID } from './lib/product-config.js';
 import { getIpcBridge } from './lib/ipc-bridge.js';
 import { getFlushScheduler } from './lib/flush-scheduler.js';
+import { initLlmRouter } from './lib/llm-router.js';
+import { llmRoutes } from './modules/llm/routes.js';
 import type { JwtPayload } from './lib/request-context.js';

 const jwtSecret = new TextEncoder().encode(config.JWT_SECRET);
@ -51,6 +53,7 @@ await registerOptionalJwtContext(app, {
 // Register route modules
 await app.register(healthRoutes);
 await app.register(taskRoutes);
+await app.register(llmRoutes);

 // Bootstrap endpoint (same pattern as FlowMonk, ActionTrail, etc.)
 app.get('/api/bootstrap', async () => ({
@ -69,6 +72,16 @@ try {
  app.log.warn({ err }, 'IPC bridge failed to start — running in fallback mode');
 }

+// Initialize LLM router (best-effort: any failure is logged as a warning and startup continues)
+try {
+  const llm = initLlmRouter({
+    onTelemetry: (entry) => app.log.debug({ llmTelemetry: entry }, 'llm-router event'), // telemetry goes to the debug log only
+  });
+  app.log.info({ providers: llm.getProviders() }, 'LLM router initialized');
+} catch (err) {
+  app.log.warn({ err }, 'LLM router not available — no provider API keys configured'); // NOTE(review): this catches every init error, not only missing keys — rely on `err` for the real cause
+}
+
 // Start flush scheduler (periodic drain of IPC buffers → platform-service)
 const scheduler = getFlushScheduler(app.log);
 if (bridge.isRunning) {