From f542160784f62e723f2b4fb618fa4cbea892911d Mon Sep 17 00:00:00 2001
From: saravanakumardb1 <saravanakumardb1@users.noreply.github.com>
Date: Thu, 2 Apr 2026 23:10:07 -0700
Subject: [PATCH] =?UTF-8?q?feat(cowork-service):=20H.7=20=E2=80=94=20wire?=
 =?UTF-8?q?=20@bytelyst/llm-router=20for=20multi-model=20routing?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Added LLM routing module to cowork-service:
- lib/llm-router.ts — singleton LlmRouter with cloud + local Ollama support
- modules/llm/types.ts — Zod request schemas
- modules/llm/routes.ts — POST /api/llm/chat, GET /api/llm/providers, GET /api/llm/health
- All endpoints gated by llm_multi_model_enabled feature flag
- Best-effort init: service works without API keys (router stays uninitialized)
- 8 new tests (routes), server test updated for 3 route modules
- 57 total tests passing, typecheck clean
---
 pnpm-lock.yaml                                |  32 ++--
 services/cowork-service/package.json          |   1 +
 services/cowork-service/src/lib/llm-router.ts |  79 ++++++++++
 .../src/modules/llm/routes.test.ts            | 147 ++++++++++++++++++
 .../cowork-service/src/modules/llm/routes.ts  |  84 ++++++++++
 .../cowork-service/src/modules/llm/types.ts   |  20 +++
 services/cowork-service/src/server.test.ts    |  10 +-
 services/cowork-service/src/server.ts         |  13 ++
 8 files changed, 362 insertions(+), 24 deletions(-)
 create mode 100644 services/cowork-service/src/lib/llm-router.ts
 create mode 100644 services/cowork-service/src/modules/llm/routes.test.ts
 create mode 100644 services/cowork-service/src/modules/llm/routes.ts
 create mode 100644 services/cowork-service/src/modules/llm/types.ts

diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml
index 23d54fe0..5db07452 100644
--- a/pnpm-lock.yaml
+++ b/pnpm-lock.yaml
@@ -195,7 +195,7 @@ importers:
         version: 9.39.2(jiti@2.6.1)
       eslint-config-next:
         specifier: 16.1.6
-        version: 16.1.6(@typescript-eslint/parser@8.55.0(eslint@9.39.2(jiti@2.6.1))(typescript@5.9.3))(eslint@9.39.2(jiti@2.6.1))(typescript@5.9.3)
+        version: 16.1.6(@typescript-eslint/parser@8.56.0(eslint@9.39.2(jiti@2.6.1))(typescript@5.9.3))(eslint@9.39.2(jiti@2.6.1))(typescript@5.9.3)
       husky:
         specifier: ^9.0.0
         version: 9.1.7
@@ -292,7 +292,7 @@ importers:
         version: 9.39.2(jiti@2.6.1)
       eslint-config-next:
         specifier: 16.1.6
-        version: 16.1.6(@typescript-eslint/parser@8.56.0(eslint@9.39.2(jiti@2.6.1))(typescript@5.9.3))(eslint@9.39.2(jiti@2.6.1))(typescript@5.9.3)
+        version: 16.1.6(@typescript-eslint/parser@8.55.0(eslint@9.39.2(jiti@2.6.1))(typescript@5.9.3))(eslint@9.39.2(jiti@2.6.1))(typescript@5.9.3)
       husky:
         specifier: ^9.0.0
         version: 9.1.7
@@ -991,6 +991,9 @@ importers:
       '@bytelyst/fastify-core':
         specifier: workspace:*
         version: link:../../packages/fastify-core
+      '@bytelyst/llm-router':
+        specifier: workspace:*
+        version: link:../../packages/llm-router
       '@bytelyst/logger':
         specifier: workspace:*
         version: link:../../packages/logger
@@ -15280,7 +15283,7 @@ snapshots:
       '@next/eslint-plugin-next': 16.1.6
       eslint: 9.39.2(jiti@2.6.1)
       eslint-import-resolver-node: 0.3.9
-      eslint-import-resolver-typescript: 3.10.1(eslint-plugin-import@2.32.0(@typescript-eslint/parser@8.56.0(eslint@9.39.2(jiti@2.6.1))(typescript@5.9.3))(eslint@9.39.2(jiti@2.6.1)))(eslint@9.39.2(jiti@2.6.1))
+      eslint-import-resolver-typescript: 3.10.1(eslint-plugin-import@2.32.0)(eslint@9.39.2(jiti@2.6.1))
       eslint-plugin-import: 2.32.0(@typescript-eslint/parser@8.56.0(eslint@9.39.2(jiti@2.6.1))(typescript@5.9.3))(eslint-import-resolver-typescript@3.10.1)(eslint@9.39.2(jiti@2.6.1))
       eslint-plugin-jsx-a11y: 6.10.2(eslint@9.39.2(jiti@2.6.1))
       eslint-plugin-react: 7.37.5(eslint@9.39.2(jiti@2.6.1))
@@ -15303,7 +15306,7 @@ snapshots:
     transitivePeerDependencies:
       - supports-color
 
-  eslint-import-resolver-typescript@3.10.1(eslint-plugin-import@2.32.0(@typescript-eslint/parser@8.56.0(eslint@9.39.2(jiti@2.6.1))(typescript@5.9.3))(eslint@9.39.2(jiti@2.6.1)))(eslint@9.39.2(jiti@2.6.1)):
+  eslint-import-resolver-typescript@3.10.1(eslint-plugin-import@2.32.0)(eslint@9.39.2(jiti@2.6.1)):
     dependencies:
       '@nolyfill/is-core-module': 1.0.39
       debug: 4.4.3
@@ -15318,21 +15321,6 @@ snapshots:
     transitivePeerDependencies:
       - supports-color
 
-  eslint-import-resolver-typescript@3.10.1(eslint-plugin-import@2.32.0)(eslint@9.39.2(jiti@2.6.1)):
-    dependencies:
-      '@nolyfill/is-core-module': 1.0.39
-      debug: 4.4.3
-      eslint: 9.39.2(jiti@2.6.1)
-      get-tsconfig: 4.13.6
-      is-bun-module: 2.0.0
-      stable-hash: 0.0.5
-      tinyglobby: 0.2.15
-      unrs-resolver: 1.11.1
-    optionalDependencies:
-      eslint-plugin-import: 2.32.0(@typescript-eslint/parser@8.55.0(eslint@9.39.2(jiti@2.6.1))(typescript@5.9.3))(eslint-import-resolver-typescript@3.10.1)(eslint@9.39.2(jiti@2.6.1))
-    transitivePeerDependencies:
-      - supports-color
-
   eslint-module-utils@2.12.1(@typescript-eslint/parser@8.55.0(eslint@9.39.2(jiti@2.6.1))(typescript@5.9.3))(eslint-import-resolver-node@0.3.9)(eslint-import-resolver-typescript@3.10.1)(eslint@9.39.2(jiti@2.6.1)):
     dependencies:
       debug: 3.2.7
@@ -15344,14 +15332,14 @@ snapshots:
     transitivePeerDependencies:
       - supports-color
 
-  eslint-module-utils@2.12.1(@typescript-eslint/parser@8.56.0(eslint@9.39.2(jiti@2.6.1))(typescript@5.9.3))(eslint-import-resolver-node@0.3.9)(eslint-import-resolver-typescript@3.10.1(eslint-plugin-import@2.32.0(@typescript-eslint/parser@8.56.0(eslint@9.39.2(jiti@2.6.1))(typescript@5.9.3))(eslint@9.39.2(jiti@2.6.1)))(eslint@9.39.2(jiti@2.6.1)))(eslint@9.39.2(jiti@2.6.1)):
+  eslint-module-utils@2.12.1(@typescript-eslint/parser@8.56.0(eslint@9.39.2(jiti@2.6.1))(typescript@5.9.3))(eslint-import-resolver-node@0.3.9)(eslint-import-resolver-typescript@3.10.1)(eslint@9.39.2(jiti@2.6.1)):
     dependencies:
       debug: 3.2.7
     optionalDependencies:
       '@typescript-eslint/parser': 8.56.0(eslint@9.39.2(jiti@2.6.1))(typescript@5.9.3)
       eslint: 9.39.2(jiti@2.6.1)
       eslint-import-resolver-node: 0.3.9
-      eslint-import-resolver-typescript: 3.10.1(eslint-plugin-import@2.32.0(@typescript-eslint/parser@8.56.0(eslint@9.39.2(jiti@2.6.1))(typescript@5.9.3))(eslint@9.39.2(jiti@2.6.1)))(eslint@9.39.2(jiti@2.6.1))
+      eslint-import-resolver-typescript: 3.10.1(eslint-plugin-import@2.32.0)(eslint@9.39.2(jiti@2.6.1))
     transitivePeerDependencies:
       - supports-color
 
@@ -15395,7 +15383,7 @@ snapshots:
       doctrine: 2.1.0
       eslint: 9.39.2(jiti@2.6.1)
       eslint-import-resolver-node: 0.3.9
-      eslint-module-utils: 2.12.1(@typescript-eslint/parser@8.56.0(eslint@9.39.2(jiti@2.6.1))(typescript@5.9.3))(eslint-import-resolver-node@0.3.9)(eslint-import-resolver-typescript@3.10.1(eslint-plugin-import@2.32.0(@typescript-eslint/parser@8.56.0(eslint@9.39.2(jiti@2.6.1))(typescript@5.9.3))(eslint@9.39.2(jiti@2.6.1)))(eslint@9.39.2(jiti@2.6.1)))(eslint@9.39.2(jiti@2.6.1))
+      eslint-module-utils: 2.12.1(@typescript-eslint/parser@8.56.0(eslint@9.39.2(jiti@2.6.1))(typescript@5.9.3))(eslint-import-resolver-node@0.3.9)(eslint-import-resolver-typescript@3.10.1)(eslint@9.39.2(jiti@2.6.1))
       hasown: 2.0.2
       is-core-module: 2.16.1
       is-glob: 4.0.3
diff --git a/services/cowork-service/package.json b/services/cowork-service/package.json
index 23bb2687..f43b0396 100644
--- a/services/cowork-service/package.json
+++ b/services/cowork-service/package.json
@@ -21,6 +21,7 @@
     "@bytelyst/errors": "workspace:*",
     "@bytelyst/fastify-auth": "workspace:*",
     "@bytelyst/fastify-core": "workspace:*",
+    "@bytelyst/llm-router": "workspace:*",
     "@bytelyst/logger": "workspace:*",
     "@fastify/cors": "^10.0.2",
     "fastify": "^5.2.1",
diff --git a/services/cowork-service/src/lib/llm-router.ts b/services/cowork-service/src/lib/llm-router.ts
new file mode 100644
index 00000000..2f40746c
--- /dev/null
+++ b/services/cowork-service/src/lib/llm-router.ts
@@ -0,0 +1,79 @@
+/**
+ * LLM Router singleton for cowork-service.
+ *
+ * Provides multi-model LLM routing via @bytelyst/llm-router.
+ * Supports cloud providers (Groq, OpenRouter, Together, Cerebras) and local Ollama.
+ * Gated by the `llm_multi_model_enabled` feature flag.
+ *
+ * The Rust runtime still handles primary Anthropic calls — this router adds
+ * alternative model access for multi-model comparison, fallback, and cost optimization.
+ */
+
+import { LlmRouter, createLocalOllamaProvider, type ProviderConfig, type TelemetryEntry } from '@bytelyst/llm-router';
+import { config } from './config.js';
+
+let _router: LlmRouter | null = null;
+
+export interface LlmRouterOptions { // every field is optional; consumed by initLlmRouter()
+  /** Explicit providers. NOTE(review): when set, initLlmRouter passes only the explicit list to LlmRouter, so library defaults may be replaced rather than extended — confirm. */
+  extraProviders?: ProviderConfig[];
+  /** Local Ollama model IDs; a non-empty list makes initLlmRouter add a provider built by createLocalOllamaProvider. */
+  ollamaModels?: string[];
+  /** Ollama base URL, forwarded to createLocalOllamaProvider. Default (presumably applied by the library, not in this file): http://localhost:11434/v1 */
+  ollamaBaseUrl?: string;
+  /** Telemetry callback, forwarded unchanged to the LlmRouter constructor. */
+  onTelemetry?: (entry: TelemetryEntry) => void;
+}
+
+/**
+ * Build a new LlmRouter from `opts`, store it as the module singleton (replacing any earlier one) and return it.
+ * Meant to run once at server startup. If the LlmRouter constructor throws, the singleton keeps its previous value.
+ */
+export function initLlmRouter(opts?: LlmRouterOptions): LlmRouter {
+  const providers: ProviderConfig[] = [];
+
+  // The local Ollama provider is added only when a non-empty model list is given
+  if (opts?.ollamaModels && opts.ollamaModels.length > 0) {
+    providers.push(
+      createLocalOllamaProvider(opts.ollamaModels, opts.ollamaBaseUrl),
+    );
+  }
+
+  // Caller-supplied providers are appended after the Ollama entry
+  if (opts?.extraProviders) {
+    providers.push(...opts.extraProviders);
+  }
+
+  // With no explicit providers the `providers` key is omitted, leaving the choice to LlmRouter (presumably its built-in cloud defaults — confirm)
+  const routerConfig = providers.length > 0 ? { providers } : undefined;
+
+  _router = new LlmRouter({
+    ...routerConfig, // spreading undefined contributes no keys
+    timeoutMs: config.RUST_RUNTIME_TIMEOUT_MS, // NOTE(review): reuses the Rust runtime timeout setting for LLM calls — confirm this is intended
+    onTelemetry: opts?.onTelemetry,
+  });
+
+  return _router;
+}
+
+/**
+ * Return the current LlmRouter singleton; throws an Error while it is null (initLlmRouter() has not succeeded yet, or resetLlmRouter() was called).
+ */
+export function getLlmRouter(): LlmRouter {
+  if (!_router) {
+    throw new Error('LLM router not initialized — call initLlmRouter() first');
+  }
+  return _router;
+}
+
+/**
+ * True when a router instance is stored in the singleton. This is only a non-null check; it does not probe any provider.
+ */
+export function isLlmRouterReady(): boolean {
+  return _router !== null;
+}
+
+/** Clear the singleton back to null (for testing); getLlmRouter() throws again until initLlmRouter() is re-run. */
+export function resetLlmRouter(): void {
+  _router = null;
+}
diff --git a/services/cowork-service/src/modules/llm/routes.test.ts b/services/cowork-service/src/modules/llm/routes.test.ts
new file mode 100644
index 00000000..5545dbe6
--- /dev/null
+++ b/services/cowork-service/src/modules/llm/routes.test.ts
@@ -0,0 +1,147 @@
+import { describe, expect, it, vi, beforeEach } from 'vitest';
+import Fastify from 'fastify';
+import { llmRoutes } from './routes.js';
+
+// Mock feature flags: the default implementation enables only 'llm_multi_model_enabled'; beforeEach overrides it with a fixed return value
+vi.mock('../../lib/feature-flags.js', () => ({
+  isFeatureEnabled: vi.fn((flag: string) => flag === 'llm_multi_model_enabled'),
+}));
+
+// Mock LLM router: shared spies so individual tests can program chat() and inspect calls
+const mockChat = vi.fn();
+const mockGetProviders = vi.fn(() => ['groq', 'openrouter']);
+const mockGetHealth = vi.fn(() => [
+  { provider: 'groq', model: 'llama-3.3-70b-versatile', healthy: true, totalRequests: 5, successes: 5, rateLimits: 0, errors: 0, avgLatencyMs: 200, p95LatencyMs: 300 },
+]);
+
+vi.mock('../../lib/llm-router.js', () => ({
+  getLlmRouter: vi.fn(() => ({ // the spies are read only when getLlmRouter() is called, not when the factory runs
+    chat: mockChat,
+    getProviders: mockGetProviders,
+    getHealth: mockGetHealth,
+  })),
+  isLlmRouterReady: vi.fn(() => true),
+}));
+
+import { isFeatureEnabled } from '../../lib/feature-flags.js';
+import { isLlmRouterReady } from '../../lib/llm-router.js';
+
+async function buildApp() { // fresh Fastify instance (logging off) with only the LLM routes registered
+  const app = Fastify({ logger: false });
+  await app.register(llmRoutes);
+  return app; // NOTE(review): callers never call app.close(); tests only use inject(), so presumably no listener is opened — confirm
+}
+
+beforeEach(() => { // baseline for every test: flag on, router ready; individual tests flip one of them
+  vi.clearAllMocks(); // drops recorded calls so call-count assertions start from zero
+  vi.mocked(isFeatureEnabled).mockReturnValue(true);
+  vi.mocked(isLlmRouterReady).mockReturnValue(true);
+});
+
+describe('POST /api/llm/chat', () => { // covers the 200, 400, 403, 503 and 502 outcomes of the chat handler
+  it('routes a valid chat request', async () => {
+    mockChat.mockResolvedValue({
+      response: { id: 'r1', object: 'chat.completion', created: 1, model: 'llama-3.3-70b', choices: [{ index: 0, message: { role: 'assistant', content: 'Hello' }, finish_reason: 'stop' }] },
+      provider: 'groq',
+      model: 'llama-3.3-70b-versatile',
+      totalLatencyMs: 150,
+      attempts: 1,
+    });
+
+    const app = await buildApp();
+    const res = await app.inject({
+      method: 'POST',
+      url: '/api/llm/chat',
+      payload: { messages: [{ role: 'user', content: 'Hi' }] },
+    });
+
+    expect(res.statusCode).toBe(200);
+    const body = JSON.parse(res.body);
+    expect(body.provider).toBe('groq');
+    expect(body.model).toBe('llama-3.3-70b-versatile');
+    expect(mockChat).toHaveBeenCalledOnce();
+  });
+
+  it('returns 400 for invalid request body', async () => {
+    const app = await buildApp();
+    const res = await app.inject({
+      method: 'POST',
+      url: '/api/llm/chat',
+      payload: { messages: [] }, // an empty array violates the schema's min(1) on messages
+    });
+
+    expect(res.statusCode).toBe(400);
+    expect(JSON.parse(res.body).error).toBe('Invalid request');
+  });
+
+  it('returns 403 when flag is disabled', async () => {
+    vi.mocked(isFeatureEnabled).mockReturnValue(false);
+    const app = await buildApp();
+    const res = await app.inject({
+      method: 'POST',
+      url: '/api/llm/chat',
+      payload: { messages: [{ role: 'user', content: 'Hi' }] },
+    });
+
+    expect(res.statusCode).toBe(403);
+  });
+
+  it('returns 503 when router not ready', async () => {
+    vi.mocked(isLlmRouterReady).mockReturnValue(false);
+    const app = await buildApp();
+    const res = await app.inject({
+      method: 'POST',
+      url: '/api/llm/chat',
+      payload: { messages: [{ role: 'user', content: 'Hi' }] },
+    });
+
+    expect(res.statusCode).toBe(503);
+  });
+
+  it('returns 502 on router error', async () => {
+    mockChat.mockRejectedValue(new Error('All providers exhausted'));
+    const app = await buildApp();
+    const res = await app.inject({
+      method: 'POST',
+      url: '/api/llm/chat',
+      payload: { messages: [{ role: 'user', content: 'Hi' }] },
+    });
+
+    expect(res.statusCode).toBe(502);
+    expect(JSON.parse(res.body).error).toContain('All providers exhausted'); // pins that the handler echoes the thrown Error's message to the client
+  });
+});
+
+describe('GET /api/llm/providers', () => { // ready and not-ready responses of the provider listing
+  it('returns provider list', async () => {
+    const app = await buildApp();
+    const res = await app.inject({ method: 'GET', url: '/api/llm/providers' });
+
+    expect(res.statusCode).toBe(200);
+    const body = JSON.parse(res.body);
+    expect(body.providers).toEqual(['groq', 'openrouter']); // the value returned by mockGetProviders
+    expect(body.ready).toBe(true);
+  });
+
+  it('returns empty when router not ready', async () => {
+    vi.mocked(isLlmRouterReady).mockReturnValue(false);
+    const app = await buildApp();
+    const res = await app.inject({ method: 'GET', url: '/api/llm/providers' });
+
+    expect(res.statusCode).toBe(200); // not-ready is reported in the body, not as an error status
+    expect(JSON.parse(res.body)).toEqual({ providers: [], ready: false });
+  });
+});
+
+describe('GET /api/llm/health', () => { // only the ready path is covered here
+  it('returns health snapshots', async () => {
+    const app = await buildApp();
+    const res = await app.inject({ method: 'GET', url: '/api/llm/health' });
+
+    expect(res.statusCode).toBe(200);
+    const body = JSON.parse(res.body);
+    expect(body.snapshots).toHaveLength(1); // mockGetHealth returns a single snapshot
+    expect(body.snapshots[0].provider).toBe('groq');
+    expect(body.ready).toBe(true);
+  });
+});
diff --git a/services/cowork-service/src/modules/llm/routes.ts b/services/cowork-service/src/modules/llm/routes.ts
new file mode 100644
index 00000000..72905418
--- /dev/null
+++ b/services/cowork-service/src/modules/llm/routes.ts
@@ -0,0 +1,84 @@
+/**
+ * LLM routing endpoints for cowork-service.
+ *
+ * POST /api/llm/chat     — Route a chat completion through the LLM router
+ * GET  /api/llm/providers — List available LLM providers
+ * GET  /api/llm/health    — Health snapshots for all provider+model pairs
+ *
+ * All endpoints are gated by the `llm_multi_model_enabled` feature flag.
+ */
+
+import type { FastifyInstance } from 'fastify';
+import { isFeatureEnabled } from '../../lib/feature-flags.js';
+import { getLlmRouter, isLlmRouterReady } from '../../lib/llm-router.js';
+import { ChatRequestSchema } from './types.js';
+
+export async function llmRoutes(app: FastifyInstance) { // Fastify plugin: registers the three /api/llm/* routes on `app`
+  // ── POST /api/llm/chat ───────────────────────────────────────
+  app.post('/api/llm/chat', async (req, reply) => {
+    if (!isFeatureEnabled('llm_multi_model_enabled')) { // the flag is read on every request, not once at registration
+      reply.code(403);
+      return { error: 'LLM multi-model routing is disabled' };
+    }
+
+    if (!isLlmRouterReady()) { // checked before body validation, so a malformed body still gets 503 while the router is not ready
+      reply.code(503);
+      return { error: 'LLM router not initialized — no providers configured' };
+    }
+
+    const parsed = ChatRequestSchema.safeParse(req.body);
+    if (!parsed.success) {
+      reply.code(400);
+      return { error: 'Invalid request', details: parsed.error.issues };
+    }
+
+    try {
+      const result = await getLlmRouter().chat(parsed.data);
+      return { // only these five fields of the router result are returned to the client
+        response: result.response,
+        provider: result.provider,
+        model: result.model,
+        totalLatencyMs: result.totalLatencyMs,
+        attempts: result.attempts,
+      };
+    } catch (err) {
+      req.log.error(err, 'LLM chat routing failed');
+      reply.code(502);
+      return { error: err instanceof Error ? err.message : 'LLM routing failed' }; // NOTE(review): the raw error message goes to the client — confirm it cannot carry sensitive provider details
+    }
+  });
+
+  // ── GET /api/llm/providers ──────────────────────────────────
+  app.get('/api/llm/providers', async (_req, reply) => {
+    if (!isFeatureEnabled('llm_multi_model_enabled')) {
+      reply.code(403);
+      return { error: 'LLM multi-model routing is disabled' };
+    }
+
+    if (!isLlmRouterReady()) { // unlike /chat, not-ready is a 200 with ready: false rather than a 503
+      return { providers: [], ready: false };
+    }
+
+    return {
+      providers: getLlmRouter().getProviders(),
+      ready: true,
+    };
+  });
+
+  // ── GET /api/llm/health ─────────────────────────────────────
+  app.get('/api/llm/health', async (_req, reply) => {
+    if (!isFeatureEnabled('llm_multi_model_enabled')) {
+      reply.code(403);
+      return { error: 'LLM multi-model routing is disabled' };
+    }
+
+    if (!isLlmRouterReady()) { // same 200 + ready: false convention as /providers
+      return { snapshots: [], ready: false };
+    }
+
+    return {
+      snapshots: getLlmRouter().getHealth(),
+      ready: true,
+    };
+  });
+}
diff --git a/services/cowork-service/src/modules/llm/types.ts b/services/cowork-service/src/modules/llm/types.ts
new file mode 100644
index 00000000..09749e91
--- /dev/null
+++ b/services/cowork-service/src/modules/llm/types.ts
@@ -0,0 +1,20 @@
+/**
+ * Zod schemas for LLM routing endpoints.
+ */
+
+import { z } from 'zod';
+
+export const ChatMessageSchema = z.object({ // one chat message
+  role: z.enum(['system', 'user', 'assistant']), // any other role value fails validation
+  content: z.string(), // any string is accepted, including the empty string
+});
+
+export const ChatRequestSchema = z.object({ // request body for a chat completion; only `messages` is required
+  messages: z.array(ChatMessageSchema).min(1), // at least one message
+  model: z.string().optional(),
+  temperature: z.number().min(0).max(2).optional(), // inclusive range 0–2
+  max_tokens: z.number().int().positive().optional(), // integer greater than 0
+  top_p: z.number().min(0).max(1).optional(), // inclusive range 0–1
+});
+
+export type ChatRequest = z.infer<typeof ChatRequestSchema>;
diff --git a/services/cowork-service/src/server.test.ts b/services/cowork-service/src/server.test.ts
index 48ee2c04..c1fe4e0d 100644
--- a/services/cowork-service/src/server.test.ts
+++ b/services/cowork-service/src/server.test.ts
@@ -62,6 +62,12 @@ vi.mock('./lib/flush-scheduler.js', () => ({
     finalFlush: vi.fn(async () => undefined),
   })),
 }));
+vi.mock('./lib/llm-router.js', () => ({ // stub router module so the bootstrap test constructs no real LlmRouter
+  initLlmRouter: vi.fn(() => ({ getProviders: vi.fn(() => []) })), // server.ts calls getProviders() on the returned router for its startup log
+  getLlmRouter: vi.fn(() => ({ getProviders: vi.fn(() => []) })),
+  isLlmRouterReady: vi.fn(() => false),
+}));
+vi.mock('./modules/llm/routes.js', () => ({ llmRoutes: vi.fn() })); // the plugin is replaced by a spy; the test only counts register() calls
 
 describe('cowork-service bootstrap', () => {
   beforeEach(() => {
@@ -81,9 +87,9 @@ describe('cowork-service bootstrap', () => {
     expect(opts.version).toBe('0.1.0');
     expect(opts.readiness).toBe(true);
 
-    // JWT context + health + task routes = 2 register calls + 1 JWT
+    // JWT context + health + task + llm routes = 3 register calls + 1 JWT
     expect(registerOptionalJwtContextMock).toHaveBeenCalledOnce();
-    expect(appMock.register).toHaveBeenCalledTimes(2);
+    expect(appMock.register).toHaveBeenCalledTimes(3);
     expect(startServiceMock).toHaveBeenCalledWith(appMock, { port: 4009, host: '0.0.0.0' });
   });
 });
diff --git a/services/cowork-service/src/server.ts b/services/cowork-service/src/server.ts
index 506a7bfe..c4665564 100644
--- a/services/cowork-service/src/server.ts
+++ b/services/cowork-service/src/server.ts
@@ -23,6 +23,8 @@ import { config } from './lib/config.js';
 import { productConfig, PRODUCT_ID } from './lib/product-config.js';
 import { getIpcBridge } from './lib/ipc-bridge.js';
 import { getFlushScheduler } from './lib/flush-scheduler.js';
+import { initLlmRouter } from './lib/llm-router.js';
+import { llmRoutes } from './modules/llm/routes.js';
 import type { JwtPayload } from './lib/request-context.js';
 
 const jwtSecret = new TextEncoder().encode(config.JWT_SECRET);
@@ -51,6 +53,7 @@ await registerOptionalJwtContext(app, {
 // Register route modules
 await app.register(healthRoutes);
 await app.register(taskRoutes);
+await app.register(llmRoutes);
 
 // Bootstrap endpoint (same pattern as FlowMonk, ActionTrail, etc.)
 app.get('/api/bootstrap', async () => ({
@@ -69,6 +72,16 @@ try {
   app.log.warn({ err }, 'IPC bridge failed to start — running in fallback mode');
 }
 
+// Initialize LLM router (best-effort — a failure here is logged and must not stop the service from starting)
+try {
+  const llm = initLlmRouter({
+    onTelemetry: (entry) => app.log.debug({ llmTelemetry: entry }, 'llm-router event'),
+  });
+  app.log.info({ providers: llm.getProviders() }, 'LLM router initialized');
+} catch (err) {
+  app.log.warn({ err }, 'LLM router failed to initialize — LLM routing unavailable'); // cause-neutral: the real reason is in `err`, which may not be missing API keys
+}
+
 // Start flush scheduler (periodic drain of IPC buffers → platform-service)
 const scheduler = getFlushScheduler(app.log);
 if (bridge.isRunning) {