From f542160784f62e723f2b4fb618fa4cbea892911d Mon Sep 17 00:00:00 2001 From: saravanakumardb1 Date: Thu, 2 Apr 2026 23:10:07 -0700 Subject: [PATCH] =?UTF-8?q?feat(cowork-service):=20H.7=20=E2=80=94=20wire?= =?UTF-8?q?=20@bytelyst/llm-router=20for=20multi-model=20routing?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Added LLM routing module to cowork-service: - lib/llm-router.ts — singleton LlmRouter with cloud + local Ollama support - modules/llm/types.ts — Zod request schemas - modules/llm/routes.ts — POST /api/llm/chat, GET /api/llm/providers, GET /api/llm/health - All endpoints gated by llm_multi_model_enabled feature flag - Best-effort init: service works without API keys (router stays uninitialized) - 8 new tests (routes), server test updated for 3 route modules - 57 total tests passing, typecheck clean --- pnpm-lock.yaml | 32 ++-- services/cowork-service/package.json | 1 + services/cowork-service/src/lib/llm-router.ts | 79 ++++++++++ .../src/modules/llm/routes.test.ts | 147 ++++++++++++++++++ .../cowork-service/src/modules/llm/routes.ts | 84 ++++++++++ .../cowork-service/src/modules/llm/types.ts | 20 +++ services/cowork-service/src/server.test.ts | 10 +- services/cowork-service/src/server.ts | 13 ++ 8 files changed, 362 insertions(+), 24 deletions(-) create mode 100644 services/cowork-service/src/lib/llm-router.ts create mode 100644 services/cowork-service/src/modules/llm/routes.test.ts create mode 100644 services/cowork-service/src/modules/llm/routes.ts create mode 100644 services/cowork-service/src/modules/llm/types.ts diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 23d54fe0..5db07452 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -195,7 +195,7 @@ importers: version: 9.39.2(jiti@2.6.1) eslint-config-next: specifier: 16.1.6 - version: 16.1.6(@typescript-eslint/parser@8.55.0(eslint@9.39.2(jiti@2.6.1))(typescript@5.9.3))(eslint@9.39.2(jiti@2.6.1))(typescript@5.9.3) + version: 
16.1.6(@typescript-eslint/parser@8.56.0(eslint@9.39.2(jiti@2.6.1))(typescript@5.9.3))(eslint@9.39.2(jiti@2.6.1))(typescript@5.9.3) husky: specifier: ^9.0.0 version: 9.1.7 @@ -292,7 +292,7 @@ importers: version: 9.39.2(jiti@2.6.1) eslint-config-next: specifier: 16.1.6 - version: 16.1.6(@typescript-eslint/parser@8.56.0(eslint@9.39.2(jiti@2.6.1))(typescript@5.9.3))(eslint@9.39.2(jiti@2.6.1))(typescript@5.9.3) + version: 16.1.6(@typescript-eslint/parser@8.55.0(eslint@9.39.2(jiti@2.6.1))(typescript@5.9.3))(eslint@9.39.2(jiti@2.6.1))(typescript@5.9.3) husky: specifier: ^9.0.0 version: 9.1.7 @@ -991,6 +991,9 @@ importers: '@bytelyst/fastify-core': specifier: workspace:* version: link:../../packages/fastify-core + '@bytelyst/llm-router': + specifier: workspace:* + version: link:../../packages/llm-router '@bytelyst/logger': specifier: workspace:* version: link:../../packages/logger @@ -15280,7 +15283,7 @@ snapshots: '@next/eslint-plugin-next': 16.1.6 eslint: 9.39.2(jiti@2.6.1) eslint-import-resolver-node: 0.3.9 - eslint-import-resolver-typescript: 3.10.1(eslint-plugin-import@2.32.0(@typescript-eslint/parser@8.56.0(eslint@9.39.2(jiti@2.6.1))(typescript@5.9.3))(eslint@9.39.2(jiti@2.6.1)))(eslint@9.39.2(jiti@2.6.1)) + eslint-import-resolver-typescript: 3.10.1(eslint-plugin-import@2.32.0)(eslint@9.39.2(jiti@2.6.1)) eslint-plugin-import: 2.32.0(@typescript-eslint/parser@8.56.0(eslint@9.39.2(jiti@2.6.1))(typescript@5.9.3))(eslint-import-resolver-typescript@3.10.1)(eslint@9.39.2(jiti@2.6.1)) eslint-plugin-jsx-a11y: 6.10.2(eslint@9.39.2(jiti@2.6.1)) eslint-plugin-react: 7.37.5(eslint@9.39.2(jiti@2.6.1)) @@ -15303,7 +15306,7 @@ snapshots: transitivePeerDependencies: - supports-color - eslint-import-resolver-typescript@3.10.1(eslint-plugin-import@2.32.0(@typescript-eslint/parser@8.56.0(eslint@9.39.2(jiti@2.6.1))(typescript@5.9.3))(eslint@9.39.2(jiti@2.6.1)))(eslint@9.39.2(jiti@2.6.1)): + eslint-import-resolver-typescript@3.10.1(eslint-plugin-import@2.32.0)(eslint@9.39.2(jiti@2.6.1)): 
dependencies: '@nolyfill/is-core-module': 1.0.39 debug: 4.4.3 @@ -15318,21 +15321,6 @@ snapshots: transitivePeerDependencies: - supports-color - eslint-import-resolver-typescript@3.10.1(eslint-plugin-import@2.32.0)(eslint@9.39.2(jiti@2.6.1)): - dependencies: - '@nolyfill/is-core-module': 1.0.39 - debug: 4.4.3 - eslint: 9.39.2(jiti@2.6.1) - get-tsconfig: 4.13.6 - is-bun-module: 2.0.0 - stable-hash: 0.0.5 - tinyglobby: 0.2.15 - unrs-resolver: 1.11.1 - optionalDependencies: - eslint-plugin-import: 2.32.0(@typescript-eslint/parser@8.55.0(eslint@9.39.2(jiti@2.6.1))(typescript@5.9.3))(eslint-import-resolver-typescript@3.10.1)(eslint@9.39.2(jiti@2.6.1)) - transitivePeerDependencies: - - supports-color - eslint-module-utils@2.12.1(@typescript-eslint/parser@8.55.0(eslint@9.39.2(jiti@2.6.1))(typescript@5.9.3))(eslint-import-resolver-node@0.3.9)(eslint-import-resolver-typescript@3.10.1)(eslint@9.39.2(jiti@2.6.1)): dependencies: debug: 3.2.7 @@ -15344,14 +15332,14 @@ snapshots: transitivePeerDependencies: - supports-color - eslint-module-utils@2.12.1(@typescript-eslint/parser@8.56.0(eslint@9.39.2(jiti@2.6.1))(typescript@5.9.3))(eslint-import-resolver-node@0.3.9)(eslint-import-resolver-typescript@3.10.1(eslint-plugin-import@2.32.0(@typescript-eslint/parser@8.56.0(eslint@9.39.2(jiti@2.6.1))(typescript@5.9.3))(eslint@9.39.2(jiti@2.6.1)))(eslint@9.39.2(jiti@2.6.1)))(eslint@9.39.2(jiti@2.6.1)): + eslint-module-utils@2.12.1(@typescript-eslint/parser@8.56.0(eslint@9.39.2(jiti@2.6.1))(typescript@5.9.3))(eslint-import-resolver-node@0.3.9)(eslint-import-resolver-typescript@3.10.1)(eslint@9.39.2(jiti@2.6.1)): dependencies: debug: 3.2.7 optionalDependencies: '@typescript-eslint/parser': 8.56.0(eslint@9.39.2(jiti@2.6.1))(typescript@5.9.3) eslint: 9.39.2(jiti@2.6.1) eslint-import-resolver-node: 0.3.9 - eslint-import-resolver-typescript: 
3.10.1(eslint-plugin-import@2.32.0(@typescript-eslint/parser@8.56.0(eslint@9.39.2(jiti@2.6.1))(typescript@5.9.3))(eslint@9.39.2(jiti@2.6.1)))(eslint@9.39.2(jiti@2.6.1)) + eslint-import-resolver-typescript: 3.10.1(eslint-plugin-import@2.32.0)(eslint@9.39.2(jiti@2.6.1)) transitivePeerDependencies: - supports-color @@ -15395,7 +15383,7 @@ snapshots: doctrine: 2.1.0 eslint: 9.39.2(jiti@2.6.1) eslint-import-resolver-node: 0.3.9 - eslint-module-utils: 2.12.1(@typescript-eslint/parser@8.56.0(eslint@9.39.2(jiti@2.6.1))(typescript@5.9.3))(eslint-import-resolver-node@0.3.9)(eslint-import-resolver-typescript@3.10.1(eslint-plugin-import@2.32.0(@typescript-eslint/parser@8.56.0(eslint@9.39.2(jiti@2.6.1))(typescript@5.9.3))(eslint@9.39.2(jiti@2.6.1)))(eslint@9.39.2(jiti@2.6.1)))(eslint@9.39.2(jiti@2.6.1)) + eslint-module-utils: 2.12.1(@typescript-eslint/parser@8.56.0(eslint@9.39.2(jiti@2.6.1))(typescript@5.9.3))(eslint-import-resolver-node@0.3.9)(eslint-import-resolver-typescript@3.10.1)(eslint@9.39.2(jiti@2.6.1)) hasown: 2.0.2 is-core-module: 2.16.1 is-glob: 4.0.3 diff --git a/services/cowork-service/package.json b/services/cowork-service/package.json index 23bb2687..f43b0396 100644 --- a/services/cowork-service/package.json +++ b/services/cowork-service/package.json @@ -21,6 +21,7 @@ "@bytelyst/errors": "workspace:*", "@bytelyst/fastify-auth": "workspace:*", "@bytelyst/fastify-core": "workspace:*", + "@bytelyst/llm-router": "workspace:*", "@bytelyst/logger": "workspace:*", "@fastify/cors": "^10.0.2", "fastify": "^5.2.1", diff --git a/services/cowork-service/src/lib/llm-router.ts b/services/cowork-service/src/lib/llm-router.ts new file mode 100644 index 00000000..2f40746c --- /dev/null +++ b/services/cowork-service/src/lib/llm-router.ts @@ -0,0 +1,79 @@ +/** + * LLM Router singleton for cowork-service. + * + * Provides multi-model LLM routing via @bytelyst/llm-router. + * Supports cloud providers (Groq, OpenRouter, Together, Cerebras) and local Ollama. 
+ * Gated by the `llm_multi_model_enabled` feature flag. + * + * The Rust runtime still handles primary Anthropic calls — this router adds + * alternative model access for multi-model comparison, fallback, and cost optimization. + */ + +import { LlmRouter, createLocalOllamaProvider, type ProviderConfig, type TelemetryEntry } from '@bytelyst/llm-router'; +import { config } from './config.js'; + +let _router: LlmRouter | null = null; + +export interface LlmRouterOptions { + /** Additional providers beyond defaults. */ + extraProviders?: ProviderConfig[]; + /** Local Ollama model IDs (auto-creates local-ollama provider). */ + ollamaModels?: string[]; + /** Ollama base URL. Default: http://localhost:11434/v1 */ + ollamaBaseUrl?: string; + /** Telemetry callback. */ + onTelemetry?: (entry: TelemetryEntry) => void; +} + +/** + * Initialize (or reinitialize) the LLM router singleton. + * Call once during server startup. + */ +export function initLlmRouter(opts?: LlmRouterOptions): LlmRouter { + const providers: ProviderConfig[] = []; + + // Add local Ollama if models are specified + if (opts?.ollamaModels && opts.ollamaModels.length > 0) { + providers.push( + createLocalOllamaProvider(opts.ollamaModels, opts.ollamaBaseUrl), + ); + } + + // Add any extra providers + if (opts?.extraProviders) { + providers.push(...opts.extraProviders); + } + + // If no explicit providers, use defaults (cloud free-tier providers) + const routerConfig = providers.length > 0 ? { providers } : undefined; + + _router = new LlmRouter({ + ...routerConfig, + timeoutMs: config.RUST_RUNTIME_TIMEOUT_MS, + onTelemetry: opts?.onTelemetry, + }); + + return _router; +} + +/** + * Get the LLM router singleton. Throws if not initialized. + */ +export function getLlmRouter(): LlmRouter { + if (!_router) { + throw new Error('LLM router not initialized — call initLlmRouter() first'); + } + return _router; +} + +/** + * Check if the LLM router has been initialized. 
+ */ +export function isLlmRouterReady(): boolean { + return _router !== null; +} + +/** Reset singleton (for testing). */ +export function resetLlmRouter(): void { + _router = null; +} diff --git a/services/cowork-service/src/modules/llm/routes.test.ts b/services/cowork-service/src/modules/llm/routes.test.ts new file mode 100644 index 00000000..5545dbe6 --- /dev/null +++ b/services/cowork-service/src/modules/llm/routes.test.ts @@ -0,0 +1,147 @@ +import { describe, expect, it, vi, beforeEach } from 'vitest'; +import Fastify from 'fastify'; +import { llmRoutes } from './routes.js'; + +// Mock feature flags +vi.mock('../../lib/feature-flags.js', () => ({ + isFeatureEnabled: vi.fn((flag: string) => flag === 'llm_multi_model_enabled'), +})); + +// Mock LLM router +const mockChat = vi.fn(); +const mockGetProviders = vi.fn(() => ['groq', 'openrouter']); +const mockGetHealth = vi.fn(() => [ + { provider: 'groq', model: 'llama-3.3-70b-versatile', healthy: true, totalRequests: 5, successes: 5, rateLimits: 0, errors: 0, avgLatencyMs: 200, p95LatencyMs: 300 }, +]); + +vi.mock('../../lib/llm-router.js', () => ({ + getLlmRouter: vi.fn(() => ({ + chat: mockChat, + getProviders: mockGetProviders, + getHealth: mockGetHealth, + })), + isLlmRouterReady: vi.fn(() => true), +})); + +import { isFeatureEnabled } from '../../lib/feature-flags.js'; +import { isLlmRouterReady } from '../../lib/llm-router.js'; + +async function buildApp() { + const app = Fastify({ logger: false }); + await app.register(llmRoutes); + return app; +} + +beforeEach(() => { + vi.clearAllMocks(); + vi.mocked(isFeatureEnabled).mockReturnValue(true); + vi.mocked(isLlmRouterReady).mockReturnValue(true); +}); + +describe('POST /api/llm/chat', () => { + it('routes a valid chat request', async () => { + mockChat.mockResolvedValue({ + response: { id: 'r1', object: 'chat.completion', created: 1, model: 'llama-3.3-70b', choices: [{ index: 0, message: { role: 'assistant', content: 'Hello' }, finish_reason: 'stop' }] }, + 
provider: 'groq', + model: 'llama-3.3-70b-versatile', + totalLatencyMs: 150, + attempts: 1, + }); + + const app = await buildApp(); + const res = await app.inject({ + method: 'POST', + url: '/api/llm/chat', + payload: { messages: [{ role: 'user', content: 'Hi' }] }, + }); + + expect(res.statusCode).toBe(200); + const body = JSON.parse(res.body); + expect(body.provider).toBe('groq'); + expect(body.model).toBe('llama-3.3-70b-versatile'); + expect(mockChat).toHaveBeenCalledOnce(); + }); + + it('returns 400 for invalid request body', async () => { + const app = await buildApp(); + const res = await app.inject({ + method: 'POST', + url: '/api/llm/chat', + payload: { messages: [] }, + }); + + expect(res.statusCode).toBe(400); + expect(JSON.parse(res.body).error).toBe('Invalid request'); + }); + + it('returns 403 when flag is disabled', async () => { + vi.mocked(isFeatureEnabled).mockReturnValue(false); + const app = await buildApp(); + const res = await app.inject({ + method: 'POST', + url: '/api/llm/chat', + payload: { messages: [{ role: 'user', content: 'Hi' }] }, + }); + + expect(res.statusCode).toBe(403); + }); + + it('returns 503 when router not ready', async () => { + vi.mocked(isLlmRouterReady).mockReturnValue(false); + const app = await buildApp(); + const res = await app.inject({ + method: 'POST', + url: '/api/llm/chat', + payload: { messages: [{ role: 'user', content: 'Hi' }] }, + }); + + expect(res.statusCode).toBe(503); + }); + + it('returns 502 on router error', async () => { + mockChat.mockRejectedValue(new Error('All providers exhausted')); + const app = await buildApp(); + const res = await app.inject({ + method: 'POST', + url: '/api/llm/chat', + payload: { messages: [{ role: 'user', content: 'Hi' }] }, + }); + + expect(res.statusCode).toBe(502); + expect(JSON.parse(res.body).error).toContain('All providers exhausted'); + }); +}); + +describe('GET /api/llm/providers', () => { + it('returns provider list', async () => { + const app = await buildApp(); + 
const res = await app.inject({ method: 'GET', url: '/api/llm/providers' }); + + expect(res.statusCode).toBe(200); + const body = JSON.parse(res.body); + expect(body.providers).toEqual(['groq', 'openrouter']); + expect(body.ready).toBe(true); + }); + + it('returns empty when router not ready', async () => { + vi.mocked(isLlmRouterReady).mockReturnValue(false); + const app = await buildApp(); + const res = await app.inject({ method: 'GET', url: '/api/llm/providers' }); + + expect(res.statusCode).toBe(200); + expect(JSON.parse(res.body)).toEqual({ providers: [], ready: false }); + }); +}); + +describe('GET /api/llm/health', () => { + it('returns health snapshots', async () => { + const app = await buildApp(); + const res = await app.inject({ method: 'GET', url: '/api/llm/health' }); + + expect(res.statusCode).toBe(200); + const body = JSON.parse(res.body); + expect(body.snapshots).toHaveLength(1); + expect(body.snapshots[0].provider).toBe('groq'); + expect(body.ready).toBe(true); + }); +}); diff --git a/services/cowork-service/src/modules/llm/routes.ts b/services/cowork-service/src/modules/llm/routes.ts new file mode 100644 index 00000000..72905418 --- /dev/null +++ b/services/cowork-service/src/modules/llm/routes.ts @@ -0,0 +1,84 @@ +/** + * LLM routing endpoints for cowork-service. + * + * POST /api/llm/chat — Route a chat completion through the LLM router + * GET /api/llm/providers — List available LLM providers + * GET /api/llm/health — Health snapshots for all provider+model pairs + * + * All endpoints are gated by the `llm_multi_model_enabled` feature flag. 
+ */ + +import type { FastifyInstance } from 'fastify'; +import { isFeatureEnabled } from '../../lib/feature-flags.js'; +import { getLlmRouter, isLlmRouterReady } from '../../lib/llm-router.js'; +import { ChatRequestSchema } from './types.js'; + +export async function llmRoutes(app: FastifyInstance) { + // ── POST /api/llm/chat ─────────────────────────────────────── + app.post('/api/llm/chat', async (req, reply) => { + if (!isFeatureEnabled('llm_multi_model_enabled')) { + reply.code(403); + return { error: 'LLM multi-model routing is disabled' }; + } + + if (!isLlmRouterReady()) { + reply.code(503); + return { error: 'LLM router not initialized — no providers configured' }; + } + + const parsed = ChatRequestSchema.safeParse(req.body); + if (!parsed.success) { + reply.code(400); + return { error: 'Invalid request', details: parsed.error.issues }; + } + + try { + const result = await getLlmRouter().chat(parsed.data); + return { + response: result.response, + provider: result.provider, + model: result.model, + totalLatencyMs: result.totalLatencyMs, + attempts: result.attempts, + }; + } catch (err) { + req.log.error(err, 'LLM chat routing failed'); + reply.code(502); + return { error: err instanceof Error ? 
err.message : 'LLM routing failed' }; + } + }); + + // ── GET /api/llm/providers ────────────────────────────────── + app.get('/api/llm/providers', async (_req, reply) => { + if (!isFeatureEnabled('llm_multi_model_enabled')) { + reply.code(403); + return { error: 'LLM multi-model routing is disabled' }; + } + + if (!isLlmRouterReady()) { + return { providers: [], ready: false }; + } + + return { + providers: getLlmRouter().getProviders(), + ready: true, + }; + }); + + // ── GET /api/llm/health ───────────────────────────────────── + app.get('/api/llm/health', async (_req, reply) => { + if (!isFeatureEnabled('llm_multi_model_enabled')) { + reply.code(403); + return { error: 'LLM multi-model routing is disabled' }; + } + + if (!isLlmRouterReady()) { + return { snapshots: [], ready: false }; + } + + return { + snapshots: getLlmRouter().getHealth(), + ready: true, + }; + }); +} diff --git a/services/cowork-service/src/modules/llm/types.ts b/services/cowork-service/src/modules/llm/types.ts new file mode 100644 index 00000000..09749e91 --- /dev/null +++ b/services/cowork-service/src/modules/llm/types.ts @@ -0,0 +1,20 @@ +/** + * Zod schemas for LLM routing endpoints. 
+ */ + +import { z } from 'zod'; + +export const ChatMessageSchema = z.object({ + role: z.enum(['system', 'user', 'assistant']), + content: z.string(), +}); + +export const ChatRequestSchema = z.object({ + messages: z.array(ChatMessageSchema).min(1), + model: z.string().optional(), + temperature: z.number().min(0).max(2).optional(), + max_tokens: z.number().int().positive().optional(), + top_p: z.number().min(0).max(1).optional(), +}); + +export type ChatRequest = z.infer<typeof ChatRequestSchema>; diff --git a/services/cowork-service/src/server.test.ts b/services/cowork-service/src/server.test.ts index 48ee2c04..c1fe4e0d 100644 --- a/services/cowork-service/src/server.test.ts +++ b/services/cowork-service/src/server.test.ts @@ -62,6 +62,12 @@ vi.mock('./lib/flush-scheduler.js', () => ({ finalFlush: vi.fn(async () => undefined), })), })); +vi.mock('./lib/llm-router.js', () => ({ + initLlmRouter: vi.fn(() => ({ getProviders: vi.fn(() => []) })), + getLlmRouter: vi.fn(() => ({ getProviders: vi.fn(() => []) })), + isLlmRouterReady: vi.fn(() => false), +})); +vi.mock('./modules/llm/routes.js', () => ({ llmRoutes: vi.fn() })); describe('cowork-service bootstrap', () => { beforeEach(() => { @@ -81,9 +87,9 @@ describe('cowork-service bootstrap', () => { expect(opts.version).toBe('0.1.0'); expect(opts.readiness).toBe(true); - // JWT context + health + task routes = 2 register calls + 1 JWT + // JWT context + health + task + llm routes = 3 register calls + 1 JWT expect(registerOptionalJwtContextMock).toHaveBeenCalledOnce(); - expect(appMock.register).toHaveBeenCalledTimes(2); + expect(appMock.register).toHaveBeenCalledTimes(3); expect(startServiceMock).toHaveBeenCalledWith(appMock, { port: 4009, host: '0.0.0.0' }); }); }); diff --git a/services/cowork-service/src/server.ts b/services/cowork-service/src/server.ts index 506a7bfe..c4665564 100644 --- a/services/cowork-service/src/server.ts +++ b/services/cowork-service/src/server.ts @@ -23,6 +23,8 @@ import { config } from './lib/config.js'; import { 
productConfig, PRODUCT_ID } from './lib/product-config.js'; import { getIpcBridge } from './lib/ipc-bridge.js'; import { getFlushScheduler } from './lib/flush-scheduler.js'; +import { initLlmRouter } from './lib/llm-router.js'; +import { llmRoutes } from './modules/llm/routes.js'; import type { JwtPayload } from './lib/request-context.js'; const jwtSecret = new TextEncoder().encode(config.JWT_SECRET); @@ -51,6 +53,7 @@ await registerOptionalJwtContext(app, { // Register route modules await app.register(healthRoutes); await app.register(taskRoutes); +await app.register(llmRoutes); // Bootstrap endpoint (same pattern as FlowMonk, ActionTrail, etc.) app.get('/api/bootstrap', async () => ({ @@ -69,6 +72,16 @@ try { app.log.warn({ err }, 'IPC bridge failed to start — running in fallback mode'); } +// Initialize LLM router (best-effort — works without API keys in dev) +try { + const llm = initLlmRouter({ + onTelemetry: (entry) => app.log.debug({ llmTelemetry: entry }, 'llm-router event'), + }); + app.log.info({ providers: llm.getProviders() }, 'LLM router initialized'); +} catch (err) { + app.log.warn({ err }, 'LLM router not available — no provider API keys configured'); +} + // Start flush scheduler (periodic drain of IPC buffers → platform-service) const scheduler = getFlushScheduler(app.log); if (bridge.isRunning) {