feat(cowork-service): H.7 — wire @bytelyst/llm-router for multi-model routing

Added LLM routing module to cowork-service:
- lib/llm-router.ts — singleton LlmRouter with cloud + local Ollama support
- modules/llm/types.ts — Zod request schemas
- modules/llm/routes.ts — POST /api/llm/chat, GET /api/llm/providers, GET /api/llm/health
- All endpoints gated by llm_multi_model_enabled feature flag
- Best-effort init: service works without API keys (router stays uninitialized)
- 8 new tests (routes), server test updated for 3 route modules
- 57 total tests passing, typecheck clean
This commit is contained in:
saravanakumardb1 2026-04-02 23:10:07 -07:00
parent 53c3565874
commit f542160784
8 changed files with 362 additions and 24 deletions

32
pnpm-lock.yaml generated
View File

@ -195,7 +195,7 @@ importers:
version: 9.39.2(jiti@2.6.1)
eslint-config-next:
specifier: 16.1.6
version: 16.1.6(@typescript-eslint/parser@8.55.0(eslint@9.39.2(jiti@2.6.1))(typescript@5.9.3))(eslint@9.39.2(jiti@2.6.1))(typescript@5.9.3)
version: 16.1.6(@typescript-eslint/parser@8.56.0(eslint@9.39.2(jiti@2.6.1))(typescript@5.9.3))(eslint@9.39.2(jiti@2.6.1))(typescript@5.9.3)
husky:
specifier: ^9.0.0
version: 9.1.7
@ -292,7 +292,7 @@ importers:
version: 9.39.2(jiti@2.6.1)
eslint-config-next:
specifier: 16.1.6
version: 16.1.6(@typescript-eslint/parser@8.56.0(eslint@9.39.2(jiti@2.6.1))(typescript@5.9.3))(eslint@9.39.2(jiti@2.6.1))(typescript@5.9.3)
version: 16.1.6(@typescript-eslint/parser@8.55.0(eslint@9.39.2(jiti@2.6.1))(typescript@5.9.3))(eslint@9.39.2(jiti@2.6.1))(typescript@5.9.3)
husky:
specifier: ^9.0.0
version: 9.1.7
@ -991,6 +991,9 @@ importers:
'@bytelyst/fastify-core':
specifier: workspace:*
version: link:../../packages/fastify-core
'@bytelyst/llm-router':
specifier: workspace:*
version: link:../../packages/llm-router
'@bytelyst/logger':
specifier: workspace:*
version: link:../../packages/logger
@ -15280,7 +15283,7 @@ snapshots:
'@next/eslint-plugin-next': 16.1.6
eslint: 9.39.2(jiti@2.6.1)
eslint-import-resolver-node: 0.3.9
eslint-import-resolver-typescript: 3.10.1(eslint-plugin-import@2.32.0(@typescript-eslint/parser@8.56.0(eslint@9.39.2(jiti@2.6.1))(typescript@5.9.3))(eslint@9.39.2(jiti@2.6.1)))(eslint@9.39.2(jiti@2.6.1))
eslint-import-resolver-typescript: 3.10.1(eslint-plugin-import@2.32.0)(eslint@9.39.2(jiti@2.6.1))
eslint-plugin-import: 2.32.0(@typescript-eslint/parser@8.56.0(eslint@9.39.2(jiti@2.6.1))(typescript@5.9.3))(eslint-import-resolver-typescript@3.10.1)(eslint@9.39.2(jiti@2.6.1))
eslint-plugin-jsx-a11y: 6.10.2(eslint@9.39.2(jiti@2.6.1))
eslint-plugin-react: 7.37.5(eslint@9.39.2(jiti@2.6.1))
@ -15303,7 +15306,7 @@ snapshots:
transitivePeerDependencies:
- supports-color
eslint-import-resolver-typescript@3.10.1(eslint-plugin-import@2.32.0(@typescript-eslint/parser@8.56.0(eslint@9.39.2(jiti@2.6.1))(typescript@5.9.3))(eslint@9.39.2(jiti@2.6.1)))(eslint@9.39.2(jiti@2.6.1)):
eslint-import-resolver-typescript@3.10.1(eslint-plugin-import@2.32.0)(eslint@9.39.2(jiti@2.6.1)):
dependencies:
'@nolyfill/is-core-module': 1.0.39
debug: 4.4.3
@ -15318,21 +15321,6 @@ snapshots:
transitivePeerDependencies:
- supports-color
eslint-import-resolver-typescript@3.10.1(eslint-plugin-import@2.32.0)(eslint@9.39.2(jiti@2.6.1)):
dependencies:
'@nolyfill/is-core-module': 1.0.39
debug: 4.4.3
eslint: 9.39.2(jiti@2.6.1)
get-tsconfig: 4.13.6
is-bun-module: 2.0.0
stable-hash: 0.0.5
tinyglobby: 0.2.15
unrs-resolver: 1.11.1
optionalDependencies:
eslint-plugin-import: 2.32.0(@typescript-eslint/parser@8.55.0(eslint@9.39.2(jiti@2.6.1))(typescript@5.9.3))(eslint-import-resolver-typescript@3.10.1)(eslint@9.39.2(jiti@2.6.1))
transitivePeerDependencies:
- supports-color
eslint-module-utils@2.12.1(@typescript-eslint/parser@8.55.0(eslint@9.39.2(jiti@2.6.1))(typescript@5.9.3))(eslint-import-resolver-node@0.3.9)(eslint-import-resolver-typescript@3.10.1)(eslint@9.39.2(jiti@2.6.1)):
dependencies:
debug: 3.2.7
@ -15344,14 +15332,14 @@ snapshots:
transitivePeerDependencies:
- supports-color
eslint-module-utils@2.12.1(@typescript-eslint/parser@8.56.0(eslint@9.39.2(jiti@2.6.1))(typescript@5.9.3))(eslint-import-resolver-node@0.3.9)(eslint-import-resolver-typescript@3.10.1(eslint-plugin-import@2.32.0(@typescript-eslint/parser@8.56.0(eslint@9.39.2(jiti@2.6.1))(typescript@5.9.3))(eslint@9.39.2(jiti@2.6.1)))(eslint@9.39.2(jiti@2.6.1)))(eslint@9.39.2(jiti@2.6.1)):
eslint-module-utils@2.12.1(@typescript-eslint/parser@8.56.0(eslint@9.39.2(jiti@2.6.1))(typescript@5.9.3))(eslint-import-resolver-node@0.3.9)(eslint-import-resolver-typescript@3.10.1)(eslint@9.39.2(jiti@2.6.1)):
dependencies:
debug: 3.2.7
optionalDependencies:
'@typescript-eslint/parser': 8.56.0(eslint@9.39.2(jiti@2.6.1))(typescript@5.9.3)
eslint: 9.39.2(jiti@2.6.1)
eslint-import-resolver-node: 0.3.9
eslint-import-resolver-typescript: 3.10.1(eslint-plugin-import@2.32.0(@typescript-eslint/parser@8.56.0(eslint@9.39.2(jiti@2.6.1))(typescript@5.9.3))(eslint@9.39.2(jiti@2.6.1)))(eslint@9.39.2(jiti@2.6.1))
eslint-import-resolver-typescript: 3.10.1(eslint-plugin-import@2.32.0)(eslint@9.39.2(jiti@2.6.1))
transitivePeerDependencies:
- supports-color
@ -15395,7 +15383,7 @@ snapshots:
doctrine: 2.1.0
eslint: 9.39.2(jiti@2.6.1)
eslint-import-resolver-node: 0.3.9
eslint-module-utils: 2.12.1(@typescript-eslint/parser@8.56.0(eslint@9.39.2(jiti@2.6.1))(typescript@5.9.3))(eslint-import-resolver-node@0.3.9)(eslint-import-resolver-typescript@3.10.1(eslint-plugin-import@2.32.0(@typescript-eslint/parser@8.56.0(eslint@9.39.2(jiti@2.6.1))(typescript@5.9.3))(eslint@9.39.2(jiti@2.6.1)))(eslint@9.39.2(jiti@2.6.1)))(eslint@9.39.2(jiti@2.6.1))
eslint-module-utils: 2.12.1(@typescript-eslint/parser@8.56.0(eslint@9.39.2(jiti@2.6.1))(typescript@5.9.3))(eslint-import-resolver-node@0.3.9)(eslint-import-resolver-typescript@3.10.1)(eslint@9.39.2(jiti@2.6.1))
hasown: 2.0.2
is-core-module: 2.16.1
is-glob: 4.0.3

View File

@ -21,6 +21,7 @@
"@bytelyst/errors": "workspace:*",
"@bytelyst/fastify-auth": "workspace:*",
"@bytelyst/fastify-core": "workspace:*",
"@bytelyst/llm-router": "workspace:*",
"@bytelyst/logger": "workspace:*",
"@fastify/cors": "^10.0.2",
"fastify": "^5.2.1",

View File

@ -0,0 +1,79 @@
/**
* LLM Router singleton for cowork-service.
*
* Provides multi-model LLM routing via @bytelyst/llm-router.
* Supports cloud providers (Groq, OpenRouter, Together, Cerebras) and local Ollama.
* Gated by the `llm_multi_model_enabled` feature flag.
*
* The Rust runtime still handles primary Anthropic calls; this router adds
* alternative model access for multi-model comparison, fallback, and cost optimization.
*/
import { LlmRouter, createLocalOllamaProvider, type ProviderConfig, type TelemetryEntry } from '@bytelyst/llm-router';
import { config } from './config.js';
// Module-level singleton instance: null until initLlmRouter() assigns it,
// and null again after resetLlmRouter() is called.
let _router: LlmRouter | null = null;
/**
 * Options accepted by initLlmRouter(). Every field is optional, and the
 * options object itself may be omitted.
 */
export interface LlmRouterOptions {
/** Additional providers beyond defaults; appended after the local Ollama provider when one is created. */
extraProviders?: ProviderConfig[];
/** Local Ollama model IDs. A non-empty list makes initLlmRouter() add a provider built by createLocalOllamaProvider(). */
ollamaModels?: string[];
/** Ollama base URL, passed as the second argument to createLocalOllamaProvider(). Default: http://localhost:11434/v1 */
ollamaBaseUrl?: string;
/** Telemetry callback, forwarded unchanged to the LlmRouter constructor as `onTelemetry`. */
onTelemetry?: (entry: TelemetryEntry) => void;
}
/**
 * Build a new LlmRouter and install it as the module singleton, replacing any
 * previously installed instance. Intended to be called once during server startup.
 *
 * Provider order: the local Ollama provider (only when `ollamaModels` is
 * non-empty) comes first, followed by `extraProviders`. When that list ends up
 * empty, no `providers` key is passed to the constructor at all, leaving the
 * choice to LlmRouter (per the original note: its default cloud free-tier
 * providers — NOTE(review): confirm against @bytelyst/llm-router).
 *
 * @param opts Optional provider and telemetry settings.
 * @returns The freshly constructed router, which is also stored as the singleton.
 */
export function initLlmRouter(opts?: LlmRouterOptions): LlmRouter {
  const ollamaModels = opts?.ollamaModels ?? [];

  const providers: ProviderConfig[] = [
    ...(ollamaModels.length > 0
      ? [createLocalOllamaProvider(ollamaModels, opts?.ollamaBaseUrl)]
      : []),
    ...(opts?.extraProviders ?? []),
  ];

  // The singleton is only replaced once construction has succeeded.
  const router = new LlmRouter({
    ...(providers.length > 0 ? { providers } : {}),
    timeoutMs: config.RUST_RUNTIME_TIMEOUT_MS,
    onTelemetry: opts?.onTelemetry,
  });
  _router = router;
  return router;
}
/**
 * Return the router singleton installed by initLlmRouter().
 *
 * @throws Error when no router is currently installed.
 */
export function getLlmRouter(): LlmRouter {
  if (_router) {
    return _router;
  }
  throw new Error('LLM router not initialized — call initLlmRouter() first');
}
/**
 * Check if the LLM router has been initialized.
 *
 * @returns true when a router singleton is currently installed, false otherwise.
 *          Never throws, so callers can use it to guard getLlmRouter().
 */
export function isLlmRouterReady(): boolean {
return _router !== null;
}
/**
 * Reset singleton (for testing).
 *
 * Only drops the module's reference; afterwards isLlmRouterReady() reports
 * false and getLlmRouter() throws until initLlmRouter() is called again.
 */
export function resetLlmRouter(): void {
_router = null;
}

View File

@ -0,0 +1,147 @@
import { describe, expect, it, vi, beforeEach } from 'vitest';
import Fastify from 'fastify';
import { llmRoutes } from './routes.js';
// Mock feature flags: by default only `llm_multi_model_enabled` reports as enabled.
// (beforeEach below overrides this with mockReturnValue(true) for every test.)
vi.mock('../../lib/feature-flags.js', () => ({
isFeatureEnabled: vi.fn((flag: string) => flag === 'llm_multi_model_enabled'),
}));
// Mock LLM router: these stubs stand in for the router methods the routes call,
// and are shared with the assertions in the tests below.
const mockChat = vi.fn();
const mockGetProviders = vi.fn(() => ['groq', 'openrouter']);
const mockGetHealth = vi.fn(() => [
{ provider: 'groq', model: 'llama-3.3-70b-versatile', healthy: true, totalRequests: 5, successes: 5, rateLimits: 0, errors: 0, avgLatencyMs: 200, p95LatencyMs: 300 },
]);
// NOTE(review): the stubs above are only read inside the getLlmRouter stub, i.e. when a
// route handler calls it, not when the factory runs. Presumably this is what keeps the
// setup safe under Vitest's vi.mock hoisting — confirm before restructuring.
vi.mock('../../lib/llm-router.js', () => ({
getLlmRouter: vi.fn(() => ({
chat: mockChat,
getProviders: mockGetProviders,
getHealth: mockGetHealth,
})),
isLlmRouterReady: vi.fn(() => true),
}));
import { isFeatureEnabled } from '../../lib/feature-flags.js';
import { isLlmRouterReady } from '../../lib/llm-router.js';
/** Create a Fastify instance with logging disabled and only the LLM routes registered. */
async function buildApp() {
  const instance = Fastify({ logger: false });
  await instance.register(llmRoutes);
  return instance;
}
// Shared baseline for every test: clear all mocks, then force the feature flag on
// and the router ready. Individual tests override only the condition they exercise.
beforeEach(() => {
vi.clearAllMocks();
vi.mocked(isFeatureEnabled).mockReturnValue(true);
vi.mocked(isLlmRouterReady).mockReturnValue(true);
});
describe('POST /api/llm/chat', () => {
  /** Smallest body that satisfies the chat request schema. */
  const validPayload = { messages: [{ role: 'user', content: 'Hi' }] };

  /** Build a fresh app and POST `payload` to the chat endpoint. */
  async function postChat(payload: object) {
    const app = await buildApp();
    return app.inject({ method: 'POST', url: '/api/llm/chat', payload });
  }

  it('routes a valid chat request', async () => {
    mockChat.mockResolvedValue({
      response: {
        id: 'r1',
        object: 'chat.completion',
        created: 1,
        model: 'llama-3.3-70b',
        choices: [
          { index: 0, message: { role: 'assistant', content: 'Hello' }, finish_reason: 'stop' },
        ],
      },
      provider: 'groq',
      model: 'llama-3.3-70b-versatile',
      totalLatencyMs: 150,
      attempts: 1,
    });

    const res = await postChat(validPayload);

    expect(res.statusCode).toBe(200);
    const { provider, model } = JSON.parse(res.body);
    expect(provider).toBe('groq');
    expect(model).toBe('llama-3.3-70b-versatile');
    expect(mockChat).toHaveBeenCalledOnce();
  });

  it('returns 400 for invalid request body', async () => {
    // An empty messages array is the invalid input under test.
    const res = await postChat({ messages: [] });

    expect(res.statusCode).toBe(400);
    expect(JSON.parse(res.body).error).toBe('Invalid request');
  });

  it('returns 403 when flag is disabled', async () => {
    vi.mocked(isFeatureEnabled).mockReturnValue(false);

    const res = await postChat(validPayload);

    expect(res.statusCode).toBe(403);
  });

  it('returns 503 when router not ready', async () => {
    vi.mocked(isLlmRouterReady).mockReturnValue(false);

    const res = await postChat(validPayload);

    expect(res.statusCode).toBe(503);
  });

  it('returns 502 on router error', async () => {
    mockChat.mockRejectedValue(new Error('All providers exhausted'));

    const res = await postChat(validPayload);

    expect(res.statusCode).toBe(502);
    expect(JSON.parse(res.body).error).toContain('All providers exhausted');
  });
});
describe('GET /api/llm/providers', () => {
  /** Request shared by both cases; only the mocked router state differs. */
  const listProviders = { method: 'GET', url: '/api/llm/providers' } as const;

  it('returns provider list', async () => {
    const app = await buildApp();
    const res = await app.inject(listProviders);

    expect(res.statusCode).toBe(200);
    const { providers, ready } = JSON.parse(res.body);
    expect(providers).toEqual(['groq', 'openrouter']);
    expect(ready).toBe(true);
  });

  it('returns empty when router not ready', async () => {
    vi.mocked(isLlmRouterReady).mockReturnValue(false);

    const app = await buildApp();
    const res = await app.inject(listProviders);

    expect(res.statusCode).toBe(200);
    expect(JSON.parse(res.body)).toEqual({ providers: [], ready: false });
  });
});
describe('GET /api/llm/health', () => {
  it('returns health snapshots', async () => {
    const app = await buildApp();
    const res = await app.inject({ method: 'GET', url: '/api/llm/health' });

    expect(res.statusCode).toBe(200);

    // The mocked router reports exactly one snapshot, for groq.
    const { snapshots, ready } = JSON.parse(res.body);
    expect(snapshots).toHaveLength(1);
    expect(snapshots[0].provider).toBe('groq');
    expect(ready).toBe(true);
  });
});

View File

@ -0,0 +1,84 @@
/**
* LLM routing endpoints for cowork-service.
*
* POST /api/llm/chat Route a chat completion through the LLM router
* GET /api/llm/providers List available LLM providers
* GET /api/llm/health Health snapshots for all provider+model pairs
*
* All endpoints are gated by the `llm_multi_model_enabled` feature flag.
*/
import type { FastifyInstance } from 'fastify';
import { isFeatureEnabled } from '../../lib/feature-flags.js';
import { getLlmRouter, isLlmRouterReady } from '../../lib/llm-router.js';
import { ChatRequestSchema } from './types.js';
/**
 * Fastify plugin that registers the three LLM routing endpoints on `app`.
 *
 * Every handler first checks the `llm_multi_model_enabled` feature flag and
 * answers 403 when it is off. When the router singleton is not ready, the chat
 * endpoint answers 503, while the two read-only endpoints answer 200 with an
 * empty list and `ready: false`.
 */
export async function llmRoutes(app: FastifyInstance) {
  // Feature gate shared by all three handlers; evaluated on every request.
  const routingDisabled = () => !isFeatureEnabled('llm_multi_model_enabled');
  // Fresh 403 body per request so no response object is shared between replies.
  const disabledBody = () => ({ error: 'LLM multi-model routing is disabled' });

  // ── POST /api/llm/chat ───────────────────────────────────────
  app.post('/api/llm/chat', async (req, reply) => {
    if (routingDisabled()) {
      reply.code(403);
      return disabledBody();
    }
    if (!isLlmRouterReady()) {
      reply.code(503);
      return { error: 'LLM router not initialized — no providers configured' };
    }

    // Validate only after the gate and readiness checks have passed.
    const parsed = ChatRequestSchema.safeParse(req.body);
    if (!parsed.success) {
      reply.code(400);
      return { error: 'Invalid request', details: parsed.error.issues };
    }

    try {
      const { response, provider, model, totalLatencyMs, attempts } =
        await getLlmRouter().chat(parsed.data);
      return { response, provider, model, totalLatencyMs, attempts };
    } catch (err) {
      // Any router failure is logged and reported to the caller as 502.
      req.log.error(err, 'LLM chat routing failed');
      const message = err instanceof Error ? err.message : 'LLM routing failed';
      reply.code(502);
      return { error: message };
    }
  });

  // ── GET /api/llm/providers ──────────────────────────────────
  app.get('/api/llm/providers', async (_req, reply) => {
    if (routingDisabled()) {
      reply.code(403);
      return disabledBody();
    }
    return isLlmRouterReady()
      ? { providers: getLlmRouter().getProviders(), ready: true }
      : { providers: [], ready: false };
  });

  // ── GET /api/llm/health ─────────────────────────────────────
  app.get('/api/llm/health', async (_req, reply) => {
    if (routingDisabled()) {
      reply.code(403);
      return disabledBody();
    }
    return isLlmRouterReady()
      ? { snapshots: getLlmRouter().getHealth(), ready: true }
      : { snapshots: [], ready: false };
  });
}

View File

@ -0,0 +1,20 @@
/**
* Zod schemas for LLM routing endpoints.
*/
import { z } from 'zod';
/**
 * One chat message: `role` must be 'system', 'user' or 'assistant', and
 * `content` must be a string (no length constraint is applied here).
 */
export const ChatMessageSchema = z.object({
role: z.enum(['system', 'user', 'assistant']),
content: z.string(),
});
/**
 * Request body for POST /api/llm/chat.
 *
 * - `messages`: at least one ChatMessageSchema entry (required).
 * - `model`: optional string.
 * - `temperature`: optional number in [0, 2].
 * - `max_tokens`: optional positive integer.
 * - `top_p`: optional number in [0, 1].
 */
export const ChatRequestSchema = z.object({
messages: z.array(ChatMessageSchema).min(1),
model: z.string().optional(),
temperature: z.number().min(0).max(2).optional(),
max_tokens: z.number().int().positive().optional(),
top_p: z.number().min(0).max(1).optional(),
});
/** Static type of a validated chat request, inferred from ChatRequestSchema. */
export type ChatRequest = z.infer<typeof ChatRequestSchema>;

View File

@ -62,6 +62,12 @@ vi.mock('./lib/flush-scheduler.js', () => ({
finalFlush: vi.fn(async () => undefined),
})),
}));
// Stub the LLM router module for the bootstrap test: both factories return an object
// whose getProviders() yields [], and isLlmRouterReady() reports false.
vi.mock('./lib/llm-router.js', () => ({
initLlmRouter: vi.fn(() => ({ getProviders: vi.fn(() => []) })),
getLlmRouter: vi.fn(() => ({ getProviders: vi.fn(() => []) })),
isLlmRouterReady: vi.fn(() => false),
}));
// Replace the LLM route plugin with a bare mock function.
vi.mock('./modules/llm/routes.js', () => ({ llmRoutes: vi.fn() }));
describe('cowork-service bootstrap', () => {
beforeEach(() => {
@ -81,9 +87,9 @@ describe('cowork-service bootstrap', () => {
expect(opts.version).toBe('0.1.0');
expect(opts.readiness).toBe(true);
// JWT context + health + task routes = 2 register calls + 1 JWT
// JWT context + health + task + llm routes = 3 register calls + 1 JWT
expect(registerOptionalJwtContextMock).toHaveBeenCalledOnce();
expect(appMock.register).toHaveBeenCalledTimes(2);
expect(appMock.register).toHaveBeenCalledTimes(3);
expect(startServiceMock).toHaveBeenCalledWith(appMock, { port: 4009, host: '0.0.0.0' });
});
});

View File

@ -23,6 +23,8 @@ import { config } from './lib/config.js';
import { productConfig, PRODUCT_ID } from './lib/product-config.js';
import { getIpcBridge } from './lib/ipc-bridge.js';
import { getFlushScheduler } from './lib/flush-scheduler.js';
import { initLlmRouter } from './lib/llm-router.js';
import { llmRoutes } from './modules/llm/routes.js';
import type { JwtPayload } from './lib/request-context.js';
const jwtSecret = new TextEncoder().encode(config.JWT_SECRET);
@ -51,6 +53,7 @@ await registerOptionalJwtContext(app, {
// Register route modules
await app.register(healthRoutes);
await app.register(taskRoutes);
await app.register(llmRoutes);
// Bootstrap endpoint (same pattern as FlowMonk, ActionTrail, etc.)
app.get('/api/bootstrap', async () => ({
@ -69,6 +72,16 @@ try {
app.log.warn({ err }, 'IPC bridge failed to start — running in fallback mode');
}
// Best-effort LLM router startup (works without API keys in dev): any error
// thrown while creating the router or listing its providers is logged as a
// warning and startup continues.
try {
  const router = initLlmRouter({
    // Router telemetry is logged at debug level.
    onTelemetry: (entry) => {
      app.log.debug({ llmTelemetry: entry }, 'llm-router event');
    },
  });
  const providers = router.getProviders();
  app.log.info({ providers }, 'LLM router initialized');
} catch (err) {
  app.log.warn({ err }, 'LLM router not available — no provider API keys configured');
}
// Start flush scheduler (periodic drain of IPC buffers → platform-service)
const scheduler = getFlushScheduler(app.log);
if (bridge.isRunning) {