feat(cowork-service): H.7 — wire @bytelyst/llm-router for multi-model routing
Added LLM routing module to cowork-service:
- lib/llm-router.ts — singleton LlmRouter with cloud + local Ollama support
- modules/llm/types.ts — Zod request schemas
- modules/llm/routes.ts — POST /api/llm/chat, GET /api/llm/providers, GET /api/llm/health
- All endpoints gated by llm_multi_model_enabled feature flag
- Best-effort init: service works without API keys (router stays uninitialized)
- 8 new tests (routes), server test updated for 3 route modules
- 57 total tests passing, typecheck clean
This commit is contained in:
parent
53c3565874
commit
f542160784
32
pnpm-lock.yaml
generated
32
pnpm-lock.yaml
generated
@ -195,7 +195,7 @@ importers:
|
|||||||
version: 9.39.2(jiti@2.6.1)
|
version: 9.39.2(jiti@2.6.1)
|
||||||
eslint-config-next:
|
eslint-config-next:
|
||||||
specifier: 16.1.6
|
specifier: 16.1.6
|
||||||
version: 16.1.6(@typescript-eslint/parser@8.55.0(eslint@9.39.2(jiti@2.6.1))(typescript@5.9.3))(eslint@9.39.2(jiti@2.6.1))(typescript@5.9.3)
|
version: 16.1.6(@typescript-eslint/parser@8.56.0(eslint@9.39.2(jiti@2.6.1))(typescript@5.9.3))(eslint@9.39.2(jiti@2.6.1))(typescript@5.9.3)
|
||||||
husky:
|
husky:
|
||||||
specifier: ^9.0.0
|
specifier: ^9.0.0
|
||||||
version: 9.1.7
|
version: 9.1.7
|
||||||
@ -292,7 +292,7 @@ importers:
|
|||||||
version: 9.39.2(jiti@2.6.1)
|
version: 9.39.2(jiti@2.6.1)
|
||||||
eslint-config-next:
|
eslint-config-next:
|
||||||
specifier: 16.1.6
|
specifier: 16.1.6
|
||||||
version: 16.1.6(@typescript-eslint/parser@8.56.0(eslint@9.39.2(jiti@2.6.1))(typescript@5.9.3))(eslint@9.39.2(jiti@2.6.1))(typescript@5.9.3)
|
version: 16.1.6(@typescript-eslint/parser@8.55.0(eslint@9.39.2(jiti@2.6.1))(typescript@5.9.3))(eslint@9.39.2(jiti@2.6.1))(typescript@5.9.3)
|
||||||
husky:
|
husky:
|
||||||
specifier: ^9.0.0
|
specifier: ^9.0.0
|
||||||
version: 9.1.7
|
version: 9.1.7
|
||||||
@ -991,6 +991,9 @@ importers:
|
|||||||
'@bytelyst/fastify-core':
|
'@bytelyst/fastify-core':
|
||||||
specifier: workspace:*
|
specifier: workspace:*
|
||||||
version: link:../../packages/fastify-core
|
version: link:../../packages/fastify-core
|
||||||
|
'@bytelyst/llm-router':
|
||||||
|
specifier: workspace:*
|
||||||
|
version: link:../../packages/llm-router
|
||||||
'@bytelyst/logger':
|
'@bytelyst/logger':
|
||||||
specifier: workspace:*
|
specifier: workspace:*
|
||||||
version: link:../../packages/logger
|
version: link:../../packages/logger
|
||||||
@ -15280,7 +15283,7 @@ snapshots:
|
|||||||
'@next/eslint-plugin-next': 16.1.6
|
'@next/eslint-plugin-next': 16.1.6
|
||||||
eslint: 9.39.2(jiti@2.6.1)
|
eslint: 9.39.2(jiti@2.6.1)
|
||||||
eslint-import-resolver-node: 0.3.9
|
eslint-import-resolver-node: 0.3.9
|
||||||
eslint-import-resolver-typescript: 3.10.1(eslint-plugin-import@2.32.0(@typescript-eslint/parser@8.56.0(eslint@9.39.2(jiti@2.6.1))(typescript@5.9.3))(eslint@9.39.2(jiti@2.6.1)))(eslint@9.39.2(jiti@2.6.1))
|
eslint-import-resolver-typescript: 3.10.1(eslint-plugin-import@2.32.0)(eslint@9.39.2(jiti@2.6.1))
|
||||||
eslint-plugin-import: 2.32.0(@typescript-eslint/parser@8.56.0(eslint@9.39.2(jiti@2.6.1))(typescript@5.9.3))(eslint-import-resolver-typescript@3.10.1)(eslint@9.39.2(jiti@2.6.1))
|
eslint-plugin-import: 2.32.0(@typescript-eslint/parser@8.56.0(eslint@9.39.2(jiti@2.6.1))(typescript@5.9.3))(eslint-import-resolver-typescript@3.10.1)(eslint@9.39.2(jiti@2.6.1))
|
||||||
eslint-plugin-jsx-a11y: 6.10.2(eslint@9.39.2(jiti@2.6.1))
|
eslint-plugin-jsx-a11y: 6.10.2(eslint@9.39.2(jiti@2.6.1))
|
||||||
eslint-plugin-react: 7.37.5(eslint@9.39.2(jiti@2.6.1))
|
eslint-plugin-react: 7.37.5(eslint@9.39.2(jiti@2.6.1))
|
||||||
@ -15303,7 +15306,7 @@ snapshots:
|
|||||||
transitivePeerDependencies:
|
transitivePeerDependencies:
|
||||||
- supports-color
|
- supports-color
|
||||||
|
|
||||||
eslint-import-resolver-typescript@3.10.1(eslint-plugin-import@2.32.0(@typescript-eslint/parser@8.56.0(eslint@9.39.2(jiti@2.6.1))(typescript@5.9.3))(eslint@9.39.2(jiti@2.6.1)))(eslint@9.39.2(jiti@2.6.1)):
|
eslint-import-resolver-typescript@3.10.1(eslint-plugin-import@2.32.0)(eslint@9.39.2(jiti@2.6.1)):
|
||||||
dependencies:
|
dependencies:
|
||||||
'@nolyfill/is-core-module': 1.0.39
|
'@nolyfill/is-core-module': 1.0.39
|
||||||
debug: 4.4.3
|
debug: 4.4.3
|
||||||
@ -15318,21 +15321,6 @@ snapshots:
|
|||||||
transitivePeerDependencies:
|
transitivePeerDependencies:
|
||||||
- supports-color
|
- supports-color
|
||||||
|
|
||||||
eslint-import-resolver-typescript@3.10.1(eslint-plugin-import@2.32.0)(eslint@9.39.2(jiti@2.6.1)):
|
|
||||||
dependencies:
|
|
||||||
'@nolyfill/is-core-module': 1.0.39
|
|
||||||
debug: 4.4.3
|
|
||||||
eslint: 9.39.2(jiti@2.6.1)
|
|
||||||
get-tsconfig: 4.13.6
|
|
||||||
is-bun-module: 2.0.0
|
|
||||||
stable-hash: 0.0.5
|
|
||||||
tinyglobby: 0.2.15
|
|
||||||
unrs-resolver: 1.11.1
|
|
||||||
optionalDependencies:
|
|
||||||
eslint-plugin-import: 2.32.0(@typescript-eslint/parser@8.55.0(eslint@9.39.2(jiti@2.6.1))(typescript@5.9.3))(eslint-import-resolver-typescript@3.10.1)(eslint@9.39.2(jiti@2.6.1))
|
|
||||||
transitivePeerDependencies:
|
|
||||||
- supports-color
|
|
||||||
|
|
||||||
eslint-module-utils@2.12.1(@typescript-eslint/parser@8.55.0(eslint@9.39.2(jiti@2.6.1))(typescript@5.9.3))(eslint-import-resolver-node@0.3.9)(eslint-import-resolver-typescript@3.10.1)(eslint@9.39.2(jiti@2.6.1)):
|
eslint-module-utils@2.12.1(@typescript-eslint/parser@8.55.0(eslint@9.39.2(jiti@2.6.1))(typescript@5.9.3))(eslint-import-resolver-node@0.3.9)(eslint-import-resolver-typescript@3.10.1)(eslint@9.39.2(jiti@2.6.1)):
|
||||||
dependencies:
|
dependencies:
|
||||||
debug: 3.2.7
|
debug: 3.2.7
|
||||||
@ -15344,14 +15332,14 @@ snapshots:
|
|||||||
transitivePeerDependencies:
|
transitivePeerDependencies:
|
||||||
- supports-color
|
- supports-color
|
||||||
|
|
||||||
eslint-module-utils@2.12.1(@typescript-eslint/parser@8.56.0(eslint@9.39.2(jiti@2.6.1))(typescript@5.9.3))(eslint-import-resolver-node@0.3.9)(eslint-import-resolver-typescript@3.10.1(eslint-plugin-import@2.32.0(@typescript-eslint/parser@8.56.0(eslint@9.39.2(jiti@2.6.1))(typescript@5.9.3))(eslint@9.39.2(jiti@2.6.1)))(eslint@9.39.2(jiti@2.6.1)))(eslint@9.39.2(jiti@2.6.1)):
|
eslint-module-utils@2.12.1(@typescript-eslint/parser@8.56.0(eslint@9.39.2(jiti@2.6.1))(typescript@5.9.3))(eslint-import-resolver-node@0.3.9)(eslint-import-resolver-typescript@3.10.1)(eslint@9.39.2(jiti@2.6.1)):
|
||||||
dependencies:
|
dependencies:
|
||||||
debug: 3.2.7
|
debug: 3.2.7
|
||||||
optionalDependencies:
|
optionalDependencies:
|
||||||
'@typescript-eslint/parser': 8.56.0(eslint@9.39.2(jiti@2.6.1))(typescript@5.9.3)
|
'@typescript-eslint/parser': 8.56.0(eslint@9.39.2(jiti@2.6.1))(typescript@5.9.3)
|
||||||
eslint: 9.39.2(jiti@2.6.1)
|
eslint: 9.39.2(jiti@2.6.1)
|
||||||
eslint-import-resolver-node: 0.3.9
|
eslint-import-resolver-node: 0.3.9
|
||||||
eslint-import-resolver-typescript: 3.10.1(eslint-plugin-import@2.32.0(@typescript-eslint/parser@8.56.0(eslint@9.39.2(jiti@2.6.1))(typescript@5.9.3))(eslint@9.39.2(jiti@2.6.1)))(eslint@9.39.2(jiti@2.6.1))
|
eslint-import-resolver-typescript: 3.10.1(eslint-plugin-import@2.32.0)(eslint@9.39.2(jiti@2.6.1))
|
||||||
transitivePeerDependencies:
|
transitivePeerDependencies:
|
||||||
- supports-color
|
- supports-color
|
||||||
|
|
||||||
@ -15395,7 +15383,7 @@ snapshots:
|
|||||||
doctrine: 2.1.0
|
doctrine: 2.1.0
|
||||||
eslint: 9.39.2(jiti@2.6.1)
|
eslint: 9.39.2(jiti@2.6.1)
|
||||||
eslint-import-resolver-node: 0.3.9
|
eslint-import-resolver-node: 0.3.9
|
||||||
eslint-module-utils: 2.12.1(@typescript-eslint/parser@8.56.0(eslint@9.39.2(jiti@2.6.1))(typescript@5.9.3))(eslint-import-resolver-node@0.3.9)(eslint-import-resolver-typescript@3.10.1(eslint-plugin-import@2.32.0(@typescript-eslint/parser@8.56.0(eslint@9.39.2(jiti@2.6.1))(typescript@5.9.3))(eslint@9.39.2(jiti@2.6.1)))(eslint@9.39.2(jiti@2.6.1)))(eslint@9.39.2(jiti@2.6.1))
|
eslint-module-utils: 2.12.1(@typescript-eslint/parser@8.56.0(eslint@9.39.2(jiti@2.6.1))(typescript@5.9.3))(eslint-import-resolver-node@0.3.9)(eslint-import-resolver-typescript@3.10.1)(eslint@9.39.2(jiti@2.6.1))
|
||||||
hasown: 2.0.2
|
hasown: 2.0.2
|
||||||
is-core-module: 2.16.1
|
is-core-module: 2.16.1
|
||||||
is-glob: 4.0.3
|
is-glob: 4.0.3
|
||||||
|
|||||||
@ -21,6 +21,7 @@
|
|||||||
"@bytelyst/errors": "workspace:*",
|
"@bytelyst/errors": "workspace:*",
|
||||||
"@bytelyst/fastify-auth": "workspace:*",
|
"@bytelyst/fastify-auth": "workspace:*",
|
||||||
"@bytelyst/fastify-core": "workspace:*",
|
"@bytelyst/fastify-core": "workspace:*",
|
||||||
|
"@bytelyst/llm-router": "workspace:*",
|
||||||
"@bytelyst/logger": "workspace:*",
|
"@bytelyst/logger": "workspace:*",
|
||||||
"@fastify/cors": "^10.0.2",
|
"@fastify/cors": "^10.0.2",
|
||||||
"fastify": "^5.2.1",
|
"fastify": "^5.2.1",
|
||||||
|
|||||||
79
services/cowork-service/src/lib/llm-router.ts
Normal file
79
services/cowork-service/src/lib/llm-router.ts
Normal file
@ -0,0 +1,79 @@
|
|||||||
|
/**
|
||||||
|
* LLM Router singleton for cowork-service.
|
||||||
|
*
|
||||||
|
* Provides multi-model LLM routing via @bytelyst/llm-router.
|
||||||
|
* Supports cloud providers (Groq, OpenRouter, Together, Cerebras) and local Ollama.
|
||||||
|
* Gated by the `llm_multi_model_enabled` feature flag.
|
||||||
|
*
|
||||||
|
* The Rust runtime still handles primary Anthropic calls — this router adds
|
||||||
|
* alternative model access for multi-model comparison, fallback, and cost optimization.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import { LlmRouter, createLocalOllamaProvider, type ProviderConfig, type TelemetryEntry } from '@bytelyst/llm-router';
|
||||||
|
import { config } from './config.js';
|
||||||
|
|
||||||
|
let _router: LlmRouter | null = null;
|
||||||
|
|
||||||
|
/**
 * Options accepted by `initLlmRouter`. Every field is optional.
 */
export interface LlmRouterOptions {
  /**
   * Model IDs to serve from a local Ollama instance. A non-empty list makes
   * `initLlmRouter` create a provider via `createLocalOllamaProvider`.
   */
  ollamaModels?: string[];
  /**
   * Base URL handed to `createLocalOllamaProvider` together with `ollamaModels`.
   * NOTE(review): the fallback is presumably http://localhost:11434/v1, applied
   * inside @bytelyst/llm-router — confirm against that package.
   */
  ollamaBaseUrl?: string;
  /**
   * Extra provider configurations, appended after the Ollama provider (if any).
   * NOTE(review): once any provider is listed, `initLlmRouter` passes an explicit
   * `providers` array to the router, so the router's built-in defaults are
   * presumably not used in addition — confirm against LlmRouter.
   */
  extraProviders?: Array<ProviderConfig>;
  /** Callback forwarded to the router as its `onTelemetry` option. */
  onTelemetry?: (entry: TelemetryEntry) => void;
}
|
||||||
|
|
||||||
|
/**
 * Create a new LlmRouter and install it as the module-level singleton.
 *
 * Intended to be called once during server startup. Calling it again simply
 * replaces the previously stored instance.
 *
 * @param opts Optional provider and telemetry settings (see LlmRouterOptions).
 * @returns The router instance that was just created and stored.
 */
export function initLlmRouter(opts?: LlmRouterOptions): LlmRouter {
  const ollamaModels = opts?.ollamaModels;
  const wantsOllama = ollamaModels !== undefined && ollamaModels.length > 0;

  // Provider order: local Ollama first (when requested), then caller-supplied extras.
  const providers: ProviderConfig[] = [
    ...(wantsOllama ? [createLocalOllamaProvider(ollamaModels, opts?.ollamaBaseUrl)] : []),
    ...(opts?.extraProviders ?? []),
  ];

  // With nothing configured, the `providers` key is left out entirely so the
  // router can fall back to its own defaults (cloud free-tier providers).
  const explicitProviders = providers.length === 0 ? undefined : { providers };

  const router = new LlmRouter({
    ...explicitProviders,
    // NOTE(review): reuses the Rust runtime timeout setting — confirm this is
    // the intended timeout for LLM provider calls.
    timeoutMs: config.RUST_RUNTIME_TIMEOUT_MS,
    onTelemetry: opts?.onTelemetry,
  });

  _router = router;
  return router;
}
|
||||||
|
|
||||||
|
/**
 * Return the stored LLM router singleton.
 *
 * @returns The router previously created by `initLlmRouter`.
 * @throws Error when no router has been stored yet.
 */
export function getLlmRouter(): LlmRouter {
  if (_router !== null) {
    return _router;
  }
  throw new Error('LLM router not initialized — call initLlmRouter() first');
}
|
||||||
|
|
||||||
|
/**
 * Report whether a router singleton is currently stored.
 *
 * @returns `true` once `initLlmRouter` has stored an instance, `false` otherwise.
 */
export function isLlmRouterReady(): boolean {
  const missing = _router === null;
  return !missing;
}
|
||||||
|
|
||||||
|
/**
 * Drop the stored router so the module is back in its uninitialized state.
 * Meant for tests.
 */
export function resetLlmRouter(): void {
  _router = null;
}
|
||||||
147
services/cowork-service/src/modules/llm/routes.test.ts
Normal file
147
services/cowork-service/src/modules/llm/routes.test.ts
Normal file
@ -0,0 +1,147 @@
|
|||||||
|
import { describe, expect, it, vi, beforeEach } from 'vitest';
|
||||||
|
import Fastify from 'fastify';
|
||||||
|
import { llmRoutes } from './routes.js';
|
||||||
|
|
||||||
|
// Feature-flag module: by default only the multi-model flag reports as enabled.
vi.mock('../../lib/feature-flags.js', () => {
  return {
    isFeatureEnabled: vi.fn((flag: string) => flag === 'llm_multi_model_enabled'),
  };
});

// Stand-ins for the router methods the routes call. The mock factory below
// references them only inside a lazily-invoked function.
const mockChat = vi.fn();
const mockGetProviders = vi.fn(() => ['groq', 'openrouter']);
const mockGetHealth = vi.fn(() => [
  {
    provider: 'groq',
    model: 'llama-3.3-70b-versatile',
    healthy: true,
    totalRequests: 5,
    successes: 5,
    rateLimits: 0,
    errors: 0,
    avgLatencyMs: 200,
    p95LatencyMs: 300,
  },
]);

// Router module: always "ready" by default and backed by the stand-ins above.
vi.mock('../../lib/llm-router.js', () => {
  return {
    getLlmRouter: vi.fn(() => {
      return {
        chat: mockChat,
        getProviders: mockGetProviders,
        getHealth: mockGetHealth,
      };
    }),
    isLlmRouterReady: vi.fn(() => true),
  };
});
|
||||||
|
|
||||||
|
import { isFeatureEnabled } from '../../lib/feature-flags.js';
|
||||||
|
import { isLlmRouterReady } from '../../lib/llm-router.js';
|
||||||
|
|
||||||
|
/** Create a Fastify instance with logging disabled and the LLM routes registered. */
async function buildApp() {
  const instance = Fastify({ logger: false });
  await instance.register(llmRoutes);
  return instance;
}
|
||||||
|
|
||||||
|
// Each test starts from the happy path: call history cleared, flag on, router ready.
beforeEach(() => {
  vi.clearAllMocks();

  const flagMock = vi.mocked(isFeatureEnabled);
  const readyMock = vi.mocked(isLlmRouterReady);
  flagMock.mockReturnValue(true);
  readyMock.mockReturnValue(true);
});
|
||||||
|
|
||||||
|
describe('POST /api/llm/chat', () => {
  // Smallest body that satisfies the request schema; fresh object per call.
  const validBody = () => ({ messages: [{ role: 'user', content: 'Hi' }] });

  // Builds a fresh app and sends a single POST to the chat endpoint.
  async function postChat(payload: object) {
    const app = await buildApp();
    return app.inject({ method: 'POST', url: '/api/llm/chat', payload });
  }

  it('routes a valid chat request', async () => {
    const completion = {
      id: 'r1',
      object: 'chat.completion',
      created: 1,
      model: 'llama-3.3-70b',
      choices: [{ index: 0, message: { role: 'assistant', content: 'Hello' }, finish_reason: 'stop' }],
    };
    mockChat.mockResolvedValue({
      response: completion,
      provider: 'groq',
      model: 'llama-3.3-70b-versatile',
      totalLatencyMs: 150,
      attempts: 1,
    });

    const res = await postChat(validBody());

    expect(res.statusCode).toBe(200);
    const { provider, model } = JSON.parse(res.body);
    expect(provider).toBe('groq');
    expect(model).toBe('llama-3.3-70b-versatile');
    expect(mockChat).toHaveBeenCalledOnce();
  });

  it('returns 400 for invalid request body', async () => {
    // An empty messages array violates the schema's minimum length.
    const res = await postChat({ messages: [] });

    expect(res.statusCode).toBe(400);
    expect(JSON.parse(res.body).error).toBe('Invalid request');
  });

  it('returns 403 when flag is disabled', async () => {
    vi.mocked(isFeatureEnabled).mockReturnValue(false);

    const res = await postChat(validBody());

    expect(res.statusCode).toBe(403);
  });

  it('returns 503 when router not ready', async () => {
    vi.mocked(isLlmRouterReady).mockReturnValue(false);

    const res = await postChat(validBody());

    expect(res.statusCode).toBe(503);
  });

  it('returns 502 on router error', async () => {
    mockChat.mockRejectedValue(new Error('All providers exhausted'));

    const res = await postChat(validBody());

    expect(res.statusCode).toBe(502);
    expect(JSON.parse(res.body).error).toContain('All providers exhausted');
  });
});
|
||||||
|
|
||||||
|
describe('GET /api/llm/providers', () => {
  it('returns provider list', async () => {
    const app = await buildApp();
    const res = await app.inject({ method: 'GET', url: '/api/llm/providers' });

    expect(res.statusCode).toBe(200);
    const body = JSON.parse(res.body);
    expect(body.providers).toEqual(['groq', 'openrouter']);
    expect(body.ready).toBe(true);
  });

  it('returns empty when router not ready', async () => {
    vi.mocked(isLlmRouterReady).mockReturnValue(false);
    const app = await buildApp();
    const res = await app.inject({ method: 'GET', url: '/api/llm/providers' });

    expect(res.statusCode).toBe(200);
    expect(JSON.parse(res.body)).toEqual({ providers: [], ready: false });
  });

  // The feature-flag gate applies to this endpoint as well as to POST /api/llm/chat;
  // cover it here so a regression in the gate is caught for the read-only route too.
  it('returns 403 when flag is disabled', async () => {
    vi.mocked(isFeatureEnabled).mockReturnValue(false);
    const app = await buildApp();
    const res = await app.inject({ method: 'GET', url: '/api/llm/providers' });

    expect(res.statusCode).toBe(403);
    expect(JSON.parse(res.body).error).toBe('LLM multi-model routing is disabled');
    // The router must not be consulted while the feature is off.
    expect(mockGetProviders).not.toHaveBeenCalled();
  });
});
|
||||||
|
|
||||||
|
describe('GET /api/llm/health', () => {
  it('returns health snapshots', async () => {
    const app = await buildApp();
    const res = await app.inject({ method: 'GET', url: '/api/llm/health' });

    expect(res.statusCode).toBe(200);
    const body = JSON.parse(res.body);
    expect(body.snapshots).toHaveLength(1);
    expect(body.snapshots[0].provider).toBe('groq');
    expect(body.ready).toBe(true);
  });

  // The route answers 200 with an empty snapshot list (not an error) while the
  // router is uninitialized; pin that so the health endpoint stays usable then.
  it('returns empty snapshots when router not ready', async () => {
    vi.mocked(isLlmRouterReady).mockReturnValue(false);
    const app = await buildApp();
    const res = await app.inject({ method: 'GET', url: '/api/llm/health' });

    expect(res.statusCode).toBe(200);
    expect(JSON.parse(res.body)).toEqual({ snapshots: [], ready: false });
    expect(mockGetHealth).not.toHaveBeenCalled();
  });

  // The feature-flag gate is checked before anything else on this endpoint.
  it('returns 403 when flag is disabled', async () => {
    vi.mocked(isFeatureEnabled).mockReturnValue(false);
    const app = await buildApp();
    const res = await app.inject({ method: 'GET', url: '/api/llm/health' });

    expect(res.statusCode).toBe(403);
    expect(JSON.parse(res.body).error).toBe('LLM multi-model routing is disabled');
    expect(mockGetHealth).not.toHaveBeenCalled();
  });
});
|
||||||
84
services/cowork-service/src/modules/llm/routes.ts
Normal file
84
services/cowork-service/src/modules/llm/routes.ts
Normal file
@ -0,0 +1,84 @@
|
|||||||
|
/**
|
||||||
|
* LLM routing endpoints for cowork-service.
|
||||||
|
*
|
||||||
|
* POST /api/llm/chat — Route a chat completion through the LLM router
|
||||||
|
* GET /api/llm/providers — List available LLM providers
|
||||||
|
* GET /api/llm/health — Health snapshots for all provider+model pairs
|
||||||
|
*
|
||||||
|
* All endpoints are gated by the `llm_multi_model_enabled` feature flag.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import type { FastifyInstance } from 'fastify';
|
||||||
|
import { isFeatureEnabled } from '../../lib/feature-flags.js';
|
||||||
|
import { getLlmRouter, isLlmRouterReady } from '../../lib/llm-router.js';
|
||||||
|
import { ChatRequestSchema } from './types.js';
|
||||||
|
|
||||||
|
/**
 * Fastify plugin that registers the three LLM routing endpoints on `app`.
 *
 * Every handler first checks the `llm_multi_model_enabled` feature flag and
 * answers 403 while it is off. The chat endpoint additionally answers 503 when
 * the router is not initialized, 400 when the body fails schema validation and
 * 502 when the router call throws. The two GET endpoints answer 200 with an
 * empty list and `ready: false` when the router is not initialized.
 *
 * @param app Fastify instance to attach the routes to.
 */
export async function llmRoutes(app: FastifyInstance) {
  // Shared gate used by all three handlers.
  const featureOff = () => !isFeatureEnabled('llm_multi_model_enabled');
  const disabledBody = () => ({ error: 'LLM multi-model routing is disabled' });

  // ── POST /api/llm/chat ───────────────────────────────────────
  app.post('/api/llm/chat', async (req, reply) => {
    if (featureOff()) {
      reply.code(403);
      return disabledBody();
    }

    if (!isLlmRouterReady()) {
      reply.code(503);
      return { error: 'LLM router not initialized — no providers configured' };
    }

    const parsed = ChatRequestSchema.safeParse(req.body);
    if (!parsed.success) {
      reply.code(400);
      return { error: 'Invalid request', details: parsed.error.issues };
    }

    try {
      const { response, provider, model, totalLatencyMs, attempts } =
        await getLlmRouter().chat(parsed.data);
      return { response, provider, model, totalLatencyMs, attempts };
    } catch (err) {
      req.log.error(err, 'LLM chat routing failed');
      reply.code(502);
      // NOTE(review): the raw error message is echoed to the client — confirm
      // that router errors never carry sensitive provider details.
      const message = err instanceof Error ? err.message : 'LLM routing failed';
      return { error: message };
    }
  });

  // ── GET /api/llm/providers ──────────────────────────────────
  app.get('/api/llm/providers', async (_req, reply) => {
    if (featureOff()) {
      reply.code(403);
      return disabledBody();
    }

    const ready = isLlmRouterReady();
    return { providers: ready ? getLlmRouter().getProviders() : [], ready };
  });

  // ── GET /api/llm/health ─────────────────────────────────────
  app.get('/api/llm/health', async (_req, reply) => {
    if (featureOff()) {
      reply.code(403);
      return disabledBody();
    }

    const ready = isLlmRouterReady();
    return { snapshots: ready ? getLlmRouter().getHealth() : [], ready };
  });
}
|
||||||
20
services/cowork-service/src/modules/llm/types.ts
Normal file
20
services/cowork-service/src/modules/llm/types.ts
Normal file
@ -0,0 +1,20 @@
|
|||||||
|
/**
|
||||||
|
* Zod schemas for LLM routing endpoints.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import { z } from 'zod';
|
||||||
|
|
||||||
|
/** A single chat message: a role from a fixed set plus its text content. */
export const ChatMessageSchema = z.object({
  // Only these three roles are accepted.
  role: z.enum([
    'system',
    'user',
    'assistant',
  ]),
  // Any string is accepted, including the empty string.
  content: z.string(),
});
|
||||||
|
|
||||||
|
/**
 * Body accepted by POST /api/llm/chat: at least one message, plus optional
 * model selection and sampling parameters.
 */
export const ChatRequestSchema = z.object({
  // Required, and must contain at least one message.
  messages: z.array(ChatMessageSchema).min(1),
  // Optional model identifier.
  model: z.string().optional(),
  // Optional, within [0, 2].
  temperature: z.number().min(0).max(2).optional(),
  // Optional, positive integer.
  max_tokens: z.number().int().positive().optional(),
  // Optional, within [0, 1].
  top_p: z.number().min(0).max(1).optional(),
});

/** Static type of a request body that passed `ChatRequestSchema`. */
export type ChatRequest = z.infer<typeof ChatRequestSchema>;
|
||||||
@ -62,6 +62,12 @@ vi.mock('./lib/flush-scheduler.js', () => ({
|
|||||||
finalFlush: vi.fn(async () => undefined),
|
finalFlush: vi.fn(async () => undefined),
|
||||||
})),
|
})),
|
||||||
}));
|
}));
|
||||||
|
vi.mock('./lib/llm-router.js', () => ({
|
||||||
|
initLlmRouter: vi.fn(() => ({ getProviders: vi.fn(() => []) })),
|
||||||
|
getLlmRouter: vi.fn(() => ({ getProviders: vi.fn(() => []) })),
|
||||||
|
isLlmRouterReady: vi.fn(() => false),
|
||||||
|
}));
|
||||||
|
vi.mock('./modules/llm/routes.js', () => ({ llmRoutes: vi.fn() }));
|
||||||
|
|
||||||
describe('cowork-service bootstrap', () => {
|
describe('cowork-service bootstrap', () => {
|
||||||
beforeEach(() => {
|
beforeEach(() => {
|
||||||
@ -81,9 +87,9 @@ describe('cowork-service bootstrap', () => {
|
|||||||
expect(opts.version).toBe('0.1.0');
|
expect(opts.version).toBe('0.1.0');
|
||||||
expect(opts.readiness).toBe(true);
|
expect(opts.readiness).toBe(true);
|
||||||
|
|
||||||
// JWT context + health + task routes = 2 register calls + 1 JWT
|
// JWT context + health + task + llm routes = 3 register calls + 1 JWT
|
||||||
expect(registerOptionalJwtContextMock).toHaveBeenCalledOnce();
|
expect(registerOptionalJwtContextMock).toHaveBeenCalledOnce();
|
||||||
expect(appMock.register).toHaveBeenCalledTimes(2);
|
expect(appMock.register).toHaveBeenCalledTimes(3);
|
||||||
expect(startServiceMock).toHaveBeenCalledWith(appMock, { port: 4009, host: '0.0.0.0' });
|
expect(startServiceMock).toHaveBeenCalledWith(appMock, { port: 4009, host: '0.0.0.0' });
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|||||||
@ -23,6 +23,8 @@ import { config } from './lib/config.js';
|
|||||||
import { productConfig, PRODUCT_ID } from './lib/product-config.js';
|
import { productConfig, PRODUCT_ID } from './lib/product-config.js';
|
||||||
import { getIpcBridge } from './lib/ipc-bridge.js';
|
import { getIpcBridge } from './lib/ipc-bridge.js';
|
||||||
import { getFlushScheduler } from './lib/flush-scheduler.js';
|
import { getFlushScheduler } from './lib/flush-scheduler.js';
|
||||||
|
import { initLlmRouter } from './lib/llm-router.js';
|
||||||
|
import { llmRoutes } from './modules/llm/routes.js';
|
||||||
import type { JwtPayload } from './lib/request-context.js';
|
import type { JwtPayload } from './lib/request-context.js';
|
||||||
|
|
||||||
const jwtSecret = new TextEncoder().encode(config.JWT_SECRET);
|
const jwtSecret = new TextEncoder().encode(config.JWT_SECRET);
|
||||||
@ -51,6 +53,7 @@ await registerOptionalJwtContext(app, {
|
|||||||
// Register route modules
|
// Register route modules
|
||||||
await app.register(healthRoutes);
|
await app.register(healthRoutes);
|
||||||
await app.register(taskRoutes);
|
await app.register(taskRoutes);
|
||||||
|
await app.register(llmRoutes);
|
||||||
|
|
||||||
// Bootstrap endpoint (same pattern as FlowMonk, ActionTrail, etc.)
|
// Bootstrap endpoint (same pattern as FlowMonk, ActionTrail, etc.)
|
||||||
app.get('/api/bootstrap', async () => ({
|
app.get('/api/bootstrap', async () => ({
|
||||||
@ -69,6 +72,16 @@ try {
|
|||||||
app.log.warn({ err }, 'IPC bridge failed to start — running in fallback mode');
|
app.log.warn({ err }, 'IPC bridge failed to start — running in fallback mode');
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Initialize LLM router (best-effort — works without API keys in dev)
|
||||||
|
try {
|
||||||
|
const llm = initLlmRouter({
|
||||||
|
onTelemetry: (entry) => app.log.debug({ llmTelemetry: entry }, 'llm-router event'),
|
||||||
|
});
|
||||||
|
app.log.info({ providers: llm.getProviders() }, 'LLM router initialized');
|
||||||
|
} catch (err) {
|
||||||
|
app.log.warn({ err }, 'LLM router not available — no provider API keys configured');
|
||||||
|
}
|
||||||
|
|
||||||
// Start flush scheduler (periodic drain of IPC buffers → platform-service)
|
// Start flush scheduler (periodic drain of IPC buffers → platform-service)
|
||||||
const scheduler = getFlushScheduler(app.log);
|
const scheduler = getFlushScheduler(app.log);
|
||||||
if (bridge.isRunning) {
|
if (bridge.isRunning) {
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user