import type {
  ChatCompletionRequest,
  PromptCategory,
  RouterConfig,
  ProviderConfig,
  RouteResult,
  RoutePlan,
  HealthSnapshot,
} from './types.js';
import { DEFAULT_PROVIDERS, getAvailableProviders } from './registry.js';
import { classifyPrompt } from './classifier.js';
import { HealthTracker } from './health.js';
import { selectCandidates, pickNext, excludeCandidate, createRoundRobinState } from './selector.js';
import { sendChatCompletion } from './client.js';

/**
 * Routes chat completion requests to one of the configured providers.
 *
 * A request is either sent to an explicitly named model (see
 * `resolveExplicitModel`) or classified with `classifyPrompt` and sent to a
 * candidate chosen by `selectCandidates` / `pickNext`. Every attempt is
 * recorded in the `HealthTracker` and reported through the optional
 * `onTelemetry` callback.
 */
export class LlmRouter {
  private readonly providers: ProviderConfig[];
  private readonly health: HealthTracker;
  private readonly timeoutMs: number;
  private readonly maxRetries: number;
  private readonly log: (entry: TelemetryEntry) => void;
  // Typed from the factory so it always matches whatever the selector module creates.
  private readonly roundRobinState: ReturnType<typeof createRoundRobinState>;

  /**
   * @param config Optional router configuration. `providers` defaults to
   *   `DEFAULT_PROVIDERS`, `timeoutMs` to 30 000 and `maxRetries` to 3;
   *   `onTelemetry` defaults to a no-op.
   * @throws Error when `getAvailableProviders` returns no provider.
   */
  constructor(config?: RouterConfig & { onTelemetry?: (entry: TelemetryEntry) => void }) {
    const allProviders = config?.providers ?? DEFAULT_PROVIDERS;
    this.providers = getAvailableProviders(allProviders);

    if (this.providers.length === 0) {
      throw new Error(
        'No providers available. Set at least one API key env var: ' +
          allProviders.map(p => p.apiKeyEnv).join(', ')
      );
    }

    this.health = new HealthTracker({
      windowMs: config?.healthWindowMs,
      errorThreshold: config?.errorThreshold,
      rateLimitThreshold: config?.rateLimitThreshold,
    });
    this.timeoutMs = config?.timeoutMs ?? 30_000;
    this.maxRetries = config?.maxRetries ?? 3;
    this.log = config?.onTelemetry ?? (() => {});
    this.roundRobinState = createRoundRobinState();
  }

  /**
   * Route a chat completion request to an available provider.
   *
   * When the request names an explicit model, it is sent to that model once
   * with no fallback. Otherwise up to `maxRetries` candidates are tried: a
   * candidate that answers with status 429, or whose call throws, is recorded
   * in the health tracker, excluded, and the next candidate is tried.
   *
   * @param request The chat completion request to route.
   * @returns The provider response plus routing metadata.
   * @throws Error when planning fails (unknown explicit provider/model, no
   *   healthy candidates) or when every attempted candidate failed.
   */
  async chat(request: ChatCompletionRequest): Promise<RouteResult> {
    const startTime = Date.now();
    // Plan without touching the round-robin state; the loop below does the real picking.
    const plan = this.planInternal(request, false);

    if (plan.explicit) {
      return this.chatWithExplicitModel(request, startTime, plan);
    }

    const category = plan.category as PromptCategory;
    let candidates = selectCandidates(this.providers, category, this.health);
    let lastError: Error | null = null;
    let attemptsMade = 0;

    for (let attempt = 1; attempt <= this.maxRetries; attempt++) {
      const pick = pickNext(candidates, this.roundRobinState);
      if (!pick) break; // candidate list exhausted before the retry budget

      attemptsMade = attempt;
      const { provider, model } = pick;
      const attemptStart = Date.now();

      try {
        const result = await sendChatCompletion(provider, model.id, request, this.timeoutMs);

        if (result.status === 429) {
          // Rate limited — record and try next provider
          this.health.record(provider.name, model.id, {
            timestamp: Date.now(),
            latencyMs: result.latencyMs,
            status: 'rate_limit',
          });
          this.log({
            event: 'rate_limit',
            provider: provider.name,
            model: model.id,
            attempt,
            latencyMs: result.latencyMs,
            category,
          });
          // Remember the rate limit so the final error does not report "unknown".
          lastError = new Error(`Rate limited by ${provider.name} for model ${model.id}`);
          candidates = excludeCandidate(candidates, provider.name, model.id);
          continue;
        }

        // Success
        this.health.record(provider.name, model.id, {
          timestamp: Date.now(),
          latencyMs: result.latencyMs,
          status: 'success',
        });
        this.log({
          event: 'success',
          provider: provider.name,
          model: model.id,
          attempt,
          latencyMs: result.latencyMs,
          category,
          tokens: result.response.usage?.total_tokens,
        });

        return {
          response: result.response,
          provider: provider.name,
          model: model.id,
          totalLatencyMs: Date.now() - startTime,
          attempts: attempt,
        };
      } catch (err) {
        lastError = err instanceof Error ? err : new Error(String(err));
        const attemptLatency = Date.now() - attemptStart;

        this.health.record(provider.name, model.id, {
          timestamp: Date.now(),
          latencyMs: attemptLatency,
          status: 'error',
        });
        this.log({
          event: 'error',
          provider: provider.name,
          model: model.id,
          attempt,
          latencyMs: attemptLatency,
          category,
          error: lastError.message,
        });
        candidates = excludeCandidate(candidates, provider.name, model.id);
      }
    }

    // Report the attempts actually made, which can be fewer than maxRetries
    // when the candidate list ran out first.
    throw new Error(
      `All providers exhausted after ${attemptsMade} attempts. Last error: ${lastError?.message ?? 'unknown'}`
    );
  }

  /**
   * Handle explicit provider:model routing (bypass classifier).
   *
   * Sends the request once to the resolved provider/model. There is no
   * fallback: a 429 result or a thrown error is recorded and then thrown.
   *
   * @param request The chat completion request.
   * @param startTime Timestamp (ms) at which the overall `chat` call started;
   *   used for `totalLatencyMs`.
   * @param plan Already-resolved route; resolved via `plan(request)` when omitted.
   * @throws Error on a 429 result, or whatever the client call throws.
   */
  private async chatWithExplicitModel(
    request: ChatCompletionRequest,
    startTime: number,
    plan?: RoutePlan
  ): Promise<RouteResult> {
    const resolved = plan ?? this.plan(request);
    const provider = resolved.provider;
    const modelId = resolved.model.id;
    const attemptStart = Date.now();

    // Sentinel so the catch block can recognise the rate-limit error raised
    // below by identity instead of by matching on its message text.
    let rateLimitError: Error | undefined;

    try {
      const result = await sendChatCompletion(provider, modelId, request, this.timeoutMs);

      if (result.status === 429) {
        this.health.record(provider.name, modelId, {
          timestamp: Date.now(),
          latencyMs: result.latencyMs,
          status: 'rate_limit',
        });
        this.log({
          event: 'rate_limit',
          provider: provider.name,
          model: modelId,
          attempt: 1,
          latencyMs: result.latencyMs,
          category: 'explicit',
        });
        rateLimitError = new Error(`Rate limited by ${provider.name} for model ${modelId}`);
        throw rateLimitError;
      }

      this.health.record(provider.name, modelId, {
        timestamp: Date.now(),
        latencyMs: result.latencyMs,
        status: 'success',
      });
      this.log({
        event: 'success',
        provider: provider.name,
        model: modelId,
        attempt: 1,
        latencyMs: result.latencyMs,
        category: 'explicit',
        tokens: result.response.usage?.total_tokens,
      });

      return {
        response: result.response,
        provider: provider.name,
        model: modelId,
        totalLatencyMs: Date.now() - startTime,
        attempts: 1,
      };
    } catch (err) {
      // Re-throw our own rate-limit error (already recorded and logged above).
      if (rateLimitError !== undefined && err === rateLimitError) {
        throw err;
      }

      // Measure from the send, matching how chat() measures per-attempt latency.
      const latency = Date.now() - attemptStart;
      this.health.record(provider.name, modelId, {
        timestamp: Date.now(),
        latencyMs: latency,
        status: 'error',
      });
      this.log({
        event: 'error',
        provider: provider.name,
        model: modelId,
        attempt: 1,
        latencyMs: latency,
        category: 'explicit',
        error: err instanceof Error ? err.message : String(err),
      });
      throw err;
    }
  }

  /**
   * Compute the route a request would take without sending it.
   *
   * For non-explicit requests the pick goes through `pickNext` with the
   * router's shared round-robin state.
   *
   * @throws Error when an explicit provider/model cannot be resolved or no
   *   candidate is available.
   */
  plan(request: ChatCompletionRequest): RoutePlan {
    return this.planInternal(request, true);
  }

  /**
   * Shared planning logic.
   *
   * @param advanceRoundRobin When true the candidate is chosen with
   *   `pickNext` and the shared round-robin state; when false the first
   *   candidate is taken and the round-robin state is not passed anywhere.
   */
  private planInternal(request: ChatCompletionRequest, advanceRoundRobin: boolean): RoutePlan {
    const explicit = this.resolveExplicitModel(request.model);
    if (explicit) {
      return {
        provider: explicit.provider,
        model: explicit.model,
        category: 'explicit',
        explicit: true,
      };
    }

    const classification = classifyPrompt(request.messages);
    const candidates = selectCandidates(this.providers, classification.category, this.health);
    if (candidates.length === 0) {
      throw new Error('No healthy providers available for routing');
    }

    const pick = advanceRoundRobin
      ? pickNext(candidates, this.roundRobinState)
      : (candidates[0] ?? null);
    if (!pick) {
      throw new Error('No provider available for routing');
    }

    return {
      provider: pick.provider,
      model: pick.model,
      category: classification.category,
      explicit: false,
    };
  }

  /**
   * Resolve `request.model` to a concrete provider/model pair.
   *
   * Accepted forms:
   * - `provider:model` or `provider/model` — split at the first `:` or `/`.
   * - a bare model id that exists on exactly one provider.
   *
   * Model ids may themselves contain `/` or `:`. When the prefixed reading
   * does not resolve, the whole string is therefore also tried as a bare
   * model id before an error is raised.
   *
   * @returns The resolved pair, or `null` when `model` is empty or is a
   *   separator-free id that no provider offers.
   * @throws Error when a prefixed name cannot be resolved, or when a bare id
   *   is offered by more than one provider.
   */
  private resolveExplicitModel(
    model?: string
  ): { provider: ProviderConfig; model: RoutePlan['model'] } | null {
    if (!model) return null;

    if (model.includes(':') || model.includes('/')) {
      const { providerName, modelId } = parseExplicitModel(model);
      const provider = this.providers.find(p => p.name === providerName);
      const matchedModel = provider?.models.find(candidate => candidate.id === modelId);
      if (provider && matchedModel) {
        return { provider, model: matchedModel };
      }

      // The separator may be part of the model id itself; try the whole string.
      const bareMatch = this.findUniqueModel(model);
      if (bareMatch) return bareMatch;

      if (!provider) {
        throw new Error(
          `Provider "${providerName}" not found. Available: ${this.providers.map(p => p.name).join(', ')}`
        );
      }
      throw new Error(
        `Model "${modelId}" not found for provider "${providerName}". Available: ${provider.models
          .map(candidate => candidate.id)
          .join(', ')}`
      );
    }

    return this.findUniqueModel(model);
  }

  /**
   * Find the single provider/model pair whose model id equals `modelId`.
   *
   * @returns The pair, or `null` when no provider offers that id.
   * @throws Error when more than one provider offers that id.
   */
  private findUniqueModel(
    modelId: string
  ): { provider: ProviderConfig; model: RoutePlan['model'] } | null {
    const matches = this.providers.flatMap(provider =>
      provider.models
        .filter(candidate => candidate.id === modelId)
        .map(candidate => ({ provider, model: candidate }))
    );

    if (matches.length > 1) {
      throw new Error(
        `Model "${modelId}" is available on multiple providers. Use provider:model format instead.`
      );
    }
    return matches[0] ?? null;
  }

  /** Get health snapshots for all tracked provider+model pairs. */
  getHealth(): HealthSnapshot[] {
    return this.health.allSnapshots();
  }

  /** Get list of available (configured) providers. */
  getProviders(): string[] {
    return this.providers.map(p => p.name);
  }

  /** Reset health tracking data. */
  resetHealth(): void {
    this.health.reset();
  }
}

/**
 * Split a `provider:model` / `provider/model` string at its first `:` or `/`.
 *
 * Only the first separator splits, so the model id may contain further
 * separators. Without any separator the whole input is returned as
 * `providerName` and `modelId` is empty.
 */
function parseExplicitModel(raw: string): { providerName: string; modelId: string } {
  const separatorPositions = [raw.indexOf(':'), raw.indexOf('/')].filter(idx => idx !== -1);
  if (separatorPositions.length === 0) {
    return { providerName: raw, modelId: '' };
  }

  const sepIdx = Math.min(...separatorPositions);
  return {
    providerName: raw.slice(0, sepIdx),
    modelId: raw.slice(sepIdx + 1),
  };
}

// ── Telemetry types ────────────────────────────────────────────

/** One telemetry record, emitted per provider attempt via `onTelemetry`. */
export interface TelemetryEntry {
  /** Outcome of the attempt. */
  event: 'success' | 'rate_limit' | 'error';
  /** Name of the provider that was called. */
  provider: string;
  /** Id of the model that was called. */
  model: string;
  /** 1-based attempt number within the `chat` call. */
  attempt: number;
  /** Latency of this attempt in milliseconds. */
  latencyMs: number;
  /** Prompt category used for routing, or `'explicit'` for explicit-model requests. */
  category: string;
  /** Total tokens reported in the response usage, on success when available. */
  tokens?: number;
  /** Error message, set on `error` events. */
  error?: string;
}