From 43bf51a290e9c26dbcf758604ecaf1f4ac123327 Mon Sep 17 00:00:00 2001
From: Saravana Achu Mac
Date: Sun, 5 Apr 2026 12:49:47 -0700
Subject: [PATCH] feat(llm): add Perplexity, Gemini providers and createFallbackChain
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Add PerplexityProvider (OpenAI-compatible, reads PERPLEXITY_API_KEY)
- Add GeminiProvider (Google Generative Language API adapter, reads GEMINI_API_KEY)
- Add createFallbackChain() — ordered provider chain, skips unconfigured, aggregates errors; allows any app to replace custom LLM fallback loops
- Extend LLMProviderType with 'perplexity' | 'gemini'
- Update factory to resolve and instantiate new provider types
- Add PAID_PROVIDERS to llm-router registry (OpenAI, Perplexity) for apps using round-robin routing alongside free-tier providers
- 27 tests covering fallback chain, new providers, error/edge cases

Co-Authored-By: Claude Sonnet 4.6
---
 packages/llm-router/src/index.ts             |   7 +-
 packages/llm-router/src/registry.ts          |  57 ++++++
 packages/llm/src/__tests__/fallback.test.ts  |  99 ++++++++++
 packages/llm/src/__tests__/providers.test.ts | 181 +++++++++++++++++++
 packages/llm/src/factory.ts                  |  12 +-
 packages/llm/src/fallback.ts                 |  36 ++++
 packages/llm/src/index.ts                    |   3 +
 packages/llm/src/providers/gemini.ts         | 121 +++++++++++++
 packages/llm/src/providers/perplexity.ts     |  74 ++++++++
 packages/llm/src/types.ts                    |   2 +-
 10 files changed, 589 insertions(+), 3 deletions(-)
 create mode 100644 packages/llm/src/__tests__/fallback.test.ts
 create mode 100644 packages/llm/src/__tests__/providers.test.ts
 create mode 100644 packages/llm/src/fallback.ts
 create mode 100644 packages/llm/src/providers/gemini.ts
 create mode 100644 packages/llm/src/providers/perplexity.ts

diff --git a/packages/llm-router/src/index.ts b/packages/llm-router/src/index.ts
index 74ab47bd..76602548 100644
--- a/packages/llm-router/src/index.ts
+++ b/packages/llm-router/src/index.ts
@@ -1,7 +1,12 @@
 export {
LlmRouter } from './router.js';
 export type { TelemetryEntry } from './router.js';
 
-export { DEFAULT_PROVIDERS, createLocalOllamaProvider, getAvailableProviders } from './registry.js';
+export {
+  DEFAULT_PROVIDERS,
+  PAID_PROVIDERS,
+  createLocalOllamaProvider,
+  getAvailableProviders,
+} from './registry.js';
 export { classifyPrompt } from './classifier.js';
 export { HealthTracker } from './health.js';
 export { selectCandidates, pickNext, excludeCandidate, createRoundRobinState } from './selector.js';
diff --git a/packages/llm-router/src/registry.ts b/packages/llm-router/src/registry.ts
index 5db490bc..cc1f16b8 100644
--- a/packages/llm-router/src/registry.ts
+++ b/packages/llm-router/src/registry.ts
@@ -1,5 +1,62 @@
 import type { ModelConfig, PromptCategory, ProviderConfig } from './types.js';
 
+/**
+ * Paid provider configurations (opt-in via API key env vars).
+ * Add to your RouterConfig.providers to include alongside free-tier providers.
+ */
+export const PAID_PROVIDERS: ProviderConfig[] = [
+  // ── OpenAI ───────────────────────────────────────────────────
+  {
+    name: 'openai',
+    baseUrl: 'https://api.openai.com/v1',
+    apiKeyEnv: 'OPENAI_API_KEY',
+    rpmLimit: 500,
+    tpmLimit: 150_000,
+    models: [
+      {
+        id: 'gpt-4o-mini',
+        label: 'GPT-4o Mini',
+        contextWindow: 128_000,
+        strengths: ['general', 'reasoning', 'code'],
+        speedTier: 1,
+      },
+      {
+        id: 'gpt-4o',
+        label: 'GPT-4o',
+        contextWindow: 128_000,
+        strengths: ['general', 'reasoning', 'code', 'creative'],
+        speedTier: 2,
+      },
+    ],
+  },
+
+  // ── Perplexity ───────────────────────────────────────────────
+  // Real-time web search grounding — OpenAI-compatible endpoint
+  {
+    name: 'perplexity',
+    baseUrl: 'https://api.perplexity.ai',
+    apiKeyEnv: 'PERPLEXITY_API_KEY',
+    rpmLimit: 50,
+    tpmLimit: 0,
+    models: [
+      {
+        id: 'sonar',
+        label: 'Sonar (web search)',
+        contextWindow: 127_072,
+        strengths: ['general', 'reasoning'],
+        speedTier: 2,
+      },
+      {
+        id: 'sonar-pro',
+        label: 'Sonar Pro (web search)',
+        contextWindow: 200_000,
+        strengths: ['general', 'reasoning'],
+        speedTier: 3,
+      },
+    ],
+  },
+];
+
 /**
  * Default free-tier provider configurations.
  * All use OpenAI-compatible /v1/chat/completions endpoints.
diff --git a/packages/llm/src/__tests__/fallback.test.ts b/packages/llm/src/__tests__/fallback.test.ts
new file mode 100644
index 00000000..89d41525
--- /dev/null
+++ b/packages/llm/src/__tests__/fallback.test.ts
@@ -0,0 +1,99 @@
+/**
+ * Tests for createFallbackChain.
+ */
+
+import { describe, it, expect } from 'vitest';
+import { createFallbackChain } from '../fallback.js';
+import { MockLLMProvider } from '../providers/mock.js';
+import type { ChatCompletionResponse } from '../types.js';
+
+const makeResponse = (content: string): ChatCompletionResponse => ({
+  content,
+  model: 'mock',
+  finishReason: 'stop',
+  usage: { promptTokens: 1, completionTokens: 1, totalTokens: 2 },
+});
+
+describe('createFallbackChain', () => {
+  it('isConfigured returns true when at least one provider is configured', () => {
+    const a = new MockLLMProvider();
+    const chain = createFallbackChain([a]);
+    expect(chain.isConfigured()).toBe(true);
+  });
+
+  it('isConfigured returns false when no providers are configured', () => {
+    const unconfigured = {
+      isConfigured: () => false,
+      chatCompletion: async () => {
+        throw new Error('not configured');
+      },
+    };
+    const chain = createFallbackChain([unconfigured]);
+    expect(chain.isConfigured()).toBe(false);
+  });
+
+  it('returns response from first configured provider', async () => {
+    const a = new MockLLMProvider([makeResponse('from-a')]);
+    const b = new MockLLMProvider([makeResponse('from-b')]);
+    const chain = createFallbackChain([a, b]);
+
+    const result = await chain.chatCompletion({ messages: [{ role: 'user', content: 'hi' }] });
+    expect(result.content).toBe('from-a');
+    expect(b.calls).toHaveLength(0);
+  });
+
+  it('falls back to second provider when first throws', async () => {
+    const a = {
+      isConfigured: () => true,
+      chatCompletion: async (): Promise<ChatCompletionResponse> => {
+        throw new Error('a failed');
+      },
+    };
+    const b = new MockLLMProvider([makeResponse('from-b')]);
+    const chain = createFallbackChain([a, b]);
+
+    const result = await chain.chatCompletion({ messages: [{ role: 'user', content: 'hi' }] });
+    expect(result.content).toBe('from-b');
+  });
+
+  it('skips unconfigured providers', async () => {
+    const unconfigured = {
+      isConfigured: () => false,
+      chatCompletion: async (): Promise<ChatCompletionResponse> => {
+        throw new Error('should not be called');
+      },
+    };
+    const b = new MockLLMProvider([makeResponse('from-b')]);
+    const chain = createFallbackChain([unconfigured, b]);
+
+    const result = await chain.chatCompletion({ messages: [{ role: 'user', content: 'hi' }] });
+    expect(result.content).toBe('from-b');
+  });
+
+  it('throws with all error messages when every provider fails', async () => {
+    const a = {
+      isConfigured: () => true,
+      chatCompletion: async (): Promise<ChatCompletionResponse> => {
+        throw new Error('a failed');
+      },
+    };
+    const b = {
+      isConfigured: () => true,
+      chatCompletion: async (): Promise<ChatCompletionResponse> => {
+        throw new Error('b failed');
+      },
+    };
+    const chain = createFallbackChain([a, b]);
+
+    await expect(
+      chain.chatCompletion({ messages: [{ role: 'user', content: 'hi' }] })
+    ).rejects.toThrow('All providers failed: a failed | b failed');
+  });
+
+  it('throws "No providers configured" when list is empty', async () => {
+    const chain = createFallbackChain([]);
+    await expect(
+      chain.chatCompletion({ messages: [{ role: 'user', content: 'hi' }] })
+    ).rejects.toThrow('No providers configured');
+  });
+});
diff --git a/packages/llm/src/__tests__/providers.test.ts b/packages/llm/src/__tests__/providers.test.ts
new file mode 100644
index 00000000..7ba1510d
--- /dev/null
+++ b/packages/llm/src/__tests__/providers.test.ts
@@ -0,0 +1,181 @@
+/**
+ * Tests for PerplexityProvider and GeminiProvider.
+ * Uses vi.stubGlobal to mock fetch — no real API calls.
+ */
+
+import { describe, it, expect, vi, afterEach } from 'vitest';
+import { PerplexityProvider } from '../providers/perplexity.js';
+import { GeminiProvider } from '../providers/gemini.js';
+
+const makeOpenAIResponse = (content: string, model = 'test-model') => ({
+  choices: [{ message: { content }, finish_reason: 'stop' }],
+  model,
+  usage: { prompt_tokens: 5, completion_tokens: 10, total_tokens: 15 },
+});
+
+const makeGeminiResponse = (text: string) => ({
+  candidates: [{ content: { parts: [{ text }] }, finishReason: 'STOP' }],
+  usageMetadata: { promptTokenCount: 5, candidatesTokenCount: 10, totalTokenCount: 15 },
+});
+
+afterEach(() => {
+  vi.restoreAllMocks();
+  vi.unstubAllEnvs();
+});
+
+// ── PerplexityProvider ──────────────────────────────────────────
+
+describe('PerplexityProvider', () => {
+  it('isConfigured false without API key', () => {
+    const p = new PerplexityProvider({ apiKey: '' });
+    expect(p.isConfigured()).toBe(false);
+  });
+
+  it('isConfigured true with API key', () => {
+    const p = new PerplexityProvider({ apiKey: 'test-key' });
+    expect(p.isConfigured()).toBe(true);
+  });
+
+  it('reads apiKey from env', () => {
+    vi.stubEnv('PERPLEXITY_API_KEY', 'env-key');
+    const p = new PerplexityProvider();
+    expect(p.isConfigured()).toBe(true);
+  });
+
+  it('throws when not configured', async () => {
+    const p = new PerplexityProvider({ apiKey: '' });
+    await expect(p.chatCompletion({ messages: [{ role: 'user', content: 'hi' }] })).rejects.toThrow(
+      'Perplexity is not configured'
+    );
+  });
+
+  it('calls Perplexity API and maps response', async () => {
+    const fetchMock = vi.fn().mockResolvedValue({
+      ok: true,
+      json: async () => makeOpenAIResponse('analysis result', 'sonar'),
+    });
+    vi.stubGlobal('fetch', fetchMock);
+
+    const p = new PerplexityProvider({ apiKey: 'test-key', model: 'sonar' });
+    const result = await p.chatCompletion({
+      messages: [{ role: 'user', content: 'analyse BTC' }],
+      temperature: 0.2,
+    });
+
+    expect(result.content).toBe('analysis result');
+    expect(result.model).toBe('sonar');
+    expect(result.finishReason).toBe('stop');
+    expect(result.usage.totalTokens).toBe(15);
+
+    const [url, init] = fetchMock.mock.calls[0] as [string, RequestInit];
+    expect(url).toBe('https://api.perplexity.ai/chat/completions');
+    expect((init.headers as Record<string, string>)['Authorization']).toBe('Bearer test-key');
+  });
+
+  it('throws on non-ok response', async () => {
+    vi.stubGlobal(
+      'fetch',
+      vi.fn().mockResolvedValue({
+        ok: false,
+        status: 429,
+        text: async () => 'rate limited',
+      })
+    );
+
+    const p = new PerplexityProvider({ apiKey: 'test-key' });
+    await expect(p.chatCompletion({ messages: [{ role: 'user', content: 'hi' }] })).rejects.toThrow(
+      'Perplexity error 429'
+    );
+  });
+});
+
+// ── GeminiProvider ──────────────────────────────────────────────
+
+describe('GeminiProvider', () => {
+  it('isConfigured false without API key', () => {
+    const p = new GeminiProvider({ apiKey: '' });
+    expect(p.isConfigured()).toBe(false);
+  });
+
+  it('isConfigured true with API key', () => {
+    const p = new GeminiProvider({ apiKey: 'test-key' });
+    expect(p.isConfigured()).toBe(true);
+  });
+
+  it('reads apiKey from env', () => {
+    vi.stubEnv('GEMINI_API_KEY', 'env-key');
+    const p = new GeminiProvider();
+    expect(p.isConfigured()).toBe(true);
+  });
+
+  it('throws when not configured', async () => {
+    const p = new GeminiProvider({ apiKey: '' });
+    await expect(p.chatCompletion({ messages: [{ role: 'user', content: 'hi' }] })).rejects.toThrow(
+      'Gemini is not configured'
+    );
+  });
+
+  it('calls Gemini API and maps response', async () => {
+    const fetchMock = vi.fn().mockResolvedValue({
+      ok: true,
+      json: async () => makeGeminiResponse('gemini analysis'),
+    });
+    vi.stubGlobal('fetch', fetchMock);
+
+    const p = new GeminiProvider({ apiKey: 'test-key', model: 'gemini-1.5-flash' });
+    const result = await p.chatCompletion({
+      messages: [
+        { role: 'system', content: 'You are a trading assistant.' },
+        { role: 'user', content: 'analyse BTC' },
+      ],
+      temperature: 0.2,
+    });
+
+    expect(result.content).toBe('gemini analysis');
+    expect(result.model).toBe('gemini-1.5-flash');
+    expect(result.finishReason).toBe('stop');
+    expect(result.usage.totalTokens).toBe(15);
+
+    const [url, init] = fetchMock.mock.calls[0] as [string, RequestInit];
+    expect(url).toContain('generativelanguage.googleapis.com');
+    expect(url).toContain('gemini-1.5-flash');
+    expect(url).toContain('test-key');
+
+    const body = JSON.parse(init.body as string);
+    expect(body.systemInstruction.parts[0].text).toBe('You are a trading assistant.');
+    expect(body.contents[0].role).toBe('user');
+  });
+
+  it('maps MAX_TOKENS finish reason to length', async () => {
+    vi.stubGlobal(
+      'fetch',
+      vi.fn().mockResolvedValue({
+        ok: true,
+        json: async () => ({
+          candidates: [{ content: { parts: [{ text: 'truncated' }] }, finishReason: 'MAX_TOKENS' }],
+          usageMetadata: { promptTokenCount: 1, candidatesTokenCount: 1, totalTokenCount: 2 },
+        }),
+      })
+    );
+
+    const p = new GeminiProvider({ apiKey: 'test-key' });
+    const result = await p.chatCompletion({ messages: [{ role: 'user', content: 'hi' }] });
+    expect(result.finishReason).toBe('length');
+  });
+
+  it('throws on non-ok response', async () => {
+    vi.stubGlobal(
+      'fetch',
+      vi.fn().mockResolvedValue({
+        ok: false,
+        status: 400,
+        text: async () => 'bad request',
+      })
+    );
+
+    const p = new GeminiProvider({ apiKey: 'test-key' });
+    await expect(p.chatCompletion({ messages: [{ role: 'user', content: 'hi' }] })).rejects.toThrow(
+      'Gemini error 400'
+    );
+  });
+});
diff --git a/packages/llm/src/factory.ts b/packages/llm/src/factory.ts
index 5a60e44c..7b59c2be 100644
--- a/packages/llm/src/factory.ts
+++ b/packages/llm/src/factory.ts
@@ -6,8 +6,10 @@
  */
 
 import { AzureOpenAIProvider } from './providers/azure-openai.js';
+import { GeminiProvider } from './providers/gemini.js';
 import { MockLLMProvider } from './providers/mock.js';
 import { OpenAIProvider } from './providers/openai.js';
+import { PerplexityProvider } from './providers/perplexity.js';
 import type { LLMProvider, LLMProviderType } from './types.js';
 
 let _provider: LLMProvider | null = null;
@@ -20,6 +22,8 @@ function resolveProviderType(): LLMProviderType {
   const explicit = (process.env.LLM_PROVIDER || process.env.OPENAI_PROVIDER || '').toLowerCase();
   if (explicit === 'azure') return 'azure';
   if (explicit === 'openai') return 'openai';
+  if (explicit === 'perplexity') return 'perplexity';
+  if (explicit === 'gemini') return 'gemini';
   if (explicit === 'mock') return 'mock';
 
   const azureEndpoint = process.env.AZURE_OPENAI_ENDPOINT;
@@ -51,10 +55,16 @@ export function createLLMProvider(type: LLMProviderType): LLMProvider {
       return new AzureOpenAIProvider();
     case 'openai':
       return new OpenAIProvider();
+    case 'perplexity':
+      return new PerplexityProvider();
+    case 'gemini':
+      return new GeminiProvider();
     case 'mock':
       return new MockLLMProvider();
     default:
-      throw new Error(`Unknown LLM_PROVIDER: '${type}'. Valid: azure, openai, mock`);
+      throw new Error(
+        `Unknown LLM_PROVIDER: '${type}'. Valid: azure, openai, perplexity, gemini, mock`
+      );
   }
 }
 
diff --git a/packages/llm/src/fallback.ts b/packages/llm/src/fallback.ts
new file mode 100644
index 00000000..1f4fcb3f
--- /dev/null
+++ b/packages/llm/src/fallback.ts
@@ -0,0 +1,36 @@
+/**
+ * Fallback chain utility.
+ *
+ * Wraps an ordered list of LLMProviders into a single LLMProvider that
+ * tries each in sequence, skipping unconfigured ones, and moves to the
+ * next on any error. Throws only when all providers are exhausted.
+ */
+
+import type { ChatCompletionRequest, ChatCompletionResponse, LLMProvider } from './types.js';
+/** Builds one LLMProvider that tries `providers` in array order on every call. */
+export function createFallbackChain(providers: LLMProvider[]): LLMProvider {
+  return {
+    isConfigured(): boolean {
+      return providers.some(p => p.isConfigured());
+    },
+    /** Resolves with the first success; rejects when no configured provider succeeds. */
+    async chatCompletion(req: ChatCompletionRequest): Promise<ChatCompletionResponse> {
+      const errors: string[] = [];
+      // Unconfigured providers are skipped silently and contribute no error text.
+      for (const provider of providers) {
+        if (!provider.isConfigured()) continue;
+        try {
+          return await provider.chatCompletion(req);
+        } catch (err) {
+          errors.push(err instanceof Error ? err.message : String(err));
+        }
+      }
+
+      throw new Error(
+        errors.length > 0
+          ? `All providers failed: ${errors.join(' | ')}`
+          : 'No providers configured'
+      );
+    },
+  };
+}
diff --git a/packages/llm/src/index.ts b/packages/llm/src/index.ts
index 6cf88a8f..416f9f9f 100644
--- a/packages/llm/src/index.ts
+++ b/packages/llm/src/index.ts
@@ -8,6 +8,9 @@ export type {
 } from './types.js';
 
 export { getLLM, createLLMProvider, setLLM, _resetLLM } from './factory.js';
+export { createFallbackChain } from './fallback.js';
 export { AzureOpenAIProvider, type AzureOpenAIConfig } from './providers/azure-openai.js';
+export { GeminiProvider, type GeminiConfig } from './providers/gemini.js';
 export { OpenAIProvider, type OpenAIConfig } from './providers/openai.js';
+export { PerplexityProvider, type PerplexityConfig } from './providers/perplexity.js';
 export { MockLLMProvider } from './providers/mock.js';
diff --git a/packages/llm/src/providers/gemini.ts b/packages/llm/src/providers/gemini.ts
new file mode 100644
index 00000000..2a068eb8
--- /dev/null
+++ b/packages/llm/src/providers/gemini.ts
@@ -0,0 +1,121 @@
+/**
+ * Google Gemini LLM provider.
+ *
+ * Uses Google's Generative Language API (not OpenAI-compatible).
+ * Reads config from GEMINI_API_KEY, GEMINI_MODEL.
+ */
+
+import type {
+  ChatCompletionRequest,
+  ChatCompletionResponse,
+  ChatMessage,
+  LLMProvider,
+} from '../types.js';
+
+export interface GeminiConfig {
+  apiKey: string;
+  model?: string;
+}
+
+interface GeminiPart {
+  text: string;
+}
+
+interface GeminiContent {
+  role: 'user' | 'model';
+  parts: GeminiPart[];
+}
+/** LLMProvider adapter that issues one `generateContent` POST per chat completion. */
+export class GeminiProvider implements LLMProvider {
+  private config: GeminiConfig;
+  /** Explicit values win; empty/missing ones fall back to env vars, then defaults. */
+  constructor(config?: Partial<GeminiConfig>) {
+    this.config = {
+      apiKey: config?.apiKey || process.env.GEMINI_API_KEY || '',
+      model: config?.model || process.env.GEMINI_MODEL || 'gemini-1.5-flash',
+    };
+  }
+  /** True when a non-empty API key was resolved at construction time. */
+  isConfigured(): boolean {
+    return Boolean(this.config.apiKey);
+  }
+  /** @throws Error if unconfigured or if the HTTP response is not ok. */
+  async chatCompletion(req: ChatCompletionRequest): Promise<ChatCompletionResponse> {
+    if (!this.isConfigured()) {
+      throw new Error('Gemini is not configured (missing GEMINI_API_KEY)');
+    }
+
+    const model = req.model || this.config.model!;
+    const url = `https://generativelanguage.googleapis.com/v1beta/models/${encodeURIComponent(model)}:generateContent?key=${encodeURIComponent(this.config.apiKey)}`;
+    // NOTE(review): key rides in the query string; consider Google's `x-goog-api-key` header.
+    const { systemInstruction, contents } = this.convertMessages(req.messages);
+
+    const body: Record<string, unknown> = { contents };
+    if (systemInstruction) {
+      body.systemInstruction = { parts: [{ text: systemInstruction }] };
+    }
+    // Forward every sampling option; JSON.stringify drops the ones left undefined.
+    const { temperature, maxTokens: maxOutputTokens, topP } = req;
+    body.generationConfig = { temperature, maxOutputTokens, topP };
+
+    const response = await fetch(url, {
+      method: 'POST',
+      headers: { 'Content-Type': 'application/json' },
+      body: JSON.stringify(body),
+    });
+
+    if (!response.ok) {
+      const text = await response.text();
+      throw new Error(`Gemini error ${response.status}: ${text}`);
+    }
+
+    const data = (await response.json()) as {
+      candidates?: Array<{
+        content?: { parts?: GeminiPart[] };
+        finishReason?: string;
+      }>;
+      usageMetadata?: {
+        promptTokenCount: number;
+        candidatesTokenCount: number;
+        totalTokenCount: number;
+      };
+    };
+    // The API may omit `candidates` (e.g. a blocked prompt); treat that as empty output.
+    const candidate = data.candidates?.[0];
+    const content = candidate?.content?.parts?.map(p => p.text).join('') ?? '';
+    const finishReason = candidate?.finishReason;
+    return {
+      content,
+      model,
+      finishReason:
+        finishReason === 'STOP' ? 'stop' : finishReason === 'MAX_TOKENS' ? 'length' : null,
+      usage: {
+        promptTokens: data.usageMetadata?.promptTokenCount ?? 0,
+        completionTokens: data.usageMetadata?.candidatesTokenCount ?? 0,
+        totalTokens: data.usageMetadata?.totalTokenCount ?? 0,
+      },
+    };
+  }
+  /** Joins system messages into one instruction; maps 'assistant' turns to role 'model'. */
+  private convertMessages(messages: ChatMessage[]): {
+    systemInstruction: string | null;
+    contents: GeminiContent[];
+  } {
+    const systemMessages = messages.filter(m => m.role === 'system');
+    const systemInstruction = systemMessages.map(m => m.content).join('\n') || null;
+
+    const contents: GeminiContent[] = messages
+      .filter(m => m.role !== 'system')
+      .map(m => ({
+        role: m.role === 'assistant' ? 'model' : 'user',
+        parts: [{ text: m.content }],
+      }));
+
+    // Gemini requires at least one user turn
+    if (contents.length === 0) {
+      contents.push({ role: 'user', parts: [{ text: '' }] });
+    }
+
+    return { systemInstruction, contents };
+  }
+}
diff --git a/packages/llm/src/providers/perplexity.ts b/packages/llm/src/providers/perplexity.ts
new file mode 100644
index 00000000..b778311e
--- /dev/null
+++ b/packages/llm/src/providers/perplexity.ts
@@ -0,0 +1,74 @@
+/**
+ * Perplexity LLM provider.
+ *
+ * Uses Perplexity's OpenAI-compatible API with real-time web search.
+ * Reads config from PERPLEXITY_API_KEY, PERPLEXITY_MODEL.
+ */
+
+import type { ChatCompletionRequest, ChatCompletionResponse, LLMProvider } from '../types.js';
+
+export interface PerplexityConfig {
+  apiKey: string;
+  model?: string;
+}
+/** LLMProvider adapter that POSTs to Perplexity's chat-completions endpoint. */
+export class PerplexityProvider implements LLMProvider {
+  private config: PerplexityConfig;
+  /** Explicit values win; empty/missing ones fall back to env vars, then defaults. */
+  constructor(config?: Partial<PerplexityConfig>) {
+    this.config = {
+      apiKey: config?.apiKey || process.env.PERPLEXITY_API_KEY || '',
+      model: config?.model || process.env.PERPLEXITY_MODEL || 'sonar',
+    };
+  }
+  /** True when a non-empty API key was resolved at construction time. */
+  isConfigured(): boolean {
+    return Boolean(this.config.apiKey);
+  }
+  /** @throws Error if unconfigured or if the HTTP response is not ok. */
+  async chatCompletion(req: ChatCompletionRequest): Promise<ChatCompletionResponse> {
+    if (!this.isConfigured()) {
+      throw new Error('Perplexity is not configured (missing PERPLEXITY_API_KEY)');
+    }
+
+    const body = {
+      model: req.model || this.config.model,
+      messages: req.messages,
+      temperature: req.temperature,
+      max_tokens: req.maxTokens,
+      top_p: req.topP,
+    };
+
+    const response = await fetch('https://api.perplexity.ai/chat/completions', {
+      method: 'POST',
+      headers: {
+        'Content-Type': 'application/json',
+        Authorization: `Bearer ${this.config.apiKey}`,
+      },
+      body: JSON.stringify(body),
+    });
+
+    if (!response.ok) {
+      const text = await response.text();
+      throw new Error(`Perplexity error ${response.status}: ${text}`);
+    }
+
+    const data = (await response.json()) as {
+      choices?: Array<{ message?: { content?: string }; finish_reason?: string }>;
+      model?: string;
+      usage?: { prompt_tokens?: number; completion_tokens?: number; total_tokens?: number };
+    };
+    const choice = data.choices?.[0]; // undefined when the payload carries no choices
+    return {
+      content: choice?.message?.content ?? '',
+      model: data.model ?? body.model ?? '',
+      finishReason:
+        (choice?.finish_reason as ChatCompletionResponse['finishReason']) ?? null,
+      usage: {
+        promptTokens: data.usage?.prompt_tokens ?? 0,
+        completionTokens: data.usage?.completion_tokens ?? 0,
+        totalTokens: data.usage?.total_tokens ?? 0,
+      },
+    };
+  }
+}
diff --git a/packages/llm/src/types.ts b/packages/llm/src/types.ts
index ad5ad1e6..aa3e368d 100644
--- a/packages/llm/src/types.ts
+++ b/packages/llm/src/types.ts
@@ -45,4 +45,4 @@ export interface TokenUsage {
   totalTokens: number;
 }
 
-export type LLMProviderType = 'azure' | 'openai' | 'mock';
+export type LLMProviderType = 'azure' | 'openai' | 'perplexity' | 'gemini' | 'mock';