feat(llm): add Perplexity, Gemini providers and createFallbackChain

- Add PerplexityProvider (OpenAI-compatible, reads PERPLEXITY_API_KEY)
- Add GeminiProvider (Google Generative Language API adapter, reads GEMINI_API_KEY)
- Add createFallbackChain() — ordered provider chain, skips unconfigured,
  aggregates errors; allows any app to replace custom LLM fallback loops
- Extend LLMProviderType with 'perplexity' | 'gemini'
- Update factory to resolve and instantiate new provider types
- Add PAID_PROVIDERS to llm-router registry (OpenAI, Perplexity) for apps
  using round-robin routing alongside free-tier providers
- 27 tests covering fallback chain, new providers, error/edge cases

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-05 12:49:47 -07:00 · 2026-04-05 12:49:47 -07:00 · 43bf51a290
commit 43bf51a290
parent a762c5b07f
10 changed files with 589 additions and 3 deletions
--- a/packages/llm-router/src/index.ts
+++ b/packages/llm-router/src/index.ts
@ -1,7 +1,12 @@
 export { LlmRouter } from './router.js';
 export type { TelemetryEntry } from './router.js';

-export { DEFAULT_PROVIDERS, createLocalOllamaProvider, getAvailableProviders } from './registry.js';
+export {
+  DEFAULT_PROVIDERS,
+  PAID_PROVIDERS,
+  createLocalOllamaProvider,
+  getAvailableProviders,
+} from './registry.js';
 export { classifyPrompt } from './classifier.js';
 export { HealthTracker } from './health.js';
 export { selectCandidates, pickNext, excludeCandidate, createRoundRobinState } from './selector.js';
--- a/packages/llm-router/src/registry.ts
+++ b/packages/llm-router/src/registry.ts
@ -1,5 +1,62 @@
 import type { ModelConfig, PromptCategory, ProviderConfig } from './types.js';

+/**
+ * Paid provider configurations (opt-in via API key env vars).
+ * Add to your RouterConfig.providers to include alongside free-tier providers.
+ *
+ * Each entry declares the provider name, its base URL, the name of the
+ * environment variable that holds its API key, per-minute request/token
+ * limits, and the models it exposes.
+ *
+ * NOTE(review): `speedTier` presumably ranks relative latency (lower = faster);
+ * confirm against the ModelConfig definition.
+ */
+export const PAID_PROVIDERS: ProviderConfig[] = [
+  // ── OpenAI ───────────────────────────────────────────────────
+  {
+    name: 'openai',
+    baseUrl: 'https://api.openai.com/v1',
+    apiKeyEnv: 'OPENAI_API_KEY',
+    rpmLimit: 500,
+    tpmLimit: 150_000,
+    models: [
+      {
+        id: 'gpt-4o-mini',
+        label: 'GPT-4o Mini',
+        contextWindow: 128_000,
+        strengths: ['general', 'reasoning', 'code'],
+        speedTier: 1,
+      },
+      {
+        id: 'gpt-4o',
+        label: 'GPT-4o',
+        contextWindow: 128_000,
+        strengths: ['general', 'reasoning', 'code', 'creative'],
+        speedTier: 2,
+      },
+    ],
+  },
+
+  // ── Perplexity ───────────────────────────────────────────────
+  // Real-time web search grounding — OpenAI-compatible endpoint
+  {
+    name: 'perplexity',
+    baseUrl: 'https://api.perplexity.ai',
+    apiKeyEnv: 'PERPLEXITY_API_KEY',
+    rpmLimit: 50,
+    // NOTE(review): 0 presumably means "no tokens-per-minute cap is tracked"
+    // rather than "zero tokens allowed" — confirm how the router reads it.
+    tpmLimit: 0,
+    models: [
+      {
+        id: 'sonar',
+        label: 'Sonar (web search)',
+        contextWindow: 127_072,
+        strengths: ['general', 'reasoning'],
+        speedTier: 2,
+      },
+      {
+        id: 'sonar-pro',
+        label: 'Sonar Pro (web search)',
+        contextWindow: 200_000,
+        strengths: ['general', 'reasoning'],
+        speedTier: 3,
+      },
+    ],
+  },
+];
+
 /**
 * Default free-tier provider configurations.
 * All use OpenAI-compatible /v1/chat/completions endpoints.
--- a/packages/llm/src/tests/fallback.test.ts
+++ b/packages/llm/src/tests/fallback.test.ts
@ -0,0 +1,99 @@
+/**
+ * Tests for createFallbackChain.
+ */
+
+import { describe, it, expect } from 'vitest';
+import { createFallbackChain } from '../fallback.js';
+import { MockLLMProvider } from '../providers/mock.js';
+import type { ChatCompletionResponse } from '../types.js';
+
+/** Builds a minimal successful completion whose text is `content`. */
+function makeResponse(content: string): ChatCompletionResponse {
+  const usage = { promptTokens: 1, completionTokens: 1, totalTokens: 2 };
+  return { content, model: 'mock', finishReason: 'stop', usage };
+}
+
+describe('createFallbackChain', () => {
+  /** Provider stub whose chatCompletion always rejects with `message`. */
+  const rejectingProvider = (configured: boolean, message: string) => ({
+    isConfigured: () => configured,
+    chatCompletion: async (): Promise<ChatCompletionResponse> => {
+      throw new Error(message);
+    },
+  });
+
+  /** Sends the same single-message request through `chain`. */
+  const ask = (chain: ReturnType<typeof createFallbackChain>) =>
+    chain.chatCompletion({ messages: [{ role: 'user', content: 'hi' }] });
+
+  it('isConfigured returns true when at least one provider is configured', () => {
+    const chain = createFallbackChain([new MockLLMProvider()]);
+    expect(chain.isConfigured()).toBe(true);
+  });
+
+  it('isConfigured returns false when no providers are configured', () => {
+    const chain = createFallbackChain([rejectingProvider(false, 'not configured')]);
+    expect(chain.isConfigured()).toBe(false);
+  });
+
+  it('returns response from first configured provider', async () => {
+    const first = new MockLLMProvider([makeResponse('from-a')]);
+    const second = new MockLLMProvider([makeResponse('from-b')]);
+
+    const result = await ask(createFallbackChain([first, second]));
+
+    expect(result.content).toBe('from-a');
+    // The second provider must never be consulted once the first succeeds.
+    expect(second.calls).toHaveLength(0);
+  });
+
+  it('falls back to second provider when first throws', async () => {
+    const failing = rejectingProvider(true, 'a failed');
+    const healthy = new MockLLMProvider([makeResponse('from-b')]);
+
+    const result = await ask(createFallbackChain([failing, healthy]));
+
+    expect(result.content).toBe('from-b');
+  });
+
+  it('skips unconfigured providers', async () => {
+    const skipped = rejectingProvider(false, 'should not be called');
+    const healthy = new MockLLMProvider([makeResponse('from-b')]);
+
+    const result = await ask(createFallbackChain([skipped, healthy]));
+
+    expect(result.content).toBe('from-b');
+  });
+
+  it('throws with all error messages when every provider fails', async () => {
+    const chain = createFallbackChain([
+      rejectingProvider(true, 'a failed'),
+      rejectingProvider(true, 'b failed'),
+    ]);
+
+    await expect(ask(chain)).rejects.toThrow('All providers failed: a failed | b failed');
+  });
+
+  it('throws "No providers configured" when list is empty', async () => {
+    await expect(ask(createFallbackChain([]))).rejects.toThrow('No providers configured');
+  });
+});
--- a/packages/llm/src/tests/providers.test.ts
+++ b/packages/llm/src/tests/providers.test.ts
@ -0,0 +1,181 @@
+/**
+ * Tests for PerplexityProvider and GeminiProvider.
+ * Uses vi.stubGlobal to mock fetch — no real API calls.
+ */
+
+import { describe, it, expect, vi, afterEach } from 'vitest';
+import { PerplexityProvider } from '../providers/perplexity.js';
+import { GeminiProvider } from '../providers/gemini.js';
+
+/** Builds an OpenAI-style chat completion payload with a single choice. */
+function makeOpenAIResponse(content: string, model = 'test-model') {
+  const choice = { message: { content }, finish_reason: 'stop' };
+  const usage = { prompt_tokens: 5, completion_tokens: 10, total_tokens: 15 };
+  return { choices: [choice], model, usage };
+}
+
+/** Builds a Gemini generateContent payload with one STOP-terminated candidate. */
+function makeGeminiResponse(text: string) {
+  const candidate = { content: { parts: [{ text }] }, finishReason: 'STOP' };
+  const usageMetadata = { promptTokenCount: 5, candidatesTokenCount: 10, totalTokenCount: 15 };
+  return { candidates: [candidate], usageMetadata };
+}
+
+// Reset every kind of stub after each test so state never leaks between cases.
+afterEach(() => {
+  vi.restoreAllMocks();
+  // restoreAllMocks() does not undo vi.stubGlobal(); without this call the
+  // stubbed `fetch` would stay installed for every later test.
+  vi.unstubAllGlobals();
+  vi.unstubAllEnvs();
+});
+
+// ── PerplexityProvider ──────────────────────────────────────────
+
+describe('PerplexityProvider', () => {
+  it('isConfigured false without API key', () => {
+    // An empty apiKey falls through to process.env, so blank the variable to
+    // keep this test independent of the developer/CI environment.
+    vi.stubEnv('PERPLEXITY_API_KEY', '');
+    const p = new PerplexityProvider({ apiKey: '' });
+    expect(p.isConfigured()).toBe(false);
+  });
+
+  it('isConfigured true with API key', () => {
+    const p = new PerplexityProvider({ apiKey: 'test-key' });
+    expect(p.isConfigured()).toBe(true);
+  });
+
+  it('reads apiKey from env', () => {
+    vi.stubEnv('PERPLEXITY_API_KEY', 'env-key');
+    const p = new PerplexityProvider();
+    expect(p.isConfigured()).toBe(true);
+  });
+
+  it('throws when not configured', async () => {
+    // Same env fallback as above: make sure no real key is picked up.
+    vi.stubEnv('PERPLEXITY_API_KEY', '');
+    const p = new PerplexityProvider({ apiKey: '' });
+    await expect(p.chatCompletion({ messages: [{ role: 'user', content: 'hi' }] })).rejects.toThrow(
+      'Perplexity is not configured'
+    );
+  });
+
+  it('calls Perplexity API and maps response', async () => {
+    const fetchMock = vi.fn().mockResolvedValue({
+      ok: true,
+      json: async () => makeOpenAIResponse('analysis result', 'sonar'),
+    });
+    vi.stubGlobal('fetch', fetchMock);
+
+    const p = new PerplexityProvider({ apiKey: 'test-key', model: 'sonar' });
+    const result = await p.chatCompletion({
+      messages: [{ role: 'user', content: 'analyse BTC' }],
+      temperature: 0.2,
+    });
+
+    expect(result.content).toBe('analysis result');
+    expect(result.model).toBe('sonar');
+    expect(result.finishReason).toBe('stop');
+    expect(result.usage.totalTokens).toBe(15);
+
+    const [url, init] = fetchMock.mock.calls[0] as [string, RequestInit];
+    expect(url).toBe('https://api.perplexity.ai/chat/completions');
+    expect((init.headers as Record<string, string>)['Authorization']).toBe('Bearer test-key');
+  });
+
+  it('throws on non-ok response', async () => {
+    vi.stubGlobal(
+      'fetch',
+      vi.fn().mockResolvedValue({
+        ok: false,
+        status: 429,
+        text: async () => 'rate limited',
+      })
+    );
+
+    const p = new PerplexityProvider({ apiKey: 'test-key' });
+    await expect(p.chatCompletion({ messages: [{ role: 'user', content: 'hi' }] })).rejects.toThrow(
+      'Perplexity error 429'
+    );
+  });
+});
+
+// ── GeminiProvider ──────────────────────────────────────────────
+
+describe('GeminiProvider', () => {
+  it('isConfigured false without API key', () => {
+    // An empty apiKey falls through to process.env, so blank the variable to
+    // keep this test independent of the developer/CI environment.
+    vi.stubEnv('GEMINI_API_KEY', '');
+    const p = new GeminiProvider({ apiKey: '' });
+    expect(p.isConfigured()).toBe(false);
+  });
+
+  it('isConfigured true with API key', () => {
+    const p = new GeminiProvider({ apiKey: 'test-key' });
+    expect(p.isConfigured()).toBe(true);
+  });
+
+  it('reads apiKey from env', () => {
+    vi.stubEnv('GEMINI_API_KEY', 'env-key');
+    const p = new GeminiProvider();
+    expect(p.isConfigured()).toBe(true);
+  });
+
+  it('throws when not configured', async () => {
+    // Same env fallback as above: make sure no real key is picked up.
+    vi.stubEnv('GEMINI_API_KEY', '');
+    const p = new GeminiProvider({ apiKey: '' });
+    await expect(p.chatCompletion({ messages: [{ role: 'user', content: 'hi' }] })).rejects.toThrow(
+      'Gemini is not configured'
+    );
+  });
+
+  it('calls Gemini API and maps response', async () => {
+    const fetchMock = vi.fn().mockResolvedValue({
+      ok: true,
+      json: async () => makeGeminiResponse('gemini analysis'),
+    });
+    vi.stubGlobal('fetch', fetchMock);
+
+    const p = new GeminiProvider({ apiKey: 'test-key', model: 'gemini-1.5-flash' });
+    const result = await p.chatCompletion({
+      messages: [
+        { role: 'system', content: 'You are a trading assistant.' },
+        { role: 'user', content: 'analyse BTC' },
+      ],
+      temperature: 0.2,
+    });
+
+    expect(result.content).toBe('gemini analysis');
+    expect(result.model).toBe('gemini-1.5-flash');
+    expect(result.finishReason).toBe('stop');
+    expect(result.usage.totalTokens).toBe(15);
+
+    const [url, init] = fetchMock.mock.calls[0] as [string, RequestInit];
+    expect(url).toContain('generativelanguage.googleapis.com');
+    expect(url).toContain('gemini-1.5-flash');
+    expect(url).toContain('test-key');
+
+    // System messages travel in systemInstruction, not in the contents list.
+    const body = JSON.parse(init.body as string);
+    expect(body.systemInstruction.parts[0].text).toBe('You are a trading assistant.');
+    expect(body.contents[0].role).toBe('user');
+  });
+
+  it('maps MAX_TOKENS finish reason to length', async () => {
+    vi.stubGlobal(
+      'fetch',
+      vi.fn().mockResolvedValue({
+        ok: true,
+        json: async () => ({
+          candidates: [{ content: { parts: [{ text: 'truncated' }] }, finishReason: 'MAX_TOKENS' }],
+          usageMetadata: { promptTokenCount: 1, candidatesTokenCount: 1, totalTokenCount: 2 },
+        }),
+      })
+    );
+
+    const p = new GeminiProvider({ apiKey: 'test-key' });
+    const result = await p.chatCompletion({ messages: [{ role: 'user', content: 'hi' }] });
+    expect(result.finishReason).toBe('length');
+  });
+
+  it('throws on non-ok response', async () => {
+    vi.stubGlobal(
+      'fetch',
+      vi.fn().mockResolvedValue({
+        ok: false,
+        status: 400,
+        text: async () => 'bad request',
+      })
+    );
+
+    const p = new GeminiProvider({ apiKey: 'test-key' });
+    await expect(p.chatCompletion({ messages: [{ role: 'user', content: 'hi' }] })).rejects.toThrow(
+      'Gemini error 400'
+    );
+  });
+});
--- a/packages/llm/src/factory.ts
+++ b/packages/llm/src/factory.ts
@ -6,8 +6,10 @@
 */

 import { AzureOpenAIProvider } from './providers/azure-openai.js';
+import { GeminiProvider } from './providers/gemini.js';
 import { MockLLMProvider } from './providers/mock.js';
 import { OpenAIProvider } from './providers/openai.js';
+import { PerplexityProvider } from './providers/perplexity.js';
 import type { LLMProvider, LLMProviderType } from './types.js';

 let _provider: LLMProvider | null = null;
@ -20,6 +22,8 @@ function resolveProviderType(): LLMProviderType {
  const explicit = (process.env.LLM_PROVIDER || process.env.OPENAI_PROVIDER || '').toLowerCase();
  if (explicit === 'azure') return 'azure';
  if (explicit === 'openai') return 'openai';
+  if (explicit === 'perplexity') return 'perplexity';
+  if (explicit === 'gemini') return 'gemini';
  if (explicit === 'mock') return 'mock';

  const azureEndpoint = process.env.AZURE_OPENAI_ENDPOINT;
@ -51,10 +55,16 @@ export function createLLMProvider(type: LLMProviderType): LLMProvider {
      return new AzureOpenAIProvider();
    case 'openai':
      return new OpenAIProvider();
+    case 'perplexity':
+      return new PerplexityProvider();
+    case 'gemini':
+      return new GeminiProvider();
    case 'mock':
      return new MockLLMProvider();
    default:
-      throw new Error(`Unknown LLM_PROVIDER: '${type}'. Valid: azure, openai, mock`);
+      throw new Error(
+        `Unknown LLM_PROVIDER: '${type}'. Valid: azure, openai, perplexity, gemini, mock`
+      );
  }
 }

--- a/packages/llm/src/fallback.ts
+++ b/packages/llm/src/fallback.ts
@ -0,0 +1,36 @@
+/**
+ * Fallback chain utility.
+ *
+ * Wraps an ordered list of LLMProviders into a single LLMProvider that
+ * tries each in sequence, skipping unconfigured ones, and moves to the
+ * next on any error. Throws only when all providers are exhausted.
+ */
+
+import type { ChatCompletionRequest, ChatCompletionResponse, LLMProvider } from './types.js';
+
+/**
+ * Combines `providers` into one LLMProvider that tries them in list order.
+ *
+ * @param providers Candidates, highest priority first.
+ * @returns A provider that reports itself configured when any candidate is,
+ *   and whose chatCompletion resolves with the first successful response.
+ * @throws Error `All providers failed: …` (messages joined with ` | `) when
+ *   every configured candidate rejects, or `No providers configured` when no
+ *   candidate was attempted.
+ */
+export function createFallbackChain(providers: LLMProvider[]): LLMProvider {
+  const isConfigured = (): boolean => {
+    for (const candidate of providers) {
+      if (candidate.isConfigured()) return true;
+    }
+    return false;
+  };
+
+  const chatCompletion = async (req: ChatCompletionRequest): Promise<ChatCompletionResponse> => {
+    const failures: string[] = [];
+
+    for (const candidate of providers) {
+      // Unconfigured candidates are skipped silently and leave no error entry.
+      if (candidate.isConfigured()) {
+        try {
+          return await candidate.chatCompletion(req);
+        } catch (reason) {
+          failures.push(reason instanceof Error ? reason.message : String(reason));
+        }
+      }
+    }
+
+    if (failures.length === 0) {
+      throw new Error('No providers configured');
+    }
+    throw new Error(`All providers failed: ${failures.join(' | ')}`);
+  };
+
+  return { isConfigured, chatCompletion };
+}
--- a/packages/llm/src/index.ts
+++ b/packages/llm/src/index.ts
@ -8,6 +8,9 @@ export type {
 } from './types.js';

 export { getLLM, createLLMProvider, setLLM, _resetLLM } from './factory.js';
+export { createFallbackChain } from './fallback.js';
 export { AzureOpenAIProvider, type AzureOpenAIConfig } from './providers/azure-openai.js';
+export { GeminiProvider, type GeminiConfig } from './providers/gemini.js';
 export { OpenAIProvider, type OpenAIConfig } from './providers/openai.js';
+export { PerplexityProvider, type PerplexityConfig } from './providers/perplexity.js';
 export { MockLLMProvider } from './providers/mock.js';
--- a/packages/llm/src/providers/gemini.ts
+++ b/packages/llm/src/providers/gemini.ts
@ -0,0 +1,121 @@
+/**
+ * Google Gemini LLM provider.
+ *
+ * Uses Google's Generative Language API (not OpenAI-compatible).
+ * Reads config from GEMINI_API_KEY, GEMINI_MODEL.
+ */
+
+import type {
+  ChatCompletionRequest,
+  ChatCompletionResponse,
+  ChatMessage,
+  LLMProvider,
+} from '../types.js';
+
+/** Constructor options for GeminiProvider. */
+export interface GeminiConfig {
+  /** API key; when empty the provider falls back to the GEMINI_API_KEY env var. */
+  apiKey: string;
+  /** Model id; when omitted falls back to GEMINI_MODEL, then 'gemini-1.5-flash'. */
+  model?: string;
+}
+
+/** A single text segment inside a Gemini content entry. */
+interface GeminiPart {
+  text: string;
+}
+
+/** One conversation turn in the request body's `contents` array. */
+interface GeminiContent {
+  // Assistant messages are sent as 'model'; every other non-system role as 'user'.
+  role: 'user' | 'model';
+  parts: GeminiPart[];
+}
+
+/**
+ * LLMProvider backed by Google's Generative Language `generateContent` endpoint.
+ *
+ * Translates the provider-neutral chat request into Gemini's
+ * `contents` / `systemInstruction` / `generationConfig` shape and maps the
+ * first returned candidate back to a ChatCompletionResponse.
+ */
+export class GeminiProvider implements LLMProvider {
+  /** Model used when neither the config, GEMINI_MODEL, nor the request names one. */
+  private static readonly DEFAULT_MODEL = 'gemini-1.5-flash';
+
+  private config: GeminiConfig;
+
+  /**
+   * @param config Optional overrides; empty/missing values fall back to the
+   *   GEMINI_API_KEY and GEMINI_MODEL environment variables.
+   */
+  constructor(config?: Partial<GeminiConfig>) {
+    this.config = {
+      apiKey: config?.apiKey || process.env.GEMINI_API_KEY || '',
+      model: config?.model || process.env.GEMINI_MODEL || GeminiProvider.DEFAULT_MODEL,
+    };
+  }
+
+  /** True when a non-empty API key is available. */
+  isConfigured(): boolean {
+    return Boolean(this.config.apiKey);
+  }
+
+  /**
+   * Sends the conversation to Gemini and returns the first candidate's text.
+   *
+   * @param req Chat request; `model`, `temperature`, `maxTokens` and `topP`
+   *   are forwarded when present.
+   * @returns The joined text parts of the first candidate. Content is an empty
+   *   string and finishReason is null when the response carries no candidate.
+   * @throws Error when no API key is configured or the HTTP response is not ok.
+   */
+  async chatCompletion(req: ChatCompletionRequest): Promise<ChatCompletionResponse> {
+    if (!this.isConfigured()) {
+      throw new Error('Gemini is not configured (missing GEMINI_API_KEY)');
+    }
+
+    const model = req.model || this.config.model || GeminiProvider.DEFAULT_MODEL;
+    // Both dynamic URL pieces are encoded so unusual characters cannot corrupt
+    // the path or the query string.
+    const url = `https://generativelanguage.googleapis.com/v1beta/models/${encodeURIComponent(model)}:generateContent?key=${encodeURIComponent(this.config.apiKey)}`;
+
+    const { systemInstruction, contents } = this.convertMessages(req.messages);
+
+    const body: Record<string, unknown> = { contents };
+    if (systemInstruction) {
+      body.systemInstruction = { parts: [{ text: systemInstruction }] };
+    }
+
+    // Forward every sampling option the caller supplied, not just temperature,
+    // so limits such as maxTokens are honoured like in the other providers.
+    const generationConfig: Record<string, unknown> = {};
+    if (req.temperature !== undefined) generationConfig.temperature = req.temperature;
+    if (req.maxTokens !== undefined) generationConfig.maxOutputTokens = req.maxTokens;
+    if (req.topP !== undefined) generationConfig.topP = req.topP;
+    if (Object.keys(generationConfig).length > 0) {
+      body.generationConfig = generationConfig;
+    }
+
+    const response = await fetch(url, {
+      method: 'POST',
+      headers: { 'Content-Type': 'application/json' },
+      body: JSON.stringify(body),
+    });
+
+    if (!response.ok) {
+      const text = await response.text();
+      throw new Error(`Gemini error ${response.status}: ${text}`);
+    }
+
+    const data = (await response.json()) as {
+      candidates?: Array<{
+        content?: { parts?: GeminiPart[] };
+        finishReason?: string;
+      }>;
+      usageMetadata?: {
+        promptTokenCount: number;
+        candidatesTokenCount: number;
+        totalTokenCount: number;
+      };
+    };
+
+    // A successful response may omit `candidates` entirely; treat that the same
+    // as an empty list instead of crashing on the index access.
+    const candidate = data.candidates?.[0];
+    const content = candidate?.content?.parts?.map(p => p.text).join('') ?? '';
+    const finishReason = candidate?.finishReason;
+
+    return {
+      content,
+      model,
+      finishReason:
+        finishReason === 'STOP' ? 'stop' : finishReason === 'MAX_TOKENS' ? 'length' : null,
+      usage: {
+        promptTokens: data.usageMetadata?.promptTokenCount ?? 0,
+        completionTokens: data.usageMetadata?.candidatesTokenCount ?? 0,
+        totalTokens: data.usageMetadata?.totalTokenCount ?? 0,
+      },
+    };
+  }
+
+  /**
+   * Splits chat messages into Gemini's system instruction and turn list.
+   *
+   * System messages are joined with newlines into one instruction (null when
+   * there are none); assistant messages become 'model' turns and all other
+   * non-system messages become 'user' turns.
+   */
+  private convertMessages(messages: ChatMessage[]): {
+    systemInstruction: string | null;
+    contents: GeminiContent[];
+  } {
+    const systemMessages = messages.filter(m => m.role === 'system');
+    const systemInstruction = systemMessages.map(m => m.content).join('\n') || null;
+
+    const contents: GeminiContent[] = messages
+      .filter(m => m.role !== 'system')
+      .map(m => ({
+        role: m.role === 'assistant' ? 'model' : 'user',
+        parts: [{ text: m.content }],
+      }));
+
+    // Gemini requires at least one user turn
+    if (contents.length === 0) {
+      contents.push({ role: 'user', parts: [{ text: '' }] });
+    }
+
+    return { systemInstruction, contents };
+  }
+}
--- a/packages/llm/src/providers/perplexity.ts
+++ b/packages/llm/src/providers/perplexity.ts
@ -0,0 +1,74 @@
+/**
+ * Perplexity LLM provider.
+ *
+ * Uses Perplexity's OpenAI-compatible API with real-time web search.
+ * Reads config from PERPLEXITY_API_KEY, PERPLEXITY_MODEL.
+ */
+
+import type { ChatCompletionRequest, ChatCompletionResponse, LLMProvider } from '../types.js';
+
+/** Constructor options for PerplexityProvider. */
+export interface PerplexityConfig {
+  /** API key; when empty the provider falls back to the PERPLEXITY_API_KEY env var. */
+  apiKey: string;
+  /** Model id; when omitted falls back to PERPLEXITY_MODEL, then 'sonar'. */
+  model?: string;
+}
+
+/**
+ * LLMProvider backed by Perplexity's OpenAI-compatible chat completions endpoint.
+ */
+export class PerplexityProvider implements LLMProvider {
+  /** Model used when neither the config, PERPLEXITY_MODEL, nor the request names one. */
+  private static readonly DEFAULT_MODEL = 'sonar';
+
+  private config: PerplexityConfig;
+
+  /**
+   * @param config Optional overrides; empty/missing values fall back to the
+   *   PERPLEXITY_API_KEY and PERPLEXITY_MODEL environment variables.
+   */
+  constructor(config?: Partial<PerplexityConfig>) {
+    this.config = {
+      apiKey: config?.apiKey || process.env.PERPLEXITY_API_KEY || '',
+      model: config?.model || process.env.PERPLEXITY_MODEL || PerplexityProvider.DEFAULT_MODEL,
+    };
+  }
+
+  /** True when a non-empty API key is available. */
+  isConfigured(): boolean {
+    return Boolean(this.config.apiKey);
+  }
+
+  /**
+   * Posts the conversation to Perplexity and maps the first choice back.
+   *
+   * @param req Chat request; `model`, `temperature`, `maxTokens` and `topP`
+   *   are forwarded as-is (undefined values are dropped by JSON serialization).
+   * @returns The first choice's content. Content is an empty string and
+   *   finishReason is null when the response carries no choice.
+   * @throws Error when no API key is configured or the HTTP response is not ok.
+   */
+  async chatCompletion(req: ChatCompletionRequest): Promise<ChatCompletionResponse> {
+    if (!this.isConfigured()) {
+      throw new Error('Perplexity is not configured (missing PERPLEXITY_API_KEY)');
+    }
+
+    const model = req.model || this.config.model || PerplexityProvider.DEFAULT_MODEL;
+    const body = {
+      model,
+      messages: req.messages,
+      temperature: req.temperature,
+      max_tokens: req.maxTokens,
+      top_p: req.topP,
+    };
+
+    const response = await fetch('https://api.perplexity.ai/chat/completions', {
+      method: 'POST',
+      headers: {
+        'Content-Type': 'application/json',
+        Authorization: `Bearer ${this.config.apiKey}`,
+      },
+      body: JSON.stringify(body),
+    });
+
+    if (!response.ok) {
+      const text = await response.text();
+      throw new Error(`Perplexity error ${response.status}: ${text}`);
+    }
+
+    // Every field is treated as optional: the payload comes from the network
+    // and is not validated, so a missing `choices` must not crash the mapping.
+    const data = (await response.json()) as {
+      choices?: Array<{ message?: { content?: string }; finish_reason?: string }>;
+      model?: string;
+      usage?: { prompt_tokens: number; completion_tokens: number; total_tokens: number };
+    };
+
+    const choice = data.choices?.[0];
+
+    return {
+      content: choice?.message?.content ?? '',
+      // Report the requested model when the response does not echo one back.
+      model: data.model ?? model,
+      finishReason: (choice?.finish_reason as ChatCompletionResponse['finishReason']) ?? null,
+      usage: {
+        promptTokens: data.usage?.prompt_tokens ?? 0,
+        completionTokens: data.usage?.completion_tokens ?? 0,
+        totalTokens: data.usage?.total_tokens ?? 0,
+      },
+    };
+  }
+}
--- a/packages/llm/src/types.ts
+++ b/packages/llm/src/types.ts
@ -45,4 +45,4 @@ export interface TokenUsage {
  totalTokens: number;
 }

-export type LLMProviderType = 'azure' | 'openai' | 'mock';
+/** Provider identifiers accepted by `createLLMProvider`. */
+export type LLMProviderType = 'azure' | 'openai' | 'perplexity' | 'gemini' | 'mock';