feat(llm): add Perplexity, Gemini providers and createFallbackChain

- Add PerplexityProvider (OpenAI-compatible, reads PERPLEXITY_API_KEY)
- Add GeminiProvider (Google Generative Language API adapter, reads GEMINI_API_KEY)
- Add createFallbackChain() — ordered provider chain, skips unconfigured,
  aggregates errors; allows any app to replace custom LLM fallback loops
- Extend LLMProviderType with 'perplexity' | 'gemini'
- Update factory to resolve and instantiate new provider types
- Add PAID_PROVIDERS to llm-router registry (OpenAI, Perplexity) for apps
  using round-robin routing alongside free-tier providers
- 27 tests covering fallback chain, new providers, error/edge cases

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Saravana Achu Mac 2026-04-05 12:49:47 -07:00
parent a762c5b07f
commit 43bf51a290
10 changed files with 589 additions and 3 deletions

View File

@ -1,7 +1,12 @@
export { LlmRouter } from './router.js';
export type { TelemetryEntry } from './router.js';
export { DEFAULT_PROVIDERS, createLocalOllamaProvider, getAvailableProviders } from './registry.js';
export {
DEFAULT_PROVIDERS,
PAID_PROVIDERS,
createLocalOllamaProvider,
getAvailableProviders,
} from './registry.js';
export { classifyPrompt } from './classifier.js';
export { HealthTracker } from './health.js';
export { selectCandidates, pickNext, excludeCandidate, createRoundRobinState } from './selector.js';

View File

@ -1,5 +1,62 @@
import type { ModelConfig, PromptCategory, ProviderConfig } from './types.js';
/**
* Paid provider configurations (opt-in via API key env vars).
* Add to your RouterConfig.providers to include alongside free-tier providers.
*
* Each entry names the env var that holds its key (`apiKeyEnv`), the API base
* URL, per-provider limits, and the models offered together with the prompt
* categories each one is tagged as strong at.
*
* NOTE(review): `rpmLimit` / `tpmLimit` are presumably requests-per-minute and
* tokens-per-minute budgets, and `speedTier` presumably ranks relative latency —
* confirm against the ProviderConfig / ModelConfig definitions in ./types.
*/
export const PAID_PROVIDERS: ProviderConfig[] = [
// ── OpenAI ───────────────────────────────────────────────────
{
name: 'openai',
baseUrl: 'https://api.openai.com/v1',
apiKeyEnv: 'OPENAI_API_KEY',
rpmLimit: 500,
tpmLimit: 150_000,
models: [
{
id: 'gpt-4o-mini',
label: 'GPT-4o Mini',
contextWindow: 128_000,
strengths: ['general', 'reasoning', 'code'],
speedTier: 1,
},
{
id: 'gpt-4o',
label: 'GPT-4o',
contextWindow: 128_000,
strengths: ['general', 'reasoning', 'code', 'creative'],
speedTier: 2,
},
],
},
// ── Perplexity ───────────────────────────────────────────────
// Real-time web search grounding — OpenAI-compatible endpoint
{
name: 'perplexity',
// No `/v1` suffix here, unlike the OpenAI entry above.
baseUrl: 'https://api.perplexity.ai',
apiKeyEnv: 'PERPLEXITY_API_KEY',
rpmLimit: 50,
// NOTE(review): 0 presumably means "no token-per-minute limit tracked" rather
// than "zero tokens allowed" — confirm how the router interprets it.
tpmLimit: 0,
models: [
{
id: 'sonar',
label: 'Sonar (web search)',
contextWindow: 127_072,
strengths: ['general', 'reasoning'],
speedTier: 2,
},
{
id: 'sonar-pro',
label: 'Sonar Pro (web search)',
contextWindow: 200_000,
strengths: ['general', 'reasoning'],
speedTier: 3,
},
],
},
];
/**
* Default free-tier provider configurations.
* All use OpenAI-compatible /v1/chat/completions endpoints.

View File

@ -0,0 +1,99 @@
/**
* Tests for createFallbackChain.
*/
import { describe, it, expect } from 'vitest';
import { createFallbackChain } from '../fallback.js';
import { MockLLMProvider } from '../providers/mock.js';
import type { ChatCompletionResponse } from '../types.js';
/** Builds a canned completion in which only `content` varies between calls. */
const makeResponse = (content: string): ChatCompletionResponse => {
  const usage = { promptTokens: 1, completionTokens: 1, totalTokens: 2 };
  return { content, model: 'mock', finishReason: 'stop', usage };
};
describe('createFallbackChain', () => {
  // Fresh single-turn request for each chatCompletion call.
  const userSaysHi = () => ({ messages: [{ role: 'user' as const, content: 'hi' }] });

  // Provider stub whose chatCompletion always rejects with `message`.
  const rejecting = (configured: boolean, message: string) => ({
    isConfigured: () => configured,
    chatCompletion: async (): Promise<ChatCompletionResponse> => {
      throw new Error(message);
    },
  });

  it('isConfigured returns true when at least one provider is configured', () => {
    const chain = createFallbackChain([new MockLLMProvider()]);

    expect(chain.isConfigured()).toBe(true);
  });

  it('isConfigured returns false when no providers are configured', () => {
    const chain = createFallbackChain([rejecting(false, 'not configured')]);

    expect(chain.isConfigured()).toBe(false);
  });

  it('returns response from first configured provider', async () => {
    const first = new MockLLMProvider([makeResponse('from-a')]);
    const second = new MockLLMProvider([makeResponse('from-b')]);

    const { content } = await createFallbackChain([first, second]).chatCompletion(userSaysHi());

    expect(content).toBe('from-a');
    // The second provider must never be consulted once the first succeeds.
    expect(second.calls).toHaveLength(0);
  });

  it('falls back to second provider when first throws', async () => {
    const broken = rejecting(true, 'a failed');
    const healthy = new MockLLMProvider([makeResponse('from-b')]);

    const { content } = await createFallbackChain([broken, healthy]).chatCompletion(userSaysHi());

    expect(content).toBe('from-b');
  });

  it('skips unconfigured providers', async () => {
    // If the chain called this stub, its error would surface instead of 'from-b'.
    const skipped = rejecting(false, 'should not be called');
    const healthy = new MockLLMProvider([makeResponse('from-b')]);

    const { content } = await createFallbackChain([skipped, healthy]).chatCompletion(userSaysHi());

    expect(content).toBe('from-b');
  });

  it('throws with all error messages when every provider fails', async () => {
    const chain = createFallbackChain([rejecting(true, 'a failed'), rejecting(true, 'b failed')]);

    const attempt = chain.chatCompletion(userSaysHi());

    await expect(attempt).rejects.toThrow('All providers failed: a failed | b failed');
  });

  it('throws "No providers configured" when list is empty', async () => {
    const attempt = createFallbackChain([]).chatCompletion(userSaysHi());

    await expect(attempt).rejects.toThrow('No providers configured');
  });
});

View File

@ -0,0 +1,181 @@
/**
* Tests for PerplexityProvider and GeminiProvider.
* Uses vi.stubGlobal to mock fetch, so no real API calls are made.
*/
import { describe, it, expect, vi, afterEach } from 'vitest';
import { PerplexityProvider } from '../providers/perplexity.js';
import { GeminiProvider } from '../providers/gemini.js';
/** Builds an OpenAI-style chat completion payload with a single `stop` choice. */
const makeOpenAIResponse = (content: string, model = 'test-model') => {
  const choice = { message: { content }, finish_reason: 'stop' };
  const usage = { prompt_tokens: 5, completion_tokens: 10, total_tokens: 15 };
  return { choices: [choice], model, usage };
};
/** Builds a Gemini generateContent payload with one text part and a STOP finish reason. */
const makeGeminiResponse = (text: string) => {
  const candidate = { content: { parts: [{ text }] }, finishReason: 'STOP' };
  const usageMetadata = { promptTokenCount: 5, candidatesTokenCount: 10, totalTokenCount: 15 };
  return { candidates: [candidate], usageMetadata };
};
// Undo every per-test stub so no state leaks into the next test.
afterEach(() => {
  vi.restoreAllMocks();
  // fetch is replaced via vi.stubGlobal; restoreAllMocks does not undo global
  // stubs, so without this the fake fetch would stay installed for later tests.
  vi.unstubAllGlobals();
  vi.unstubAllEnvs();
});
// ── PerplexityProvider ──────────────────────────────────────────
describe('PerplexityProvider', () => {
  it('isConfigured false without API key', () => {
    // An empty explicit key makes the constructor fall back to PERPLEXITY_API_KEY,
    // so blank the env var to keep the test independent of the host environment.
    vi.stubEnv('PERPLEXITY_API_KEY', '');
    const p = new PerplexityProvider({ apiKey: '' });
    expect(p.isConfigured()).toBe(false);
  });

  it('isConfigured true with API key', () => {
    const p = new PerplexityProvider({ apiKey: 'test-key' });
    expect(p.isConfigured()).toBe(true);
  });

  it('reads apiKey from env', () => {
    vi.stubEnv('PERPLEXITY_API_KEY', 'env-key');
    const p = new PerplexityProvider();
    expect(p.isConfigured()).toBe(true);
  });

  it('throws when not configured', async () => {
    // Same env fallback as above: without this a real key in the shell would
    // configure the provider and the call would go to the network.
    vi.stubEnv('PERPLEXITY_API_KEY', '');
    const p = new PerplexityProvider({ apiKey: '' });
    await expect(p.chatCompletion({ messages: [{ role: 'user', content: 'hi' }] })).rejects.toThrow(
      'Perplexity is not configured'
    );
  });

  it('calls Perplexity API and maps response', async () => {
    const fetchMock = vi.fn().mockResolvedValue({
      ok: true,
      json: async () => makeOpenAIResponse('analysis result', 'sonar'),
    });
    vi.stubGlobal('fetch', fetchMock);

    const p = new PerplexityProvider({ apiKey: 'test-key', model: 'sonar' });
    const result = await p.chatCompletion({
      messages: [{ role: 'user', content: 'analyse BTC' }],
      temperature: 0.2,
    });

    // Response mapping: OpenAI-style snake_case payload to the provider result.
    expect(result.content).toBe('analysis result');
    expect(result.model).toBe('sonar');
    expect(result.finishReason).toBe('stop');
    expect(result.usage.totalTokens).toBe(15);

    // Request shape: endpoint and bearer authentication.
    const [url, init] = fetchMock.mock.calls[0] as [string, RequestInit];
    expect(url).toBe('https://api.perplexity.ai/chat/completions');
    expect((init.headers as Record<string, string>)['Authorization']).toBe('Bearer test-key');
  });

  it('throws on non-ok response', async () => {
    vi.stubGlobal(
      'fetch',
      vi.fn().mockResolvedValue({
        ok: false,
        status: 429,
        text: async () => 'rate limited',
      })
    );
    const p = new PerplexityProvider({ apiKey: 'test-key' });
    await expect(p.chatCompletion({ messages: [{ role: 'user', content: 'hi' }] })).rejects.toThrow(
      'Perplexity error 429'
    );
  });
});
// ── GeminiProvider ──────────────────────────────────────────────
describe('GeminiProvider', () => {
  it('isConfigured false without API key', () => {
    // An empty explicit key makes the constructor fall back to GEMINI_API_KEY,
    // so blank the env var to keep the test independent of the host environment.
    vi.stubEnv('GEMINI_API_KEY', '');
    const p = new GeminiProvider({ apiKey: '' });
    expect(p.isConfigured()).toBe(false);
  });

  it('isConfigured true with API key', () => {
    const p = new GeminiProvider({ apiKey: 'test-key' });
    expect(p.isConfigured()).toBe(true);
  });

  it('reads apiKey from env', () => {
    vi.stubEnv('GEMINI_API_KEY', 'env-key');
    const p = new GeminiProvider();
    expect(p.isConfigured()).toBe(true);
  });

  it('throws when not configured', async () => {
    // Same env fallback as above: without this a real key in the shell would
    // configure the provider and the call would go to the network.
    vi.stubEnv('GEMINI_API_KEY', '');
    const p = new GeminiProvider({ apiKey: '' });
    await expect(p.chatCompletion({ messages: [{ role: 'user', content: 'hi' }] })).rejects.toThrow(
      'Gemini is not configured'
    );
  });

  it('calls Gemini API and maps response', async () => {
    const fetchMock = vi.fn().mockResolvedValue({
      ok: true,
      json: async () => makeGeminiResponse('gemini analysis'),
    });
    vi.stubGlobal('fetch', fetchMock);

    const p = new GeminiProvider({ apiKey: 'test-key', model: 'gemini-1.5-flash' });
    const result = await p.chatCompletion({
      messages: [
        { role: 'system', content: 'You are a trading assistant.' },
        { role: 'user', content: 'analyse BTC' },
      ],
      temperature: 0.2,
    });

    // Response mapping: Gemini candidates/usageMetadata to the provider result.
    expect(result.content).toBe('gemini analysis');
    expect(result.model).toBe('gemini-1.5-flash');
    expect(result.finishReason).toBe('stop');
    expect(result.usage.totalTokens).toBe(15);

    // Request shape: host, model and key appear in the URL.
    const [url, init] = fetchMock.mock.calls[0] as [string, RequestInit];
    expect(url).toContain('generativelanguage.googleapis.com');
    expect(url).toContain('gemini-1.5-flash');
    expect(url).toContain('test-key');

    // System messages are lifted into systemInstruction; the rest become contents.
    const body = JSON.parse(init.body as string);
    expect(body.systemInstruction.parts[0].text).toBe('You are a trading assistant.');
    expect(body.contents[0].role).toBe('user');
  });

  it('maps MAX_TOKENS finish reason to length', async () => {
    vi.stubGlobal(
      'fetch',
      vi.fn().mockResolvedValue({
        ok: true,
        json: async () => ({
          candidates: [{ content: { parts: [{ text: 'truncated' }] }, finishReason: 'MAX_TOKENS' }],
          usageMetadata: { promptTokenCount: 1, candidatesTokenCount: 1, totalTokenCount: 2 },
        }),
      })
    );
    const p = new GeminiProvider({ apiKey: 'test-key' });
    const result = await p.chatCompletion({ messages: [{ role: 'user', content: 'hi' }] });
    expect(result.finishReason).toBe('length');
  });

  it('throws on non-ok response', async () => {
    vi.stubGlobal(
      'fetch',
      vi.fn().mockResolvedValue({
        ok: false,
        status: 400,
        text: async () => 'bad request',
      })
    );
    const p = new GeminiProvider({ apiKey: 'test-key' });
    await expect(p.chatCompletion({ messages: [{ role: 'user', content: 'hi' }] })).rejects.toThrow(
      'Gemini error 400'
    );
  });
});

View File

@ -6,8 +6,10 @@
*/
import { AzureOpenAIProvider } from './providers/azure-openai.js';
import { GeminiProvider } from './providers/gemini.js';
import { MockLLMProvider } from './providers/mock.js';
import { OpenAIProvider } from './providers/openai.js';
import { PerplexityProvider } from './providers/perplexity.js';
import type { LLMProvider, LLMProviderType } from './types.js';
let _provider: LLMProvider | null = null;
@ -20,6 +22,8 @@ function resolveProviderType(): LLMProviderType {
const explicit = (process.env.LLM_PROVIDER || process.env.OPENAI_PROVIDER || '').toLowerCase();
if (explicit === 'azure') return 'azure';
if (explicit === 'openai') return 'openai';
if (explicit === 'perplexity') return 'perplexity';
if (explicit === 'gemini') return 'gemini';
if (explicit === 'mock') return 'mock';
const azureEndpoint = process.env.AZURE_OPENAI_ENDPOINT;
@ -51,10 +55,16 @@ export function createLLMProvider(type: LLMProviderType): LLMProvider {
return new AzureOpenAIProvider();
case 'openai':
return new OpenAIProvider();
case 'perplexity':
return new PerplexityProvider();
case 'gemini':
return new GeminiProvider();
case 'mock':
return new MockLLMProvider();
default:
throw new Error(`Unknown LLM_PROVIDER: '${type}'. Valid: azure, openai, mock`);
throw new Error(
`Unknown LLM_PROVIDER: '${type}'. Valid: azure, openai, perplexity, gemini, mock`
);
}
}

View File

@ -0,0 +1,36 @@
/**
* Fallback chain utility.
*
* Wraps an ordered list of LLMProviders into a single LLMProvider that
* tries each in sequence, skipping unconfigured ones, and moves to the
* next on any error. Throws only when all providers are exhausted.
*/
import type { ChatCompletionRequest, ChatCompletionResponse, LLMProvider } from './types.js';
/**
 * Combines several providers into one that tries them in the order given.
 *
 * @param providers - Candidates in priority order; the list is consulted on every call.
 * @returns A provider that reports itself configured when any candidate is, and
 *   whose `chatCompletion` resolves with the first candidate response that succeeds.
 * @throws Error `All providers failed: …` (messages joined with ` | `) when every
 *   attempted candidate rejects, or `No providers configured` when none was attempted.
 */
export function createFallbackChain(providers: LLMProvider[]): LLMProvider {
  const anyConfigured = (): boolean =>
    !providers.every(candidate => !candidate.isConfigured());

  const firstSuccessful = async (
    req: ChatCompletionRequest
  ): Promise<ChatCompletionResponse> => {
    const failures: string[] = [];

    for (const candidate of providers) {
      if (candidate.isConfigured()) {
        try {
          return await candidate.chatCompletion(req);
        } catch (reason) {
          // Only the message is kept; non-Error throwables are stringified.
          const message = reason instanceof Error ? reason.message : String(reason);
          failures.push(message);
        }
      }
    }

    // Nothing recorded means no candidate was even attempted.
    if (failures.length === 0) {
      throw new Error('No providers configured');
    }
    throw new Error(`All providers failed: ${failures.join(' | ')}`);
  };

  return { isConfigured: anyConfigured, chatCompletion: firstSuccessful };
}

View File

@ -8,6 +8,9 @@ export type {
} from './types.js';
export { getLLM, createLLMProvider, setLLM, _resetLLM } from './factory.js';
export { createFallbackChain } from './fallback.js';
export { AzureOpenAIProvider, type AzureOpenAIConfig } from './providers/azure-openai.js';
export { GeminiProvider, type GeminiConfig } from './providers/gemini.js';
export { OpenAIProvider, type OpenAIConfig } from './providers/openai.js';
export { PerplexityProvider, type PerplexityConfig } from './providers/perplexity.js';
export { MockLLMProvider } from './providers/mock.js';

View File

@ -0,0 +1,121 @@
/**
* Google Gemini LLM provider.
*
* Uses Google's Generative Language API (not OpenAI-compatible).
* Reads config from GEMINI_API_KEY, GEMINI_MODEL.
*/
import type {
ChatCompletionRequest,
ChatCompletionResponse,
ChatMessage,
LLMProvider,
} from '../types.js';
/** Construction options for {@link GeminiProvider}. */
export interface GeminiConfig {
  /** API key sent with every request; an empty string means "not configured". */
  apiKey: string;
  /** Default model id, used when a request does not name one. */
  model?: string;
}

/** One text fragment of a Gemini message. */
interface GeminiPart {
  text: string;
}

/** One conversation turn in the Gemini wire format. */
interface GeminiContent {
  role: 'user' | 'model';
  parts: GeminiPart[];
}

/**
 * LLMProvider adapter for Google's Generative Language `generateContent` endpoint.
 *
 * Translates the provider-neutral chat request into Gemini's
 * `contents` / `systemInstruction` / `generationConfig` body and maps the first
 * returned candidate back into a ChatCompletionResponse.
 */
export class GeminiProvider implements LLMProvider {
  // Both fields are always filled in by the constructor, hence Required<>.
  private readonly config: Required<GeminiConfig>;

  /**
   * @param config - Optional overrides. Empty or missing values fall back to
   *   GEMINI_API_KEY / GEMINI_MODEL, then to no key and 'gemini-1.5-flash'.
   */
  constructor(config?: Partial<GeminiConfig>) {
    this.config = {
      apiKey: config?.apiKey || process.env.GEMINI_API_KEY || '',
      model: config?.model || process.env.GEMINI_MODEL || 'gemini-1.5-flash',
    };
  }

  /** True when a non-empty API key is available. */
  isConfigured(): boolean {
    return Boolean(this.config.apiKey);
  }

  /**
   * Sends one generateContent request and returns the first candidate's text.
   *
   * @param req - Chat request; `model`, `temperature`, `maxTokens` and `topP`
   *   are forwarded when set.
   * @returns The concatenated text parts of the first candidate ('' when the
   *   candidate list is empty), the model id that was requested, a normalised
   *   finish reason and token usage (0 when the API omits it).
   * @throws Error when the provider has no API key, when the HTTP status is not
   *   ok, or when the response carries no candidate list.
   */
  async chatCompletion(req: ChatCompletionRequest): Promise<ChatCompletionResponse> {
    if (!this.isConfigured()) {
      throw new Error('Gemini is not configured (missing GEMINI_API_KEY)');
    }

    const model = req.model || this.config.model;
    // The key travels in the query string, so it is encoded like the model id.
    const url =
      `https://generativelanguage.googleapis.com/v1beta/models/${encodeURIComponent(model)}` +
      `:generateContent?key=${encodeURIComponent(this.config.apiKey)}`;

    const { systemInstruction, contents } = this.convertMessages(req.messages);
    const body: Record<string, unknown> = { contents };
    if (systemInstruction) {
      body.systemInstruction = { parts: [{ text: systemInstruction }] };
    }

    // Forward every sampling option the caller set; generationConfig is omitted
    // entirely when none is set.
    const generationConfig: Record<string, unknown> = {};
    if (req.temperature !== undefined) generationConfig.temperature = req.temperature;
    if (req.maxTokens !== undefined) generationConfig.maxOutputTokens = req.maxTokens;
    if (req.topP !== undefined) generationConfig.topP = req.topP;
    if (Object.keys(generationConfig).length > 0) {
      body.generationConfig = generationConfig;
    }

    const response = await fetch(url, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify(body),
    });

    if (!response.ok) {
      const text = await response.text();
      throw new Error(`Gemini error ${response.status}: ${text}`);
    }

    const data = (await response.json()) as {
      candidates?: Array<{
        content?: { parts?: GeminiPart[] };
        finishReason?: string;
      }>;
      promptFeedback?: { blockReason?: string };
      usageMetadata?: {
        promptTokenCount?: number;
        candidatesTokenCount?: number;
        totalTokenCount?: number;
      };
    } | null;

    // A 200 response can lack `candidates` (e.g. a blocked prompt). Fail with a
    // readable message instead of a TypeError so callers, including fallback
    // chains, see why this provider produced nothing.
    const candidates = data?.candidates;
    if (!Array.isArray(candidates)) {
      const reason = data?.promptFeedback?.blockReason;
      throw new Error(`Gemini returned no candidates${reason ? ` (blockReason: ${reason})` : ''}`);
    }

    const first = candidates[0];
    const content = first?.content?.parts?.map(p => p.text).join('') ?? '';
    const finishReason = first?.finishReason;

    return {
      content,
      model,
      // Only STOP and MAX_TOKENS have an equivalent; anything else becomes null.
      finishReason:
        finishReason === 'STOP' ? 'stop' : finishReason === 'MAX_TOKENS' ? 'length' : null,
      usage: {
        promptTokens: data?.usageMetadata?.promptTokenCount ?? 0,
        completionTokens: data?.usageMetadata?.candidatesTokenCount ?? 0,
        totalTokens: data?.usageMetadata?.totalTokenCount ?? 0,
      },
    };
  }

  /**
   * Splits chat messages into Gemini's system instruction and turn list.
   *
   * System messages are joined with newlines into a single instruction (null
   * when there are none); 'assistant' turns become role 'model' and every other
   * non-system role becomes 'user'.
   */
  private convertMessages(messages: ChatMessage[]): {
    systemInstruction: string | null;
    contents: GeminiContent[];
  } {
    const systemMessages = messages.filter(m => m.role === 'system');
    const systemInstruction = systemMessages.map(m => m.content).join('\n') || null;

    const contents: GeminiContent[] = messages
      .filter(m => m.role !== 'system')
      .map(
        (m): GeminiContent => ({
          role: m.role === 'assistant' ? 'model' : 'user',
          parts: [{ text: m.content }],
        })
      );

    // Gemini requires at least one user turn
    if (contents.length === 0) {
      contents.push({ role: 'user', parts: [{ text: '' }] });
    }

    return { systemInstruction, contents };
  }
}

View File

@ -0,0 +1,74 @@
/**
* Perplexity LLM provider.
*
* Uses Perplexity's OpenAI-compatible API with real-time web search.
* Reads config from PERPLEXITY_API_KEY, PERPLEXITY_MODEL.
*/
import type { ChatCompletionRequest, ChatCompletionResponse, LLMProvider } from '../types.js';
/** Construction options for {@link PerplexityProvider}. */
export interface PerplexityConfig {
  /** Bearer token for the Perplexity API; an empty string means "not configured". */
  apiKey: string;
  /** Default model id, used when a request does not name one. */
  model?: string;
}

/**
 * LLMProvider backed by Perplexity's chat completions endpoint.
 *
 * The request and response bodies follow the OpenAI chat-completions shape, so
 * the adapter only renames fields between camelCase and snake_case.
 */
export class PerplexityProvider implements LLMProvider {
  private config: PerplexityConfig;

  /**
   * @param config - Optional overrides. Empty or missing values fall back to
   *   PERPLEXITY_API_KEY / PERPLEXITY_MODEL, then to no key and 'sonar'.
   */
  constructor(config?: Partial<PerplexityConfig>) {
    const apiKey = config?.apiKey || process.env.PERPLEXITY_API_KEY || '';
    const model = config?.model || process.env.PERPLEXITY_MODEL || 'sonar';
    this.config = { apiKey, model };
  }

  /** True when a non-empty API key is available. */
  isConfigured(): boolean {
    return Boolean(this.config.apiKey);
  }

  /**
   * Posts the chat request and maps the first returned choice.
   *
   * @param req - Chat request; unset sampling options are dropped by JSON serialisation.
   * @returns Content of the first choice ('' when absent), the model reported by
   *   the API, the finish reason (null when absent) and token usage (0 when absent).
   * @throws Error when the provider has no API key or the HTTP status is not ok.
   */
  async chatCompletion(req: ChatCompletionRequest): Promise<ChatCompletionResponse> {
    if (!this.isConfigured()) {
      throw new Error('Perplexity is not configured (missing PERPLEXITY_API_KEY)');
    }

    const { model: requestedModel, messages, temperature, maxTokens, topP } = req;
    const payload = JSON.stringify({
      model: requestedModel || this.config.model,
      messages,
      temperature,
      max_tokens: maxTokens,
      top_p: topP,
    });

    const headers = {
      'Content-Type': 'application/json',
      Authorization: `Bearer ${this.config.apiKey}`,
    };
    const reply = await fetch('https://api.perplexity.ai/chat/completions', {
      method: 'POST',
      headers,
      body: payload,
    });

    if (!reply.ok) {
      const detail = await reply.text();
      throw new Error(`Perplexity error ${reply.status}: ${detail}`);
    }

    const data = (await reply.json()) as {
      choices: Array<{ message: { content: string }; finish_reason: string }>;
      model: string;
      usage: { prompt_tokens: number; completion_tokens: number; total_tokens: number };
    };

    // Only the first choice is surfaced to callers.
    const first = data.choices[0];
    const counts = data.usage;

    return {
      content: first?.message?.content ?? '',
      model: data.model,
      finishReason: (first?.finish_reason as ChatCompletionResponse['finishReason']) ?? null,
      usage: {
        promptTokens: counts?.prompt_tokens ?? 0,
        completionTokens: counts?.completion_tokens ?? 0,
        totalTokens: counts?.total_tokens ?? 0,
      },
    };
  }
}

View File

@ -45,4 +45,4 @@ export interface TokenUsage {
totalTokens: number;
}
export type LLMProviderType = 'azure' | 'openai' | 'mock';
export type LLMProviderType = 'azure' | 'openai' | 'perplexity' | 'gemini' | 'mock';