learning_ai_common_plat/packages/llm-router/src/registry.ts
2026-04-12 23:51:10 -07:00

245 lines
7.0 KiB
TypeScript

import type { ModelConfig, PromptCategory, ProviderConfig } from './types.js';
/**
* Paid provider configurations (opt-in via API key env vars).
* Add to your RouterConfig.providers to include alongside free-tier providers.
*
* Each entry gives the provider's base URL, the name of the environment
* variable that holds its API key (`apiKeyEnv`), its `rpmLimit` / `tpmLimit`
* values, and the models it exposes.
*
* NOTE(review): `tpmLimit: 0` (used for Perplexity) presumably means "no
* token-per-minute limit tracked" — confirm against the router's rate limiting.
* NOTE(review): `speedTier` looks like 1 = fastest … 3 = slowest — confirm
* against the ModelConfig definition in types.ts.
*/
export const PAID_PROVIDERS: ProviderConfig[] = [
// ── OpenAI ───────────────────────────────────────────────────
{
name: 'openai',
baseUrl: 'https://api.openai.com/v1',
apiKeyEnv: 'OPENAI_API_KEY',
rpmLimit: 500,
tpmLimit: 150_000,
models: [
{
id: 'gpt-4o-mini',
label: 'GPT-4o Mini',
contextWindow: 128_000,
strengths: ['general', 'reasoning', 'code'],
speedTier: 1,
},
{
id: 'gpt-4o',
label: 'GPT-4o',
contextWindow: 128_000,
strengths: ['general', 'reasoning', 'code', 'creative', 'vision'],
speedTier: 2,
// The only model in this list that sets supportsVision.
supportsVision: true,
},
],
},
// ── Perplexity ───────────────────────────────────────────────
// Real-time web search grounding — OpenAI-compatible endpoint
{
name: 'perplexity',
// Unlike the OpenAI entry above, this base URL carries no /v1 suffix.
baseUrl: 'https://api.perplexity.ai',
apiKeyEnv: 'PERPLEXITY_API_KEY',
rpmLimit: 50,
tpmLimit: 0,
models: [
{
id: 'sonar',
label: 'Sonar (web search)',
contextWindow: 127_072,
strengths: ['general', 'reasoning'],
speedTier: 2,
},
{
id: 'sonar-pro',
label: 'Sonar Pro (web search)',
contextWindow: 200_000,
strengths: ['general', 'reasoning'],
speedTier: 3,
},
],
},
];
/**
* Default free-tier provider configurations.
* All use OpenAI-compatible /v1/chat/completions endpoints.
*
* This list is the default argument of getAvailableProviders(), which drops
* every entry whose `apiKeyEnv` variable is unset or empty.
*
* NOTE(review): several entries set `tpmLimit: 0`; presumably that means "no
* token-per-minute limit tracked" — confirm against the router's rate limiting.
* NOTE(review): `speedTier` looks like 1 = fastest … 3 = slowest — confirm
* against the ModelConfig definition in types.ts.
*/
export const DEFAULT_PROVIDERS: ProviderConfig[] = [
// ── Groq ─────────────────────────────────────────────────────
// Free tier: 30 RPM, 14.4K TPM (large), 30K TPM (small)
{
name: 'groq',
baseUrl: 'https://api.groq.com/openai/v1',
apiKeyEnv: 'GROQ_API_KEY',
rpmLimit: 30,
// Uses the lower (large-model) figure from the free-tier note above.
tpmLimit: 14_400,
models: [
{
id: 'llama-3.3-70b-versatile',
label: 'Llama 3.3 70B',
contextWindow: 128_000,
strengths: ['general', 'reasoning', 'code'],
speedTier: 1,
},
{
id: 'llama-3.1-8b-instant',
label: 'Llama 3.1 8B Instant',
contextWindow: 128_000,
strengths: ['general'],
speedTier: 1,
},
{
id: 'gemma2-9b-it',
label: 'Gemma 2 9B',
contextWindow: 8_192,
strengths: ['general', 'creative'],
speedTier: 1,
},
],
},
// ── OpenRouter ───────────────────────────────────────────────
// Free models available (rate-limited per model)
{
name: 'openrouter',
baseUrl: 'https://openrouter.ai/api/v1',
apiKeyEnv: 'OPENROUTER_API_KEY',
// Only provider in this list that declares extraHeaders.
// NOTE(review): presumably sent with every request so OpenRouter can
// attribute traffic to this app — confirm against the OpenRouter API docs.
extraHeaders: {
'HTTP-Referer': 'https://bytelyst.com',
'X-Title': 'ByteLyst LLM Router',
},
rpmLimit: 20,
tpmLimit: 0,
models: [
{
id: 'deepseek/deepseek-r1:free',
label: 'DeepSeek R1 (Free)',
contextWindow: 64_000,
strengths: ['reasoning', 'code', 'math'],
speedTier: 3,
},
{
id: 'meta-llama/llama-3.3-70b-instruct:free',
label: 'Llama 3.3 70B (Free)',
contextWindow: 128_000,
strengths: ['general', 'reasoning', 'code'],
speedTier: 2,
},
{
id: 'google/gemma-2-9b-it:free',
label: 'Gemma 2 9B (Free)',
contextWindow: 8_192,
strengths: ['general', 'creative'],
speedTier: 2,
},
],
},
// ── Together AI ──────────────────────────────────────────────
// Free tier: limited RPM, several open models
{
name: 'together',
baseUrl: 'https://api.together.xyz/v1',
apiKeyEnv: 'TOGETHER_API_KEY',
rpmLimit: 20,
tpmLimit: 0,
models: [
{
id: 'meta-llama/Llama-3.3-70B-Instruct-Turbo',
label: 'Llama 3.3 70B Turbo',
contextWindow: 128_000,
strengths: ['general', 'reasoning', 'code'],
speedTier: 2,
},
{
id: 'deepseek-ai/DeepSeek-R1-Distill-Llama-70B',
label: 'DeepSeek R1 Distill 70B',
contextWindow: 128_000,
strengths: ['reasoning', 'math', 'code'],
speedTier: 2,
},
],
},
// ── Cerebras ─────────────────────────────────────────────────
// Free inference tier — extremely fast
{
name: 'cerebras',
baseUrl: 'https://api.cerebras.ai/v1',
apiKeyEnv: 'CEREBRAS_API_KEY',
rpmLimit: 30,
tpmLimit: 60_000,
models: [
{
id: 'llama-3.3-70b',
label: 'Llama 3.3 70B (Cerebras)',
contextWindow: 128_000,
strengths: ['general', 'reasoning', 'code'],
speedTier: 1,
},
],
},
];
/**
 * Guess which prompt categories a model handles well from keywords in its id.
 *
 * Matching is case-insensitive (the id is lower-cased first). Every model is
 * tagged 'general'; further categories are appended in a fixed order — code,
 * then reasoning + math, then creative — whenever the corresponding keyword
 * pattern occurs anywhere in the id.
 *
 * @param modelId Model identifier, e.g. an Ollama tag.
 * @returns Inferred categories, always starting with 'general'.
 */
function inferStrengths(modelId: string): PromptCategory[] {
  const id = modelId.toLowerCase();

  // Rule order fixes the order of the returned categories.
  const rules: Array<[RegExp, PromptCategory[]]> = [
    [/coder|code|codestral|starcoder|deepseek/, ['code']],
    [/r1|reason|think|math/, ['reasoning', 'math']],
    [/qwen|llama|mistral|chat/, ['creative']],
  ];

  const inferred: PromptCategory[] = ['general'];
  for (const [pattern, categories] of rules) {
    if (pattern.test(id)) {
      inferred.push(...categories);
    }
  }
  return inferred;
}
/**
 * Guess a model's context window (in tokens) from hints embedded in its id.
 *
 * The lower-cased id is tested against the patterns below from largest to
 * smallest window; the first pattern that matches wins. Ids with no
 * recognised hint fall back to 8,192 tokens.
 *
 * @param modelId Model identifier, e.g. an Ollama tag.
 * @returns The inferred context window size.
 */
function inferContextWindow(modelId: string): number {
  const id = modelId.toLowerCase();

  // Ordered largest-first; lookup stops at the first hit.
  const hints: ReadonlyArray<readonly [RegExp, number]> = [
    [/128k|131072/, 128_000],
    [/64k|65536/, 64_000],
    [/32k|32768|qwen2\.5/, 32_768],
    [/16k|16384/, 16_384],
  ];

  const hit = hints.find(([pattern]) => pattern.test(id));
  return hit === undefined ? 8_192 : hit[1];
}
/**
 * Guess a speed tier for a model from the parameter-count token in its id.
 *
 * Tier 1 is returned for the small sizes listed below (0.5b, 1b, 3b, 7b) and
 * for ids containing "mini" or "tiny"; tier 2 for the listed mid sizes
 * (14b–32b); everything else, including ids with no recognisable size, gets
 * tier 3.
 *
 * Size tokens must not be directly preceded by a digit. Without that guard a
 * plain substring test treats "671b" as "1b" and "27b" as "7b" (tier 1), and
 * "120b" as "20b" (tier 2), ranking very large models as fast ones. A leading
 * "." is still accepted, so "2.7b" keeps matching "7b" as before.
 *
 * @param modelId Model identifier, e.g. an Ollama tag such as "llama3.2:1b".
 * @returns 1, 2 or 3 according to the rules above.
 */
function inferSpeedTier(modelId: string): 1 | 2 | 3 {
  const lower = modelId.toLowerCase();
  // (?<!\d): the size token may not be the tail of a longer number.
  if (/(?<!\d)(?:0\.5b|1b|3b|7b)|mini|tiny/.test(lower)) return 1;
  if (/(?<!\d)(?:14b|15b|16b|20b|22b|30b|32b)/.test(lower)) return 2;
  return 3;
}
/**
 * Build a provider entry for a local Ollama server from a list of model ids.
 *
 * Each id becomes a ModelConfig whose label is the id itself and whose
 * context window, strengths and speed tier are inferred from the id via
 * inferContextWindow, inferStrengths and inferSpeedTier. The resulting
 * provider is named 'local-ollama', has both rate limits set to 0, and sets
 * no `apiKeyEnv`.
 *
 * @param modelIds Ids of the locally available models.
 * @param baseUrl Base URL of the server; defaults to 'http://localhost:11434/v1'.
 * @returns The assembled provider configuration.
 */
export function createLocalOllamaProvider(
  modelIds: string[],
  baseUrl: string = 'http://localhost:11434/v1'
): ProviderConfig {
  const describeModel = (id: string): ModelConfig => {
    const contextWindow = inferContextWindow(id);
    const strengths = inferStrengths(id);
    const speedTier = inferSpeedTier(id);
    return { id, label: id, contextWindow, strengths, speedTier };
  };

  return {
    name: 'local-ollama',
    baseUrl,
    models: modelIds.map(id => describeModel(id)),
    rpmLimit: 0,
    tpmLimit: 0,
  };
}
/**
 * Keep only the providers that can be used with the current environment.
 *
 * A provider is kept when it declares no `apiKeyEnv`, or when the environment
 * variable it names is set to a non-empty string. The input array is not
 * modified; a new array is returned.
 *
 * @param providers Candidate providers; defaults to DEFAULT_PROVIDERS.
 * @returns The subset of `providers` that passed the check, in original order.
 */
export function getAvailableProviders(
  providers: ProviderConfig[] = DEFAULT_PROVIDERS
): ProviderConfig[] {
  const isUsable = (provider: ProviderConfig): boolean => {
    const envName = provider.apiKeyEnv;
    // No key variable declared: nothing to check.
    if (!envName) {
      return true;
    }
    const value = process.env[envName];
    return !(value === undefined || value === '');
  };

  return providers.filter(provider => isUsable(provider));
}