import type { ModelConfig, PromptCategory, ProviderConfig } from './types.js';

/**
 * Paid provider configurations (opt-in via API key env vars).
 * Add to your RouterConfig.providers to include alongside free-tier providers.
 */
export const PAID_PROVIDERS: ProviderConfig[] = [
  // ── OpenAI ───────────────────────────────────────────────────
  {
    name: 'openai',
    baseUrl: 'https://api.openai.com/v1',
    apiKeyEnv: 'OPENAI_API_KEY',
    rpmLimit: 500,
    tpmLimit: 150_000,
    models: [
      {
        id: 'gpt-4o-mini',
        label: 'GPT-4o Mini',
        contextWindow: 128_000,
        strengths: ['general', 'reasoning', 'code'],
        speedTier: 1,
      },
      {
        id: 'gpt-4o',
        label: 'GPT-4o',
        contextWindow: 128_000,
        strengths: ['general', 'reasoning', 'code', 'creative', 'vision'],
        speedTier: 2,
        supportsVision: true,
      },
    ],
  },

  // ── Perplexity ───────────────────────────────────────────────
  // Real-time web search grounding — OpenAI-compatible endpoint
  {
    name: 'perplexity',
    baseUrl: 'https://api.perplexity.ai',
    apiKeyEnv: 'PERPLEXITY_API_KEY',
    rpmLimit: 50,
    tpmLimit: 0,
    models: [
      {
        id: 'sonar',
        label: 'Sonar (web search)',
        contextWindow: 127_072,
        strengths: ['general', 'reasoning'],
        speedTier: 2,
      },
      {
        id: 'sonar-pro',
        label: 'Sonar Pro (web search)',
        contextWindow: 200_000,
        strengths: ['general', 'reasoning'],
        speedTier: 3,
      },
    ],
  },
];

/**
 * Default free-tier provider configurations.
 * All use OpenAI-compatible /v1/chat/completions endpoints.
 */
export const DEFAULT_PROVIDERS: ProviderConfig[] = [
  // ── Groq ─────────────────────────────────────────────────────
  // Free tier: 30 RPM, 14.4K TPM (large), 30K TPM (small)
  {
    name: 'groq',
    baseUrl: 'https://api.groq.com/openai/v1',
    apiKeyEnv: 'GROQ_API_KEY',
    rpmLimit: 30,
    tpmLimit: 14_400,
    models: [
      {
        id: 'llama-3.3-70b-versatile',
        label: 'Llama 3.3 70B',
        contextWindow: 128_000,
        strengths: ['general', 'reasoning', 'code'],
        speedTier: 1,
      },
      {
        id: 'llama-3.1-8b-instant',
        label: 'Llama 3.1 8B Instant',
        contextWindow: 128_000,
        strengths: ['general'],
        speedTier: 1,
      },
      {
        id: 'gemma2-9b-it',
        label: 'Gemma 2 9B',
        contextWindow: 8_192,
        strengths: ['general', 'creative'],
        speedTier: 1,
      },
    ],
  },

  // ── OpenRouter ───────────────────────────────────────────────
  // Free models available (rate-limited per model)
  {
    name: 'openrouter',
    baseUrl: 'https://openrouter.ai/api/v1',
    apiKeyEnv: 'OPENROUTER_API_KEY',
    extraHeaders: {
      'HTTP-Referer': 'https://bytelyst.com',
      'X-Title': 'ByteLyst LLM Router',
    },
    rpmLimit: 20,
    tpmLimit: 0,
    models: [
      {
        id: 'deepseek/deepseek-r1:free',
        label: 'DeepSeek R1 (Free)',
        contextWindow: 64_000,
        strengths: ['reasoning', 'code', 'math'],
        speedTier: 3,
      },
      {
        id: 'meta-llama/llama-3.3-70b-instruct:free',
        label: 'Llama 3.3 70B (Free)',
        contextWindow: 128_000,
        strengths: ['general', 'reasoning', 'code'],
        speedTier: 2,
      },
      {
        id: 'google/gemma-2-9b-it:free',
        label: 'Gemma 2 9B (Free)',
        contextWindow: 8_192,
        strengths: ['general', 'creative'],
        speedTier: 2,
      },
    ],
  },

  // ── Together AI ──────────────────────────────────────────────
  // Free tier: limited RPM, several open models
  {
    name: 'together',
    baseUrl: 'https://api.together.xyz/v1',
    apiKeyEnv: 'TOGETHER_API_KEY',
    rpmLimit: 20,
    tpmLimit: 0,
    models: [
      {
        id: 'meta-llama/Llama-3.3-70B-Instruct-Turbo',
        label: 'Llama 3.3 70B Turbo',
        contextWindow: 128_000,
        strengths: ['general', 'reasoning', 'code'],
        speedTier: 2,
      },
      {
        id: 'deepseek-ai/DeepSeek-R1-Distill-Llama-70B',
        label: 'DeepSeek R1 Distill 70B',
        contextWindow: 128_000,
        strengths: ['reasoning', 'math', 'code'],
        speedTier: 2,
      },
    ],
  },

  // ── Cerebras ─────────────────────────────────────────────────
  // Free inference tier — extremely fast
  {
    name: 'cerebras',
    baseUrl: 'https://api.cerebras.ai/v1',
    apiKeyEnv: 'CEREBRAS_API_KEY',
    rpmLimit: 30,
    tpmLimit: 60_000,
    models: [
      {
        id: 'llama-3.3-70b',
        label: 'Llama 3.3 70B (Cerebras)',
        contextWindow: 128_000,
        strengths: ['general', 'reasoning', 'code'],
        speedTier: 1,
      },
    ],
  },
];

/**
 * Guess prompt-category strengths from keywords in a model id.
 *
 * Every model gets `'general'`; further categories are added when the
 * lower-cased id contains a matching keyword. Purely name-based heuristic.
 *
 * @param modelId - Model identifier, e.g. `qwen2.5-coder:7b`.
 * @returns De-duplicated categories in insertion order (`'general'` first).
 */
function inferStrengths(modelId: string): PromptCategory[] {
  const lower = modelId.toLowerCase();
  // Explicit element type: a bare `new Set(['general'])` infers Set<string>,
  // whose spread is not assignable to PromptCategory[].
  const strengths = new Set<PromptCategory>(['general']);

  // `code` also covers "coder", "codestral" and "starcoder".
  if (/code|deepseek/.test(lower)) strengths.add('code');

  if (/r1|reason|think|math/.test(lower)) {
    strengths.add('reasoning');
    strengths.add('math');
  }

  if (/qwen|llama|mistral|chat/.test(lower)) strengths.add('creative');

  return [...strengths];
}

/**
 * Guess a context window (in tokens) from hints embedded in a model id.
 *
 * Recognises explicit size markers such as `128k` or `32768`; ids containing
 * `qwen2.5` are mapped to 32,768. Anything else falls back to 8,192.
 *
 * @param modelId - Model identifier.
 * @returns The inferred context window size.
 */
function inferContextWindow(modelId: string): number {
  const lower = modelId.toLowerCase();
  if (/128k|131072/.test(lower)) return 128_000;
  if (/64k|65536/.test(lower)) return 64_000;
  if (/32k|32768|qwen2\.5/.test(lower)) return 32_768;
  if (/16k|16384/.test(lower)) return 16_384;
  return 8_192;
}

/**
 * Matches a parameter-count tag such as `7b`, `0.5b` or `671b` in a
 * lower-cased model id and captures the number.
 *
 * - The lookbehind rejects a match that starts inside a longer number, so
 *   `671b` is read as 671 rather than as a trailing `1b`, and a version
 *   fragment such as the `.5` in `qwen2.5` is never taken for a size.
 * - The lookahead rejects words that merely start with `b` (e.g. `4bit`).
 */
const PARAM_COUNT_PATTERN = /(?<![\d.])(\d+(?:\.\d+)?)b(?![a-z])/;

/**
 * Guess a speed tier (1 = fastest, 3 = slowest) from a model id.
 *
 * When the id carries a parameter-count tag, the tier is chosen by magnitude:
 * under 10B → 1, 10B–32B → 2, larger → 3. When no size tag is present, ids
 * containing `mini` or `tiny` are tier 1 and everything else is tier 3.
 *
 * The size is parsed as a whole number instead of being substring-matched, so
 * `13b`, `27b` and `671b` are no longer mistaken for `3b`, `7b` and `1b`.
 *
 * @param modelId - Model identifier, e.g. `llama3.1:8b`.
 * @returns The inferred speed tier.
 */
function inferSpeedTier(modelId: string): 1 | 2 | 3 {
  const lower = modelId.toLowerCase();

  const sizeText = PARAM_COUNT_PATTERN.exec(lower)?.[1];
  if (sizeText !== undefined) {
    const billions = Number.parseFloat(sizeText);
    if (billions < 10) return 1;
    if (billions <= 32) return 2;
    return 3;
  }

  return /mini|tiny/.test(lower) ? 1 : 3;
}

/**
 * Build a provider config for a local Ollama server.
 *
 * Each model's context window, strengths and speed tier are inferred from its
 * id by the name-based heuristics in this module; the label is the id itself.
 * The returned provider has no `apiKeyEnv`, so `getAvailableProviders` always
 * keeps it.
 *
 * @param modelIds - Ids of the models served by the local instance.
 * @param baseUrl - OpenAI-compatible base URL of the Ollama server.
 * @returns A provider named `local-ollama` with `rpmLimit` and `tpmLimit` of 0
 *   (presumably "no limit" — confirm against the router's rate limiter).
 */
export function createLocalOllamaProvider(
  modelIds: string[],
  baseUrl: string = 'http://localhost:11434/v1'
): ProviderConfig {
  const models: ModelConfig[] = modelIds.map(modelId => ({
    id: modelId,
    label: modelId,
    contextWindow: inferContextWindow(modelId),
    strengths: inferStrengths(modelId),
    speedTier: inferSpeedTier(modelId),
  }));

  return {
    name: 'local-ollama',
    baseUrl,
    models,
    rpmLimit: 0,
    tpmLimit: 0,
  };
}

/**
 * Filter providers to only those with API keys present in env.
 *
 * Providers that declare no `apiKeyEnv` are always kept. A provider whose
 * env var is unset or set to the empty string is dropped.
 *
 * @param providers - Candidate providers; defaults to `DEFAULT_PROVIDERS`.
 * @returns The usable subset, in the original order.
 */
export function getAvailableProviders(
  providers: ProviderConfig[] = DEFAULT_PROVIDERS
): ProviderConfig[] {
  return providers.filter(p => {
    if (!p.apiKeyEnv) return true;
    const key = process.env[p.apiKeyEnv];
    return key !== undefined && key !== '';
  });
}