// Provider catalogs (paid and free-tier) plus local Ollama helpers for the LLM router.
import type { ModelConfig, PromptCategory, ProviderConfig } from './types.js';
/**
 * Paid provider configurations (opt-in via API key env vars).
 *
 * Add these to your RouterConfig.providers to include them alongside the
 * free-tier providers. Each entry names the environment variable
 * (`apiKeyEnv`) that holds its API key.
 *
 * NOTE(review): `tpmLimit: 0` presumably means "no tokens-per-minute cap is
 * tracked" — confirm against the router's rate limiter.
 */
export const PAID_PROVIDERS: ProviderConfig[] = [
  // ── OpenAI ───────────────────────────────────────────────────
  {
    name: 'openai',
    baseUrl: 'https://api.openai.com/v1',
    apiKeyEnv: 'OPENAI_API_KEY',
    rpmLimit: 500,
    tpmLimit: 150_000,
    models: [
      {
        id: 'gpt-4o-mini',
        label: 'GPT-4o Mini',
        contextWindow: 128_000,
        strengths: ['general', 'reasoning', 'code'],
        speedTier: 1,
      },
      {
        id: 'gpt-4o',
        label: 'GPT-4o',
        contextWindow: 128_000,
        strengths: ['general', 'reasoning', 'code', 'creative', 'vision'],
        speedTier: 2,
        supportsVision: true,
      },
    ],
  },

  // ── Perplexity ───────────────────────────────────────────────
  // Real-time web search grounding — OpenAI-compatible endpoint
  {
    name: 'perplexity',
    baseUrl: 'https://api.perplexity.ai',
    apiKeyEnv: 'PERPLEXITY_API_KEY',
    rpmLimit: 50,
    tpmLimit: 0,
    models: [
      {
        id: 'sonar',
        label: 'Sonar (web search)',
        contextWindow: 127_072,
        strengths: ['general', 'reasoning'],
        speedTier: 2,
      },
      {
        id: 'sonar-pro',
        label: 'Sonar Pro (web search)',
        contextWindow: 200_000,
        strengths: ['general', 'reasoning'],
        speedTier: 3,
      },
    ],
  },
];
/**
 * Default free-tier provider configurations.
 *
 * All use OpenAI-compatible /v1/chat/completions endpoints. Every entry names
 * the environment variable (`apiKeyEnv`) that holds its API key.
 *
 * NOTE(review): `tpmLimit: 0` presumably means "no tokens-per-minute cap is
 * tracked" — confirm against the router's rate limiter.
 */
export const DEFAULT_PROVIDERS: ProviderConfig[] = [
  // ── Groq ─────────────────────────────────────────────────────
  // Free tier: 30 RPM, 14.4K TPM (large), 30K TPM (small)
  {
    name: 'groq',
    baseUrl: 'https://api.groq.com/openai/v1',
    apiKeyEnv: 'GROQ_API_KEY',
    rpmLimit: 30,
    tpmLimit: 14_400,
    models: [
      {
        id: 'llama-3.3-70b-versatile',
        label: 'Llama 3.3 70B',
        contextWindow: 128_000,
        strengths: ['general', 'reasoning', 'code'],
        speedTier: 1,
      },
      {
        id: 'llama-3.1-8b-instant',
        label: 'Llama 3.1 8B Instant',
        contextWindow: 128_000,
        strengths: ['general'],
        speedTier: 1,
      },
      {
        id: 'gemma2-9b-it',
        label: 'Gemma 2 9B',
        contextWindow: 8_192,
        strengths: ['general', 'creative'],
        speedTier: 1,
      },
    ],
  },

  // ── OpenRouter ───────────────────────────────────────────────
  // Free models available (rate-limited per model)
  {
    name: 'openrouter',
    baseUrl: 'https://openrouter.ai/api/v1',
    apiKeyEnv: 'OPENROUTER_API_KEY',
    extraHeaders: {
      'HTTP-Referer': 'https://bytelyst.com',
      'X-Title': 'ByteLyst LLM Router',
    },
    rpmLimit: 20,
    tpmLimit: 0,
    models: [
      {
        id: 'deepseek/deepseek-r1:free',
        label: 'DeepSeek R1 (Free)',
        contextWindow: 64_000,
        strengths: ['reasoning', 'code', 'math'],
        speedTier: 3,
      },
      {
        id: 'meta-llama/llama-3.3-70b-instruct:free',
        label: 'Llama 3.3 70B (Free)',
        contextWindow: 128_000,
        strengths: ['general', 'reasoning', 'code'],
        speedTier: 2,
      },
      {
        id: 'google/gemma-2-9b-it:free',
        label: 'Gemma 2 9B (Free)',
        contextWindow: 8_192,
        strengths: ['general', 'creative'],
        speedTier: 2,
      },
    ],
  },

  // ── Together AI ──────────────────────────────────────────────
  // Free tier: limited RPM, several open models
  {
    name: 'together',
    baseUrl: 'https://api.together.xyz/v1',
    apiKeyEnv: 'TOGETHER_API_KEY',
    rpmLimit: 20,
    tpmLimit: 0,
    models: [
      {
        id: 'meta-llama/Llama-3.3-70B-Instruct-Turbo',
        label: 'Llama 3.3 70B Turbo',
        contextWindow: 128_000,
        strengths: ['general', 'reasoning', 'code'],
        speedTier: 2,
      },
      {
        id: 'deepseek-ai/DeepSeek-R1-Distill-Llama-70B',
        label: 'DeepSeek R1 Distill 70B',
        contextWindow: 128_000,
        strengths: ['reasoning', 'math', 'code'],
        speedTier: 2,
      },
    ],
  },

  // ── Cerebras ─────────────────────────────────────────────────
  // Free inference tier — extremely fast
  {
    name: 'cerebras',
    baseUrl: 'https://api.cerebras.ai/v1',
    apiKeyEnv: 'CEREBRAS_API_KEY',
    rpmLimit: 30,
    tpmLimit: 60_000,
    models: [
      {
        id: 'llama-3.3-70b',
        label: 'Llama 3.3 70B (Cerebras)',
        contextWindow: 128_000,
        strengths: ['general', 'reasoning', 'code'],
        speedTier: 1,
      },
    ],
  },
];
/**
 * Guess which prompt categories a model is good at from its identifier.
 *
 * Matching is case-insensitive substring matching on the id. Every model gets
 * 'general'; further categories are appended in a fixed order
 * (code → reasoning → math → creative) when a keyword hits.
 *
 * @param modelId - Model identifier, e.g. `deepseek-r1:7b`.
 * @returns The inferred categories, without duplicates, 'general' first.
 */
function inferStrengths(modelId: string): PromptCategory[] {
  const id = modelId.toLowerCase();
  const strengths: PromptCategory[] = ['general'];

  // "code" also covers "coder", "codestral" and "starcoder".
  if (/code|deepseek/.test(id)) strengths.push('code');

  // Reasoning-flavoured ids are treated as math-capable as well.
  if (/r1|reason|think|math/.test(id)) strengths.push('reasoning', 'math');

  if (/qwen|llama|mistral|chat/.test(id)) strengths.push('creative');

  return strengths;
}
/**
 * Guess a model's context window (in tokens) from hints in its identifier.
 *
 * The id is lower-cased and checked against the rules below from largest to
 * smallest window; the first rule that matches wins. Ids with no recognised
 * hint fall back to 8,192 tokens.
 *
 * @param modelId - Model identifier, e.g. `qwen2.5:7b` or `phi3-128k`.
 * @returns The inferred context window size in tokens.
 */
function inferContextWindow(modelId: string): number {
  const id = modelId.toLowerCase();

  // Ordered rules: [pattern looked for in the id, window size it implies].
  const rules: ReadonlyArray<readonly [RegExp, number]> = [
    [/128k|131072/, 128_000],
    [/64k|65536/, 64_000],
    [/32k|32768|qwen2\.5/, 32_768],
    [/16k|16384/, 16_384],
  ];

  for (const [pattern, tokens] of rules) {
    if (pattern.test(id)) return tokens;
  }

  return 8_192;
}
/**
 * Guess a model's speed tier from the parameter-count hint in its identifier.
 *
 * Small models (0.5b / 1b / 3b / 7b, or ids containing "mini" / "tiny") map to
 * tier 1, mid-size models (14b–32b) to tier 2, and everything else to tier 3.
 *
 * @param modelId - Model identifier, e.g. `llama3.2:1b` or `mixtral:8x22b`.
 * @returns 1, 2 or 3.
 */
function inferSpeedTier(modelId: string): 1 | 2 | 3 {
  const id = modelId.toLowerCase();

  // The lookbehind rejects a size that is only the tail of a larger number,
  // so "671b", "13b", "27b" or "1.1b" are no longer read as 1b / 3b / 7b.
  // Non-digit prefixes still match, so "8x7b" counts as a 7b model.
  if (/(?<![\d.])(?:0\.5|1|3|7)b|mini|tiny/.test(id)) return 1;

  // Same guard for the mid-size bucket: "120b" / "130b" are not 20b / 30b.
  if (/(?<![\d.])(?:14|15|16|20|22|30|32)b/.test(id)) return 2;

  return 3;
}
/**
 * Build a provider config named 'local-ollama' for the given model ids.
 *
 * Each id becomes a model entry whose label is the id itself and whose
 * context window, strengths and speed tier are inferred from the id by the
 * `infer*` helpers in this file. The provider has no `apiKeyEnv`.
 *
 * NOTE(review): `rpmLimit` / `tpmLimit` are set to 0, presumably meaning
 * "no rate limit" for a local server — confirm against the router's limiter.
 *
 * @param modelIds - Model ids to expose, e.g. `['llama3.2:3b']`.
 * @param baseUrl - Base URL of the endpoint; defaults to
 *   `http://localhost:11434/v1`.
 * @returns The assembled provider configuration.
 */
export function createLocalOllamaProvider(
  modelIds: string[],
  baseUrl: string = 'http://localhost:11434/v1'
): ProviderConfig {
  const models = modelIds.map(
    (modelId): ModelConfig => ({
      id: modelId,
      label: modelId,
      contextWindow: inferContextWindow(modelId),
      strengths: inferStrengths(modelId),
      speedTier: inferSpeedTier(modelId),
    })
  );

  return {
    name: 'local-ollama',
    baseUrl,
    models,
    rpmLimit: 0,
    tpmLimit: 0,
  };
}
/**
 * Filter providers to only those with API keys present in env.
 *
 * A provider without an `apiKeyEnv` is always kept. A provider with one is
 * kept only when that environment variable is set to a non-empty string.
 *
 * @param providers - Candidate providers; defaults to `DEFAULT_PROVIDERS`.
 * @returns A new array with the usable providers, in their original order.
 */
export function getAvailableProviders(
  providers: ProviderConfig[] = DEFAULT_PROVIDERS
): ProviderConfig[] {
  return providers.filter(({ apiKeyEnv }) => {
    // No key required, so nothing to check.
    if (!apiKeyEnv) return true;

    const key = process.env[apiKeyEnv];
    // An exported-but-empty variable counts as "not configured".
    return key !== undefined && key !== '';
  });
}