learning_ai_common_plat/packages/llm-router/src/registry.ts

187 lines
5.4 KiB
TypeScript

import type { ModelConfig, PromptCategory, ProviderConfig } from './types.js';
/**
* Default free-tier provider configurations.
* All use OpenAI-compatible /v1/chat/completions endpoints.
*
* Each entry names the provider, its base URL, the environment variable that
* holds its API key (`apiKeyEnv`), provider-level rate limits (`rpmLimit`,
* `tpmLimit`) and the models offered. This array is the default argument of
* `getAvailableProviders`, which drops entries whose API key is not set.
*
* NOTE(review): several providers set `tpmLimit: 0`; presumably 0 means
* "no token-per-minute cap tracked" — confirm against the rate limiter.
* NOTE(review): a lower `speedTier` appears to mean a faster model (Cerebras,
* described below as extremely fast, is tier 1) — confirm in `types`.
*/
export const DEFAULT_PROVIDERS: ProviderConfig[] = [
// ── Groq ─────────────────────────────────────────────────────
// Free tier: 30 RPM, 14.4K TPM (large), 30K TPM (small)
{
name: 'groq',
baseUrl: 'https://api.groq.com/openai/v1',
apiKeyEnv: 'GROQ_API_KEY',
rpmLimit: 30,
// Provider-wide limit uses the lower (large-model) TPM figure quoted above.
tpmLimit: 14_400,
models: [
{
id: 'llama-3.3-70b-versatile',
label: 'Llama 3.3 70B',
contextWindow: 128_000,
strengths: ['general', 'reasoning', 'code'],
speedTier: 1,
},
{
id: 'llama-3.1-8b-instant',
label: 'Llama 3.1 8B Instant',
contextWindow: 128_000,
strengths: ['general'],
speedTier: 1,
},
{
id: 'gemma2-9b-it',
label: 'Gemma 2 9B',
contextWindow: 8_192,
strengths: ['general', 'creative'],
speedTier: 1,
},
],
},
// ── OpenRouter ───────────────────────────────────────────────
// Free models available (rate-limited per model)
{
name: 'openrouter',
baseUrl: 'https://openrouter.ai/api/v1',
apiKeyEnv: 'OPENROUTER_API_KEY',
// Extra request headers for this provider only.
// NOTE(review): presumably used by OpenRouter for app attribution — confirm.
extraHeaders: {
'HTTP-Referer': 'https://bytelyst.com',
'X-Title': 'ByteLyst LLM Router',
},
rpmLimit: 20,
tpmLimit: 0,
models: [
{
id: 'deepseek/deepseek-r1:free',
label: 'DeepSeek R1 (Free)',
contextWindow: 64_000,
strengths: ['reasoning', 'code', 'math'],
speedTier: 3,
},
{
id: 'meta-llama/llama-3.3-70b-instruct:free',
label: 'Llama 3.3 70B (Free)',
contextWindow: 128_000,
strengths: ['general', 'reasoning', 'code'],
speedTier: 2,
},
{
id: 'google/gemma-2-9b-it:free',
label: 'Gemma 2 9B (Free)',
contextWindow: 8_192,
strengths: ['general', 'creative'],
speedTier: 2,
},
],
},
// ── Together AI ──────────────────────────────────────────────
// Free tier: limited RPM, several open models
{
name: 'together',
baseUrl: 'https://api.together.xyz/v1',
apiKeyEnv: 'TOGETHER_API_KEY',
rpmLimit: 20,
tpmLimit: 0,
models: [
{
id: 'meta-llama/Llama-3.3-70B-Instruct-Turbo',
label: 'Llama 3.3 70B Turbo',
contextWindow: 128_000,
strengths: ['general', 'reasoning', 'code'],
speedTier: 2,
},
{
id: 'deepseek-ai/DeepSeek-R1-Distill-Llama-70B',
label: 'DeepSeek R1 Distill 70B',
contextWindow: 128_000,
strengths: ['reasoning', 'math', 'code'],
speedTier: 2,
},
],
},
// ── Cerebras ─────────────────────────────────────────────────
// Free inference tier — extremely fast
{
name: 'cerebras',
baseUrl: 'https://api.cerebras.ai/v1',
apiKeyEnv: 'CEREBRAS_API_KEY',
rpmLimit: 30,
tpmLimit: 60_000,
models: [
{
id: 'llama-3.3-70b',
label: 'Llama 3.3 70B (Cerebras)',
contextWindow: 128_000,
strengths: ['general', 'reasoning', 'code'],
speedTier: 1,
},
],
},
];
/**
 * Guess which prompt categories a model suits, based only on keywords in its id.
 *
 * Every model gets 'general'. Further categories are appended when the
 * lower-cased id matches the corresponding keyword pattern; the result keeps
 * the order general → code → reasoning → math → creative with no duplicates.
 *
 * @param modelId Model identifier; matched case-insensitively.
 * @returns The inferred categories, always starting with 'general'.
 */
function inferStrengths(modelId: string): PromptCategory[] {
  const id = modelId.toLowerCase();

  // Rules are evaluated top to bottom; that order fixes the output order.
  const rules: Array<[RegExp, PromptCategory[]]> = [
    [/coder|code|codestral|starcoder|deepseek/, ['code']],
    [/r1|reason|think|math/, ['reasoning', 'math']],
    [/qwen|llama|mistral|chat/, ['creative']],
  ];

  const found = new Set<PromptCategory>();
  found.add('general');
  for (const [pattern, categories] of rules) {
    if (!pattern.test(id)) continue;
    for (const category of categories) found.add(category);
  }
  return Array.from(found);
}
/**
 * Guess a model's context window (in tokens) from hints embedded in its id.
 *
 * The lower-cased id is checked against size markers from largest to smallest
 * and the first match wins. Ids containing "qwen2.5" are treated as 32K models.
 * Ids with no recognised marker fall back to 8,192 tokens.
 *
 * @param modelId Model identifier; matched case-insensitively.
 * @returns The inferred context window size.
 */
function inferContextWindow(modelId: string): number {
  const id = modelId.toLowerCase();

  // Ordered largest-first so e.g. "qwen2.5-128k" resolves to the 128K entry.
  const windowHints: ReadonlyArray<readonly [RegExp, number]> = [
    [/128k|131072/, 128_000],
    [/64k|65536/, 64_000],
    [/32k|32768|qwen2\.5/, 32_768],
    [/16k|16384/, 16_384],
  ];

  for (const [marker, size] of windowHints) {
    if (marker.test(id)) {
      return size;
    }
  }
  return 8_192;
}
/**
 * Guess a speed tier for a model from the parameter-count tag in its id.
 *
 * - Tier 1: ids tagged 0.5b, 1b, 3b or 7b, or containing "mini" / "tiny".
 * - Tier 2: ids tagged 14b, 15b, 16b, 20b, 22b, 30b or 32b.
 * - Tier 3: everything else, including sizes that are not listed (e.g. 8b, 70b).
 *
 * A size tag only counts when it is not the tail of a longer integer, so
 * "671b" is not read as "1b", "13b" is not read as "3b", and "120b" is not
 * read as "20b". Decimal sizes such as "6.7b" still match their trailing tag.
 *
 * @param modelId Model identifier, e.g. "qwen2.5-coder:7b"; matched case-insensitively.
 * @returns 1, 2 or 3 according to the tiers above.
 */
function inferSpeedTier(modelId: string): 1 | 2 | 3 {
  const lower = modelId.toLowerCase();
  // `(?:^|\D)` requires start-of-string or a non-digit before the size tag,
  // which stops larger parameter counts from matching by their last digits.
  if (/(?:^|\D)(?:0\.5b|1b|3b|7b)|mini|tiny/.test(lower)) return 1;
  if (/(?:^|\D)(?:14b|15b|16b|20b|22b|30b|32b)/.test(lower)) return 2;
  return 3;
}
/**
 * Build a provider entry named 'local-ollama' for the given model ids.
 *
 * Each model's label is its id; its context window, strengths and speed tier
 * are guessed from the id by `inferContextWindow`, `inferStrengths` and
 * `inferSpeedTier`. The returned provider has no `apiKeyEnv` and both rate
 * limits set to 0.
 *
 * @param modelIds Ids of the models to expose through this provider.
 * @param baseUrl Base URL of the server; defaults to 'http://localhost:11434/v1'.
 * @returns A provider configuration with one model entry per id, in input order.
 */
export function createLocalOllamaProvider(
  modelIds: string[],
  baseUrl: string = 'http://localhost:11434/v1'
): ProviderConfig {
  // Turns a bare model id into a full model description using the id heuristics.
  const describeModel = (id: string): ModelConfig => {
    const contextWindow = inferContextWindow(id);
    const strengths = inferStrengths(id);
    const speedTier = inferSpeedTier(id);
    return { id, label: id, contextWindow, strengths, speedTier };
  };

  const provider: ProviderConfig = {
    name: 'local-ollama',
    baseUrl,
    models: modelIds.map(id => describeModel(id)),
    rpmLimit: 0,
    tpmLimit: 0,
  };
  return provider;
}
/**
 * Select the providers that can be used with the current environment.
 *
 * A provider is kept when it declares no `apiKeyEnv`, or when the environment
 * variable it names is set to a non-empty string.
 *
 * @param providers Candidate providers; defaults to `DEFAULT_PROVIDERS`.
 * @returns A new array with the usable providers, in their original order.
 */
export function getAvailableProviders(
  providers: ProviderConfig[] = DEFAULT_PROVIDERS
): ProviderConfig[] {
  const isUsable = (provider: ProviderConfig): boolean => {
    const envName = provider.apiKeyEnv;
    if (envName) {
      const value = process.env[envName];
      // Unset and empty-string values both count as "no key configured".
      return !(value === undefined || value === '');
    }
    // No key variable declared: nothing to check.
    return true;
  };

  return providers.filter(provider => isUsable(provider));
}