- 4 providers: Groq, OpenRouter, Together AI, Cerebras - Regex-based prompt classifier (code/math/reasoning/creative/general) - Instance-level round-robin state (no shared module globals) - Sliding-window health tracker (latency, error rate, rate-limit rate) - Auto-fallback on 429/5xx with per-attempt latency tracking - Telemetry hook for all routing decisions (auto + explicit) - OpenRouter recommended headers (HTTP-Referer, X-Title) - 47 tests across 5 test files, zero runtime deps
67 lines
2.0 KiB
TypeScript
67 lines
2.0 KiB
TypeScript
import type { ChatCompletionRequest, ChatCompletionResponse, ProviderConfig } from './types.js';
|
|
|
|
/**
 * Send a non-streaming, OpenAI-compatible chat completion request to a provider.
 *
 * The API key is read from the environment variable named by
 * `provider.apiKeyEnv`, and the request is POSTed to
 * `<provider.baseUrl>/chat/completions` (a trailing slash on the base URL is
 * tolerated).
 *
 * HTTP 429 is reported through the return value instead of an exception: the
 * result then has `status: 429` and `response` set to `null` (despite its
 * declared type), so callers must check `status` before reading `response`.
 *
 * @param provider - Provider settings; `name`, `baseUrl`, `apiKeyEnv` and
 *   `extraHeaders` are read here.
 * @param modelId - Model identifier sent as the `model` field of the payload.
 * @param request - Chat request; `messages` is always forwarded, while
 *   `temperature`, `max_tokens` and `top_p` are forwarded only when defined.
 * @param timeoutMs - Milliseconds after which the request is aborted
 *   (default 30 000).
 * @returns The parsed response body, the time in milliseconds until response
 *   headers were received, and the HTTP status code.
 * @throws Error when the API key env var is missing or empty, or when the
 *   provider answers with a non-2xx status other than 429. Whatever `fetch`
 *   or `res.json()` rejects with (network failure, abort on timeout, invalid
 *   JSON) propagates unchanged.
 */
export async function sendChatCompletion(
  provider: ProviderConfig,
  modelId: string,
  request: ChatCompletionRequest,
  timeoutMs: number = 30_000
): Promise<{ response: ChatCompletionResponse; latencyMs: number; status: number }> {
  const apiKey = process.env[provider.apiKeyEnv];
  if (!apiKey) {
    throw new Error(`Missing API key: env var ${provider.apiKeyEnv} is not set`);
  }

  // Strip trailing slashes so a base URL like ".../v1/" does not yield "//chat/completions".
  const url = `${provider.baseUrl.replace(/\/+$/, '')}/chat/completions`;

  // extraHeaders are spread last, so a provider may override the defaults above them.
  const headers: Record<string, string> = {
    'Content-Type': 'application/json',
    Authorization: `Bearer ${apiKey}`,
    ...provider.extraHeaders,
  };

  // Optional sampling parameters are omitted entirely when undefined; a value of 0 is still sent.
  const body = JSON.stringify({
    model: modelId,
    messages: request.messages,
    ...(request.temperature !== undefined && { temperature: request.temperature }),
    ...(request.max_tokens !== undefined && { max_tokens: request.max_tokens }),
    ...(request.top_p !== undefined && { top_p: request.top_p }),
    stream: false,
  });

  const controller = new AbortController();
  const timer = setTimeout(() => controller.abort(), timeoutMs);
  const start = Date.now();

  try {
    const res = await fetch(url, {
      method: 'POST',
      headers,
      body,
      signal: controller.signal,
    });

    // Latency is measured up to the arrival of the response, before the body is read.
    const latencyMs = Date.now() - start;

    if (res.status === 429) {
      // The body is not needed; cancel it (best effort) so the underlying
      // connection is released instead of waiting on an unread stream.
      await res.body?.cancel().catch(() => undefined);
      return {
        response: null as unknown as ChatCompletionResponse,
        latencyMs,
        status: 429,
      };
    }

    if (!res.ok) {
      // Include a bounded excerpt of the error body; a failure to read it must not mask the status.
      const text = await res.text().catch(() => '');
      throw new Error(`${provider.name} returned ${res.status}: ${text.slice(0, 200)}`);
    }

    const data = (await res.json()) as ChatCompletionResponse;
    return { response: data, latencyMs, status: res.status };
  } finally {
    // The timer stays armed until the body has been handled, then is always cleared.
    clearTimeout(timer);
  }
}
|