- 4 providers: Groq, OpenRouter, Together AI, Cerebras
- Regex-based prompt classifier (code/math/reasoning/creative/general)
- Instance-level round-robin state (no shared module globals)
- Sliding-window health tracker (latency, error rate, rate-limit rate)
- Auto-fallback on 429/5xx with per-attempt latency tracking
- Telemetry hook for all routing decisions (auto + explicit)
- OpenRouter recommended headers (HTTP-Referer, X-Title)
- 47 tests across 5 test files, zero runtime deps
104 lines
3.3 KiB
TypeScript
104 lines
3.3 KiB
TypeScript
import type { HealthSnapshot, RequestRecord } from './types.js';
|
|
|
|
/** Request history for one provider+model pair, stored with the identity it was recorded under. */
interface TrackedPair {
  readonly provider: string;
  readonly model: string;
  records: RequestRecord[];
}

/**
 * Sliding-window health tracker for provider+model pairs.
 *
 * Each recorded request is kept for `windowMs` milliseconds (judged by its
 * `timestamp` against `Date.now()`). Snapshots report request counts by
 * status, latency statistics over successful requests, and a `healthy` flag
 * derived from the error and rate-limit ratios inside the window.
 */
export class HealthTracker {
  /** Live pairs, keyed by an unambiguous encoding of (provider, model). */
  private readonly pairs = new Map<string, TrackedPair>();
  private readonly windowMs: number;
  private readonly errorThreshold: number;
  private readonly rateLimitThreshold: number;

  /**
   * @param opts.windowMs Length of the sliding window in milliseconds (default 60 000).
   * @param opts.errorThreshold Error ratio at or above which a pair is unhealthy (default 0.5).
   * @param opts.rateLimitThreshold Rate-limit ratio at or above which a pair is unhealthy (default 0.3).
   */
  constructor(opts?: { windowMs?: number; errorThreshold?: number; rateLimitThreshold?: number }) {
    this.windowMs = opts?.windowMs ?? 60_000;
    this.errorThreshold = opts?.errorThreshold ?? 0.5;
    this.rateLimitThreshold = opts?.rateLimitThreshold ?? 0.3;
  }

  /**
   * Build the map key for a pair.
   *
   * A JSON-encoded tuple is used instead of a joined string so that names
   * containing the separator cannot collide or be mis-split later.
   */
  private key(provider: string, model: string): string {
    return JSON.stringify([provider, model]);
  }

  /** Return only the records whose timestamp is still inside the window. */
  private prune(records: RequestRecord[]): RequestRecord[] {
    const cutoff = Date.now() - this.windowMs;
    return records.filter(r => r.timestamp >= cutoff);
  }

  /**
   * Prune a pair's history and return what is left.
   *
   * Pairs that were never recorded are not created, and pairs whose window has
   * emptied are removed, so read-only queries never grow the map.
   */
  private liveRecords(provider: string, model: string): RequestRecord[] {
    const k = this.key(provider, model);
    const pair = this.pairs.get(k);
    if (pair === undefined) {
      return [];
    }
    pair.records = this.prune(pair.records);
    if (pair.records.length === 0) {
      this.pairs.delete(k);
    }
    return pair.records;
  }

  /** Record a completed request (success, rate_limit, or error). */
  record(provider: string, model: string, entry: RequestRecord): void {
    const k = this.key(provider, model);
    const previous = this.pairs.get(k)?.records ?? [];
    // Prune after appending so an entry that is already outside the window is dropped too.
    const records = this.prune([...previous, entry]);
    if (records.length > 0) {
      this.pairs.set(k, { provider, model, records });
    } else {
      this.pairs.delete(k);
    }
  }

  /**
   * Get health snapshot for a provider+model pair.
   *
   * Latency figures are computed over successful requests only and are 0 when
   * there are none. A pair with fewer than 3 requests in the window is always
   * reported healthy; otherwise both the error ratio and the rate-limit ratio
   * must be strictly below their thresholds.
   */
  snapshot(provider: string, model: string): HealthSnapshot {
    const records = this.liveRecords(provider, model);
    const total = records.length;

    let successes = 0;
    let rateLimits = 0;
    let errors = 0;
    const successLatencies: number[] = [];
    for (const r of records) {
      if (r.status === 'success') {
        successes += 1;
        successLatencies.push(r.latencyMs);
      } else if (r.status === 'rate_limit') {
        rateLimits += 1;
      } else if (r.status === 'error') {
        errors += 1;
      }
    }
    successLatencies.sort((a, b) => a - b);

    const sampleCount = successLatencies.length;
    const avgLatencyMs =
      sampleCount > 0 ? successLatencies.reduce((sum, ms) => sum + ms, 0) / sampleCount : 0;
    // Sample at index floor(n * 0.95) of the ascending latencies; the fallbacks
    // only apply when there are no successful samples, giving 0.
    const p95LatencyMs =
      successLatencies[Math.floor(sampleCount * 0.95)] ?? successLatencies[sampleCount - 1] ?? 0;

    const errorRate = total > 0 ? errors / total : 0;
    const rateLimitRate = total > 0 ? rateLimits / total : 0;
    const healthy =
      total < 3 || // not enough data → assume healthy
      (errorRate < this.errorThreshold && rateLimitRate < this.rateLimitThreshold);

    return {
      provider,
      model,
      totalRequests: total,
      successes,
      rateLimits,
      errors,
      avgLatencyMs: Math.round(avgLatencyMs),
      p95LatencyMs: Math.round(p95LatencyMs),
      healthy,
    };
  }

  /** Check if a specific provider+model is currently healthy. */
  isHealthy(provider: string, model: string): boolean {
    return this.snapshot(provider, model).healthy;
  }

  /**
   * Get snapshots for every pair that still has requests inside the window.
   *
   * Provider and model come from the stored pair rather than from parsing the
   * map key, so names containing any separator are reported intact.
   */
  allSnapshots(): HealthSnapshot[] {
    const snapshots: HealthSnapshot[] = [];
    // Copy the values first: snapshot() may delete pairs whose window has emptied.
    for (const { provider, model } of [...this.pairs.values()]) {
      const snap = this.snapshot(provider, model);
      if (snap.totalRequests > 0) {
        snapshots.push(snap);
      }
    }
    return snapshots;
  }

  /** Clear all tracking data. */
  reset(): void {
    this.pairs.clear();
  }
}
|