learning_ai_common_plat/packages/llm-router/src/health.ts
saravanakumardb1 b1b3fe42df feat(llm-router): add @bytelyst/llm-router — pure-code LLM router for free-tier providers
- 4 providers: Groq, OpenRouter, Together AI, Cerebras
- Regex-based prompt classifier (code/math/reasoning/creative/general)
- Instance-level round-robin state (no shared module globals)
- Sliding-window health tracker (latency, error rate, rate-limit rate)
- Auto-fallback on 429/5xx with per-attempt latency tracking
- Telemetry hook for all routing decisions (auto + explicit)
- OpenRouter recommended headers (HTTP-Referer, X-Title)
- 47 tests across 5 test files, zero runtime deps
2026-03-12 13:45:49 -07:00

104 lines
3.3 KiB
TypeScript

import type { HealthSnapshot, RequestRecord } from './types.js';
/**
 * Sliding-window health tracker for provider+model pairs.
 *
 * Every recorded request is kept for `windowMs` milliseconds (judged by the
 * record's own `timestamp` against `Date.now()`); older records are dropped
 * whenever the pair is written to or read. From the records still inside the
 * window the tracker derives success/error/rate-limit counts, latency
 * statistics over successful requests, and a boolean health verdict.
 */
export class HealthTracker {
  /**
   * Per-pair state, keyed by {@link HealthTracker.key}. The provider and model
   * are stored next to the records so they never have to be parsed back out of
   * the key string.
   */
  private records = new Map<
    string,
    { provider: string; model: string; entries: RequestRecord[] }
  >();
  private readonly windowMs: number;
  private readonly errorThreshold: number;
  private readonly rateLimitThreshold: number;

  /**
   * @param opts.windowMs Length of the sliding window in milliseconds (default 60 000).
   * @param opts.errorThreshold Error rate at or above which a pair is unhealthy (default 0.5).
   * @param opts.rateLimitThreshold Rate-limit rate at or above which a pair is unhealthy (default 0.3).
   */
  constructor(opts?: { windowMs?: number; errorThreshold?: number; rateLimitThreshold?: number }) {
    this.windowMs = opts?.windowMs ?? 60_000;
    this.errorThreshold = opts?.errorThreshold ?? 0.5;
    this.rateLimitThreshold = opts?.rateLimitThreshold ?? 0.3;
  }

  /**
   * Build the map key for a pair. A JSON-encoded tuple is unambiguous for any
   * provider/model strings, unlike joining with a separator that may itself
   * occur inside a model id.
   */
  private key(provider: string, model: string): string {
    return JSON.stringify([provider, model]);
  }

  /** Return only the records whose timestamp is still inside the window. */
  private prune(records: RequestRecord[]): RequestRecord[] {
    const cutoff = Date.now() - this.windowMs;
    return records.filter(r => r.timestamp >= cutoff);
  }

  /** Record a completed request (success, rate_limit, or error). */
  record(provider: string, model: string, entry: RequestRecord): void {
    const k = this.key(provider, model);
    const bucket = this.records.get(k);
    if (bucket === undefined) {
      this.records.set(k, { provider, model, entries: this.prune([entry]) });
      return;
    }
    bucket.entries.push(entry);
    bucket.entries = this.prune(bucket.entries);
  }

  /**
   * Get health snapshot for a provider+model pair.
   *
   * Expired records of an already-tracked pair are discarded as a side effect.
   * Asking about a pair that was never recorded returns an empty, healthy
   * snapshot and does NOT start tracking it.
   *
   * @returns Counts for the current window, average and p95 latency of
   *   successful requests (rounded to whole milliseconds, 0 when there are
   *   none), and the health verdict.
   */
  snapshot(provider: string, model: string): HealthSnapshot {
    const bucket = this.records.get(this.key(provider, model));
    let records: RequestRecord[] = [];
    if (bucket !== undefined) {
      bucket.entries = this.prune(bucket.entries);
      records = bucket.entries;
    }

    const total = records.length;
    let successes = 0;
    let rateLimits = 0;
    let errors = 0;
    for (const r of records) {
      if (r.status === 'success') successes++;
      else if (r.status === 'rate_limit') rateLimits++;
      else if (r.status === 'error') errors++;
    }

    // filter() yields a fresh array, so sorting does not reorder stored records.
    const successLatencies = records
      .filter(r => r.status === 'success')
      .map(r => r.latencyMs)
      .sort((a, b) => a - b);
    const sampleCount = successLatencies.length;

    const avgLatencyMs =
      sampleCount > 0 ? successLatencies.reduce((a, b) => a + b, 0) / sampleCount : 0;

    // Nearest-rank percentile: the value at 1-based rank ceil(0.95 * n).
    // Integer arithmetic (n * 95 / 100) avoids floating-point drift in 0.95 * n.
    const p95Rank = Math.ceil((sampleCount * 95) / 100);
    const p95LatencyMs = sampleCount > 0 ? (successLatencies[p95Rank - 1] ?? 0) : 0;

    // Healthy = not too many errors or rate limits
    const errorRate = total > 0 ? errors / total : 0;
    const rateLimitRate = total > 0 ? rateLimits / total : 0;
    const healthy =
      total < 3 || // not enough data → assume healthy
      (errorRate < this.errorThreshold && rateLimitRate < this.rateLimitThreshold);

    return {
      provider,
      model,
      totalRequests: total,
      successes,
      rateLimits,
      errors,
      avgLatencyMs: Math.round(avgLatencyMs),
      p95LatencyMs: Math.round(p95LatencyMs),
      healthy,
    };
  }

  /** Check if a specific provider+model is currently healthy. */
  isHealthy(provider: string, model: string): boolean {
    return this.snapshot(provider, model).healthy;
  }

  /** Get a snapshot for every pair that has had at least one request recorded. */
  allSnapshots(): HealthSnapshot[] {
    // snapshot() only replaces a bucket's entries; it never adds or removes map
    // keys, so iterating the live map here is safe.
    return Array.from(this.records.values(), ({ provider, model }) =>
      this.snapshot(provider, model),
    );
  }

  /** Clear all tracking data. */
  reset(): void {
    this.records.clear();
  }
}