// learning_ai_common_plat/packages/llm-router/src/health.ts

import type { HealthSnapshot, RequestRecord } from './types.js';

/** Window of request records for one provider+model pair, plus the pair's labels. */
interface TrackedPair {
  provider: string;
  model: string;
  records: RequestRecord[];
}

/**
 * Sliding-window health tracker for provider+model pairs.
 * Tracks latency, error rates, and rate-limit hits.
 *
 * Records older than `windowMs` (relative to `Date.now()`) are dropped whenever
 * a pair is written to or read. A pair is reported unhealthy once it has at
 * least 3 records in the window and either its error rate reaches
 * `errorThreshold` or its rate-limit rate reaches `rateLimitThreshold`.
 */
export class HealthTracker {
  private records = new Map<string, TrackedPair>();
  private readonly windowMs: number;
  private readonly errorThreshold: number;
  private readonly rateLimitThreshold: number;

  /**
   * @param opts.windowMs Length of the sliding window in milliseconds (default 60 000).
   * @param opts.errorThreshold Error-rate fraction at which a pair becomes unhealthy (default 0.5).
   * @param opts.rateLimitThreshold Rate-limit fraction at which a pair becomes unhealthy (default 0.3).
   */
  constructor(opts?: { windowMs?: number; errorThreshold?: number; rateLimitThreshold?: number }) {
    this.windowMs = opts?.windowMs ?? 60_000;
    this.errorThreshold = opts?.errorThreshold ?? 0.5;
    this.rateLimitThreshold = opts?.rateLimitThreshold ?? 0.3;
  }

  /**
   * Build the map key for a pair.
   *
   * JSON-encoding the tuple quotes and escapes each part, so two different
   * pairs can never produce the same key, whatever characters they contain.
   */
  private key(provider: string, model: string): string {
    return JSON.stringify([provider, model]);
  }

  /** Return a new array holding only the records still inside the sliding window. */
  private prune(records: RequestRecord[]): RequestRecord[] {
    const cutoff = Date.now() - this.windowMs;
    return records.filter(r => r.timestamp >= cutoff);
  }

  /** Record a completed request (success, rate_limit, or error). */
  record(provider: string, model: string, entry: RequestRecord): void {
    const k = this.key(provider, model);
    const tracked = this.records.get(k);
    if (tracked) {
      tracked.records.push(entry);
      tracked.records = this.prune(tracked.records);
    } else {
      // Labels are stored with the records so they never have to be parsed
      // back out of the key.
      this.records.set(k, { provider, model, records: this.prune([entry]) });
    }
  }

  /**
   * Get health snapshot for a provider+model pair.
   *
   * Expired records of an already-tracked pair are pruned as a side effect.
   * Asking about a pair that was never recorded does not start tracking it;
   * the result is an all-zero, healthy snapshot.
   *
   * Latency figures are computed from successful requests only and rounded
   * to the nearest integer; both are 0 when there are no successes.
   */
  snapshot(provider: string, model: string): HealthSnapshot {
    const tracked = this.records.get(this.key(provider, model));
    const records: RequestRecord[] = tracked ? this.prune(tracked.records) : [];
    if (tracked) {
      tracked.records = records;
    }

    // Single pass: tally each status and collect success latencies.
    let successes = 0;
    let rateLimits = 0;
    let errors = 0;
    const successLatencies: number[] = [];
    for (const r of records) {
      switch (r.status) {
        case 'success':
          successes++;
          successLatencies.push(r.latencyMs);
          break;
        case 'rate_limit':
          rateLimits++;
          break;
        case 'error':
          errors++;
          break;
      }
    }
    successLatencies.sort((a, b) => a - b);

    const total = records.length;
    const sampleCount = successLatencies.length;

    const avgLatencyMs =
      sampleCount > 0 ? successLatencies.reduce((a, b) => a + b, 0) / sampleCount : 0;

    // p95 is the sorted sample at index floor(0.95 * n). That index is always
    // in range for n >= 1; the fallback only satisfies strict index checking.
    const p95LatencyMs =
      sampleCount > 0
        ? (successLatencies[Math.floor(sampleCount * 0.95)] ?? successLatencies[sampleCount - 1]!)
        : 0;

    // Healthy = not too many errors or rate limits
    const errorRate = total > 0 ? errors / total : 0;
    const rateLimitRate = total > 0 ? rateLimits / total : 0;
    const healthy =
      total < 3 || // not enough data → assume healthy
      (errorRate < this.errorThreshold && rateLimitRate < this.rateLimitThreshold);

    return {
      provider,
      model,
      totalRequests: total,
      successes,
      rateLimits,
      errors,
      avgLatencyMs: Math.round(avgLatencyMs),
      p95LatencyMs: Math.round(p95LatencyMs),
      healthy,
    };
  }

  /** Check if a specific provider+model is currently healthy. */
  isHealthy(provider: string, model: string): boolean {
    return this.snapshot(provider, model).healthy;
  }

  /** Get a snapshot for every pair that has had at least one `record()` call since the last reset. */
  allSnapshots(): HealthSnapshot[] {
    // snapshot() only replaces the records of existing entries; it never adds
    // or removes map keys, so iterating the live map here is safe.
    return Array.from(this.records.values(), ({ provider, model }) =>
      this.snapshot(provider, model),
    );
  }

  /** Clear all tracking data. */
  reset(): void {
    this.records.clear();
  }
}