learning_ai_common_plat/packages/llm-router/src/classifier.ts
saravanakumardb1 b1b3fe42df feat(llm-router): add @bytelyst/llm-router — pure-code LLM router for free-tier providers
- 4 providers: Groq, OpenRouter, Together AI, Cerebras
- Regex-based prompt classifier (code/math/reasoning/creative/general)
- Instance-level round-robin state (no shared module globals)
- Sliding-window health tracker (latency, error rate, rate-limit rate)
- Auto-fallback on 429/5xx with per-attempt latency tracking
- Telemetry hook for all routing decisions (auto + explicit)
- OpenRouter recommended headers (HTTP-Referer, X-Title)
- 47 tests across 5 test files, zero runtime deps
2026-03-12 13:45:49 -07:00

86 lines
3.1 KiB
TypeScript

import type { ClassificationResult, PromptCategory } from './types.js';
// ── Keyword patterns for classification ────────────────────────
/**
 * Heuristic signals that a prompt is about code.
 *
 * Each entry contributes at most one point to the "code" score, no matter how
 * many times it occurs in the text.
 */
const CODE_PATTERNS = [
  // JS/TS keywords (case-sensitive). Apart from `function`, each keyword must be
  // followed by a space. There is deliberately no trailing `\b` after that
  // space: it would require a word character next and so reject
  // `import { x }`, `return {`, `await (`, etc.
  /\b(?:function\b|const |let |var |class |import |export |return |async |await )/,
  // Python markers (case-sensitive). `print(` must match regardless of what
  // follows the parenthesis, e.g. `print("hi")`.
  /\b(?:def |print\(|if __name__\b|lambda )/,
  // Arrow following a bracket/paren/semicolon, with optional whitespace so the
  // usual `() =>` spelling is recognised as well as `()=>`.
  /[{}();]\s*=>/,
  // Fenced code block (opening and closing triple backticks).
  /```[\s\S]*```/,
  // Language and technology names.
  /\b(typescript|javascript|python|rust|golang|java|kotlin|swift|sql|html|css|react|node)\b/i,
  // Development-workflow vocabulary.
  /\b(debug|refactor|compile|build|deploy|lint|test|api|endpoint|route|middleware)\b/i,
  // Debugging vocabulary.
  /\b(fix|bug|error|exception|stack trace|undefined|null|NaN)\b/i,
];
/**
 * Heuristic signals that a prompt is about mathematics.
 *
 * Each entry contributes at most one point to the "math" score, no matter how
 * many times it occurs in the text.
 */
const MATH_PATTERNS = [
  // Computation vocabulary.
  /\b(calculate|compute|solve|equation|formula|integral|derivative|matrix)\b/i,
  // Statistics vocabulary.
  /\b(probability|statistics|regression|correlation|variance|median|mean)\b/i,
  // Branches of mathematics and proof vocabulary.
  /\b(algebra|geometry|calculus|theorem|proof|hypothesis)\b/i,
  // Run of two or more operator characters (`**`, `==`, `+=`, ...). The
  // lookbehind skips runs that directly follow a colon so the `//` of a URL
  // scheme (`https://`) is not counted as a math signal.
  // NOTE(review): still matches `--flag` and `---` separators.
  /(?<!:)[+\-*/^=]{2,}/,
  // Two integers joined by a single binary operator, e.g. `12 + 30`.
  // NOTE(review): also matches dates such as `2026-03-12` and ranges like `10-20`.
  /\d+\s*[+\-*/^]\s*\d+/,
];
// Heuristic signals that a prompt asks for analysis or reasoning.
// All entries are case-insensitive whole-word/phrase matches; each entry that
// matches adds one point to the "reasoning" score.
const REASONING_PATTERNS = [
// Analytical verbs and argument vocabulary.
/\b(explain|analyze|compare|evaluate|reason|logic|argument|conclusion)\b/i,
// Question openers and trade-off phrasing ("tradeoffs" or "trade-offs").
/\b(why|how does|what if|pros and cons|trade-?offs|implications)\b/i,
// Explicit requests for stepwise thinking; "step by step" may be written with
// spaces or hyphens.
/\b(step[- ]by[- ]step|chain of thought|think through|break down)\b/i,
// Planning and design vocabulary.
/\b(strategy|approach|methodology|framework|architecture|design)\b/i,
];
// Heuristic signals that a prompt asks for creative or editorial writing.
// All entries are case-insensitive whole-word matches; each entry that matches
// adds one point to the "creative" score.
const CREATIVE_PATTERNS = [
// Authoring verbs and common output formats.
/\b(write|compose|draft|create|generate|story|poem|essay|blog|article)\b/i,
// Ideation and fiction vocabulary.
/\b(creative|imaginative|brainstorm|ideas|fiction|narrative|dialogue)\b/i,
// Text-transformation requests and stylistic attributes.
/\b(rewrite|rephrase|summarize|translate|tone|style|voice)\b/i,
];
// ── Token estimation ───────────────────────────────────────────
/**
 * Approximate how many tokens `text` occupies, using the rule of thumb of
 * roughly four characters per token for English text. Precise enough for
 * routing decisions; not a real tokenizer.
 *
 * @param text - Text to measure (length is counted in UTF-16 code units).
 * @returns The character count divided by four, rounded up; 0 for an empty
 *   string.
 */
function estimateTokens(text: string): number {
  const charsPerToken = 4;
  const { length } = text;
  return Math.ceil(length / charsPerToken);
}
// ── Classifier ─────────────────────────────────────────────────
/**
 * Count how many of the given patterns match `text` at least once.
 *
 * Each pattern contributes at most 1 to the result, regardless of how many
 * times it occurs in the text.
 *
 * @param text - Text to scan.
 * @param patterns - Regular expressions to test against `text`.
 * @returns Number of patterns that matched.
 */
function countMatches(text: string, patterns: readonly RegExp[]): number {
  let count = 0;
  for (const pattern of patterns) {
    // `test()` on a global or sticky regex resumes from `lastIndex`, so a
    // pattern shared across calls could otherwise miss a match because of
    // state left behind by an earlier call. Always start from the beginning.
    pattern.lastIndex = 0;
    if (pattern.test(text)) count++;
  }
  return count;
}
/**
 * Assign a prompt to a category using regex heuristics only — no model call
 * is involved.
 *
 * The `content` of every message (whatever its role) is joined with newlines
 * and scored against each category's pattern list. The category with the
 * strictly highest score wins; on a tie the earlier category in the order
 * code → math → reasoning → creative is kept. `general` carries a fixed score
 * of 1 and is considered last, so it is chosen only when no other category
 * scores at all.
 *
 * @param messages - Chat messages to classify.
 * @returns The winning category together with a rough token estimate for the
 *   joined text.
 */
export function classifyPrompt(
  messages: { role: string; content: string }[]
): ClassificationResult {
  const combined = messages.map(({ content }) => content).join('\n');

  // Order matters: when scores are equal the earlier entry stays in the lead.
  const tally: [PromptCategory, number][] = [
    ['code', countMatches(combined, CODE_PATTERNS)],
    ['math', countMatches(combined, MATH_PATTERNS)],
    ['reasoning', countMatches(combined, REASONING_PATTERNS)],
    ['creative', countMatches(combined, CREATIVE_PATTERNS)],
    ['general', 1], // baseline
  ];

  // Start from a zero score so any category with at least one hit takes over.
  const seed: [PromptCategory, number] = ['general', 0];
  const [category] = tally.reduce(
    (leader, contender) => (contender[1] > leader[1] ? contender : leader),
    seed
  );

  return { category, estimatedTokens: estimateTokens(combined) };
}