116 lines
4.1 KiB
TypeScript
116 lines
4.1 KiB
TypeScript
import type { ClassificationResult, PromptCategory } from './types.js';
|
|
|
|
// ── Keyword patterns for classification ────────────────────────
|
|
|
|
/**
 * Heuristic signals that a prompt is about programming.
 * Each pattern contributes at most one point to the `code` score.
 */
const CODE_PATTERNS: RegExp[] = [
  // JS/TS keywords. Alternatives ending in a space must be followed directly
  // by a word character because of the trailing `\b` (e.g. `const x`).
  /\b(function|const |let |var |class |import |export |return |async |await )\b/,
  // Python constructs. `print(` is matched outside the `\b`-terminated group:
  // a trailing `\b` after `(` would require a word character to follow and so
  // reject `print("x")` and `print()`.
  /\b(def |lambda |if __name__)\b|\bprint\(/,
  // Arrow functions, with or without whitespace before `=>` (`()=>`, `() =>`).
  /[{}();]\s*=>/,
  // Fenced markdown code block.
  /```[\s\S]*```/,
  // Language and platform names (case-insensitive).
  /\b(typescript|javascript|python|rust|golang|java|kotlin|swift|sql|html|css|react|node)\b/i,
  // Development-workflow vocabulary (case-insensitive).
  /\b(debug|refactor|compile|build|deploy|lint|test|api|endpoint|route|middleware)\b/i,
  // Troubleshooting vocabulary (case-insensitive).
  /\b(fix|bug|error|exception|stack trace|undefined|null|NaN)\b/i,
];
|
|
|
|
/**
 * Heuristic signals that a prompt is about mathematics.
 * Each pattern contributes at most one point to the `math` score.
 */
const MATH_PATTERNS: RegExp[] = [
  // Computation and calculus vocabulary (case-insensitive).
  /\b(calculate|compute|solve|equation|formula|integral|derivative|matrix)\b/i,
  // Statistics vocabulary (case-insensitive).
  /\b(probability|statistics|regression|correlation|variance|median|mean)\b/i,
  // Branches of mathematics and proof vocabulary (case-insensitive).
  /\b(algebra|geometry|calculus|theorem|proof|hypothesis)\b/i,
  // Two or more consecutive operator characters (e.g. `**`, `==`, `+=`).
  // NOTE(review): this also matches `//` in URLs and `--` in CLI flags.
  /[+\-*/^=]{2,}/,
  // A binary arithmetic expression between integers, e.g. `2 + 2`.
  // NOTE(review): this also matches dates such as `2024-01-15`.
  /\d+\s*[+\-*/^]\s*\d+/,
];
|
|
|
|
/**
 * Heuristic signals that a prompt asks for analysis or explanation.
 * Each pattern contributes at most one point to the `reasoning` score.
 * All patterns are case-insensitive.
 */
const REASONING_PATTERNS: RegExp[] = [
  // Analytical verbs and argument vocabulary.
  /\b(explain|analyze|compare|evaluate|reason|logic|argument|conclusion)\b/i,
  // Open-ended question forms; `trade-?offs` accepts both spellings.
  /\b(why|how does|what if|pros and cons|trade-?offs|implications)\b/i,
  // Explicit requests for stepwise thinking; hyphen or space between words.
  /\b(step[- ]by[- ]step|chain of thought|think through|break down)\b/i,
  // Planning and design vocabulary.
  /\b(strategy|approach|methodology|framework|architecture|design)\b/i,
];
|
|
|
|
/**
 * Heuristic signals that a prompt asks for writing or rewriting text.
 * Each pattern contributes at most one point to the `creative` score.
 * All patterns are case-insensitive.
 */
const CREATIVE_PATTERNS: RegExp[] = [
  // Authoring verbs and common written forms.
  /\b(write|compose|draft|create|generate|story|poem|essay|blog|article)\b/i,
  // Ideation and fiction vocabulary.
  /\b(creative|imaginative|brainstorm|ideas|fiction|narrative|dialogue)\b/i,
  // Text-transformation requests.
  /\b(rewrite|rephrase|summarize|translate|tone|style|voice)\b/i,
];
|
|
|
|
// ── Token estimation ───────────────────────────────────────────
|
|
|
|
/**
 * Rough token estimate: ~4 chars per token for English text.
 * Good enough for routing decisions.
 *
 * @param text - Text to measure; its length is taken in UTF-16 code units.
 * @returns `text.length / 4` rounded up, so the empty string yields `0`.
 */
function estimateTokens(text: string): number {
  const charsPerToken = 4;
  return Math.ceil(text.length / charsPerToken);
}
|
|
|
|
// ── Classifier ─────────────────────────────────────────────────
|
|
|
|
/**
 * Count how many of `patterns` match somewhere in `text`.
 *
 * A pattern scores at most once, however many times it occurs.
 *
 * @param text - Text to scan.
 * @param patterns - Regular expressions to try; the array is not modified.
 * @returns The number of patterns with at least one match (0 for an empty list).
 */
function countMatches(text: string, patterns: readonly RegExp[]): number {
  let matched = 0;
  for (const pattern of patterns) {
    // `search` always scans from index 0 and leaves `lastIndex` unchanged, so
    // a shared pattern carrying the `g` or `y` flag gives the same answer on
    // every call. `RegExp.prototype.test` would resume from `lastIndex`.
    if (text.search(pattern) !== -1) matched++;
  }
  return matched;
}
|
|
|
|
/**
 * Check if messages contain image content parts (vision request).
 *
 * String content never counts as an image. For array content, a part counts
 * when it is a non-null object whose `type` property is exactly `'image_url'`.
 *
 * @param messages - Chat messages with string or multipart (array) content.
 * @returns `true` as soon as any message has an `image_url` part, else `false`.
 */
function hasImageContent(messages: { role: string; content: string | unknown[] }[]): boolean {
  const isImagePart = (part: unknown): boolean =>
    typeof part === 'object' &&
    part !== null &&
    'type' in part &&
    (part as { type: unknown }).type === 'image_url';

  return messages.some(({ content }) => Array.isArray(content) && content.some(isImagePart));
}
|
|
|
|
/**
 * Flatten one message's content to plain text.
 *
 * String content is returned as-is. For multipart content, the `text` of every
 * `{ type: 'text', text: string }` part is kept (joined with newlines) and all
 * other parts, such as images, are ignored.
 */
function extractMessageText(content: string | unknown[]): string {
  if (typeof content === 'string') return content;

  const texts: string[] = [];
  for (const part of content) {
    if (typeof part !== 'object' || part === null) continue;
    const { type, text } = part as { type?: unknown; text?: unknown };
    if (type === 'text' && typeof text === 'string') texts.push(text);
  }
  return texts.join('\n');
}

/**
 * Classify a prompt into a category based on keyword matching.
 * No LLM needed — pure regex heuristics.
 * Detects vision (image) content and returns 'vision' category when present.
 *
 * Text is gathered from string content and from the text parts of multipart
 * content, so multipart messages are scored and counted like plain ones.
 *
 * @param messages - Chat messages with string or multipart (array) content.
 * @returns The winning category and a rough token estimate for the text. For
 *   vision requests the estimate has a flat 1000 tokens added.
 */
export function classifyPrompt(
  messages: { role: string; content: string | unknown[] }[]
): ClassificationResult {
  const fullText = messages.map(m => extractMessageText(m.content)).join('\n');
  const estimatedTokens = estimateTokens(fullText);

  // Check for vision content first — image inputs always classify as 'vision'.
  if (hasImageContent(messages)) {
    // Flat allowance on top of the text estimate (presumably for image tokens).
    return { category: 'vision', estimatedTokens: estimatedTokens + 1000 };
  }

  const scores: Record<PromptCategory, number> = {
    code: countMatches(fullText, CODE_PATTERNS),
    math: countMatches(fullText, MATH_PATTERNS),
    reasoning: countMatches(fullText, REASONING_PATTERNS),
    creative: countMatches(fullText, CREATIVE_PATTERNS),
    general: 1, // baseline: wins only when no other category matched
    vision: 0,
  };

  // Pick the highest-scoring category. The comparison is strict, so on a tie
  // the category listed earlier in `scores` wins.
  let best: PromptCategory = 'general';
  let bestScore = 0;
  for (const [cat, score] of Object.entries(scores) as [PromptCategory, number][]) {
    if (score > bestScore) {
      bestScore = score;
      best = cat;
    }
  }

  return { category: best, estimatedTokens };
}
|