/**
 * Memory extraction utilities — prompt building, response parsing, regex fallback.
 *
 * Products call their own LLM provider with the prompt from buildExtractionPrompt(),
 * then pass the response to parseExtractionResponse().
 * If LLM is unavailable, regexFallbackExtraction() provides basic extraction.
 */

import type { ExtractedMemory } from './types.js';

/** Inputs that shape the extraction prompt. */
export interface ExtractionContext {
  /** Optional title; when truthy it is appended to the prompt as a `Title:` line. */
  title?: string;
  /** Optional extra context; when truthy it is appended to the prompt as a `Context:` line. */
  context?: string;
  /** Hall names offered to the LLM as the allowed values of `"hall"`. */
  hallTypes: readonly string[];
}

/**
 * Build a structured extraction prompt for an LLM.
 *
 * The instruction segments are joined with single spaces, so the instruction
 * text is emitted as one line; only the optional Title/Context parts introduce
 * line breaks (each is prefixed with `\n`).
 *
 * @param content - The text content to extract memories from
 * @param ctx - Context including title, additional context, and allowed hall types
 * @returns The prompt string
 */
export function buildExtractionPrompt(content: string, ctx: ExtractionContext): string {
  const hallList = ctx.hallTypes.join(', ');
  const titleLine = ctx.title ? `\nTitle: ${ctx.title}` : '';
  const contextLine = ctx.context ? `\nContext: ${ctx.context}` : '';

  const instructions = [
    'Extract structured memories from the following content.',
    'For each distinct memory, return a JSON array where each element has:',
    `- "hall": one of [${hallList}]`,
    '- "content": the memory summarized in 1-2 sentences',
    '- "roomSlug": a short kebab-case topic slug (e.g. "auth-migration", "api-design")',
    '- "entities": array of named entities mentioned (people, projects, technologies, places)',
    'Rules:',
    '- Only extract genuinely important or referenceable facts, decisions, or events',
    '- Skip trivial or obvious statements',
    '- Each memory should be self-contained (understandable without the original context)',
    '- Prefer specific details over vague summaries',
    '- Return valid JSON only — no markdown fences, no explanation',
  ].join(' ');

  return `${instructions}${titleLine}${contextLine} Content: ${content}`;
}

/** Parse `text` as JSON and return it only if the top-level value is an array. */
function tryParseArray(text: string): unknown[] | undefined {
  try {
    const parsed: unknown = JSON.parse(text);
    return Array.isArray(parsed) ? parsed : undefined;
  } catch {
    return undefined;
  }
}

/** True for non-null objects that carry both a `hall` and a `content` key. */
function isMemoryLike(item: unknown): item is Record<string, unknown> {
  return typeof item === 'object' && item !== null && 'hall' in item && 'content' in item;
}

/**
 * Coerce a JSON value to text. Truthy primitives are stringified; falsy values
 * and non-primitives (objects, arrays, null) become '' so they never surface as
 * "[object Object]" or "null".
 */
function toText(value: unknown): string {
  if (typeof value === 'string') return value;
  if (typeof value === 'number' || typeof value === 'boolean') return value ? String(value) : '';
  return '';
}

/**
 * Parse an LLM extraction response into ExtractedMemory[].
 *
 * Handles:
 * - Clean JSON arrays
 * - JSON wrapped in markdown code fences
 * - A JSON array embedded in other text or in a wrapper object (the outermost
 *   `[` … `]` span is parsed as a fallback)
 * - Malformed JSON (returns empty array)
 *
 * Array elements that are not objects with both `hall` and `content` keys are
 * dropped. `roomSlug` falls back to `room_slug`, then to 'general'. `entities`
 * keeps only primitive values (stringified) and is [] when not an array.
 *
 * @param llmOutput - Raw text returned by the LLM
 * @returns Parsed memories; empty array when nothing usable is found
 */
export function parseExtractionResponse(llmOutput: string): ExtractedMemory[] {
  if (!llmOutput) return [];

  let cleaned = llmOutput.trim();
  if (cleaned.length === 0) return [];

  // Strip markdown code fences if present
  if (cleaned.startsWith('```')) {
    cleaned = cleaned.replace(/^```(?:json)?\s*\n?/, '').replace(/\n?```\s*$/, '');
  }

  let items = tryParseArray(cleaned);
  if (!items) {
    // Fallback for output with surrounding prose or a wrapper object:
    // parse the span between the first '[' and the last ']'.
    const start = cleaned.indexOf('[');
    const end = cleaned.lastIndexOf(']');
    if (start !== -1 && end > start) {
      items = tryParseArray(cleaned.slice(start, end + 1));
    }
  }
  if (!items) return [];

  return items.filter(isMemoryLike).map(item => ({
    hall: toText(item.hall),
    content: toText(item.content),
    roomSlug: toText(item.roomSlug) || toText(item.room_slug) || 'general',
    entities: Array.isArray(item.entities)
      ? item.entities
          .filter(
            (entity: unknown) =>
              typeof entity === 'string' || typeof entity === 'number' || typeof entity === 'boolean'
          )
          .map(String)
      : [],
  }));
}

/**
 * Line-prefix patterns used by regexFallbackExtraction(), checked in order.
 * Each regex captures the text after the "keyword:" prefix in group 1.
 */
const FALLBACK_PATTERNS: ReadonlyArray<{ regex: RegExp; hall: string }> = [
  { regex: /^(?:decision|decided|resolve[ds]?):\s*(.+)/i, hall: 'decisions' },
  { regex: /^(?:todo|action|task):\s*(.+)/i, hall: 'decisions' },
  { regex: /^(?:found|discovered|learned|til):\s*(.+)/i, hall: 'discoveries' },
  { regex: /^(?:prefer|always|never):\s*(.+)/i, hall: 'preferences' },
  { regex: /^(?:event|happened|occurred):\s*(.+)/i, hall: 'events' },
  { regex: /^(?:tip|note|remember|important):\s*(.+)/i, hall: 'advice' },
  { regex: /^(?:error|bug|issue|broken):\s*(.+)/i, hall: 'errors' },
  { regex: /^(?:pattern|recurring|trend):\s*(.+)/i, hall: 'patterns' },
  { regex: /^(?:feeling|mood|emotion):\s*(.+)/i, hall: 'emotions' },
  { regex: /^(?:insight|observation|noticed):\s*(.+)/i, hall: 'insights' },
];

/**
 * Regex-based fallback extraction when LLM is unavailable.
 *
 * Each line has leading whitespace and markdown markers (`-`, `*`, `>`, `#`)
 * stripped, then is matched case-insensitively against these prefixes
 * (first match wins):
 * - "Decision:" / "Decided:" / "Resolve:" / "Resolved:" / "Resolves:" → decisions
 * - "TODO:" / "Action:" / "Task:" → decisions
 * - "Found:" / "Discovered:" / "Learned:" / "TIL:" → discoveries
 * - "Prefer:" / "Always:" / "Never:" → preferences
 * - "Event:" / "Happened:" / "Occurred:" → events
 * - "Tip:" / "Note:" / "Remember:" / "Important:" → advice
 * - "Error:" / "Bug:" / "Issue:" / "Broken:" → errors
 * - "Pattern:" / "Recurring:" / "Trend:" → patterns
 * - "Feeling:" / "Mood:" / "Emotion:" → emotions
 * - "Insight:" / "Observation:" / "Noticed:" → insights
 *
 * Every memory gets roomSlug 'general'; entities are the @mentions and #tags
 * found in the captured text.
 *
 * @param content - Raw text content
 * @returns Array of extracted memories (best-effort)
 */
export function regexFallbackExtraction(content: string): ExtractedMemory[] {
  const memories: ExtractedMemory[] = [];

  for (const line of content.split('\n')) {
    const trimmed = line.replace(/^[\s\-*>#]+/, '').trim();
    if (!trimmed) continue;

    for (const { regex, hall } of FALLBACK_PATTERNS) {
      const captured = trimmed.match(regex)?.[1];
      if (captured) {
        memories.push({
          hall,
          content: captured.trim(),
          roomSlug: 'general',
          entities: extractEntities(captured),
        });
        break; // a line contributes at most one memory
      }
    }
  }

  return memories;
}

/**
 * Extract simple entities from text: `@mention` and `#tag` tokens, returned
 * without their sigil, de-duplicated, mentions first.
 */
function extractEntities(text: string): string[] {
  const entities = new Set<string>();

  for (const pattern of [/@(\w+)/g, /#(\w+)/g]) {
    for (const token of text.match(pattern) ?? []) {
      entities.add(token.slice(1));
    }
  }

  return Array.from(entities);
}