learning_ai_common_plat/packages/palace/src/extraction.ts
saravanakumardb1 d1c6cf47c8 feat(palace): add @bytelyst/palace shared package — MemPalace primitives (91 tests)
New shared package: packages/palace/ (@bytelyst/palace)

Modules:
- types.ts — BasePalaceWingDoc, RoomDoc, MemoryDoc, TunnelDoc, KGTripleDoc, DiaryDoc
- halls.ts — HallType union, HALL_PRESETS (notelett/mindlyst/coding), hallFromLabel()
- cosine.ts — cosineSimilarity(), topKByCosine(), normalizeVector()
- dedup.ts — isContentDuplicate(), isExactDuplicate(), findClosestMatch()
- decay.ts — computeDecayedRelevance(), boostRelevance()
- extraction.ts — buildExtractionPrompt(), parseExtractionResponse(), regexFallbackExtraction()
- kg.ts — findContradictions(), mergeTriples(), isTripleCurrent()
- wakeup.ts — buildWakeUpLayers(), truncateToTokenBudget(), WAKEUP_PRESETS
- config.ts — palaceConfigSchema (Zod)

7 test files, 91 tests passing.
Consumed by NoteLett, MindLyst, and future palace-enabled products.
2026-04-10 00:57:00 -07:00

155 lines
5.1 KiB
TypeScript

/**
* Memory extraction utilities — prompt building, response parsing, regex fallback.
*
* Products call their own LLM provider with the prompt from buildExtractionPrompt(),
* then pass the response to parseExtractionResponse().
* If LLM is unavailable, regexFallbackExtraction() provides basic extraction.
*/
import type { ExtractedMemory } from './types.js';
/**
 * Inputs that shape the prompt produced by buildExtractionPrompt().
 */
export interface ExtractionContext {
/** Optional title; when non-empty it is added to the prompt as a `Title:` line. */
title?: string;
/** Optional extra context; when non-empty it is added to the prompt as a `Context:` line. */
context?: string;
/** Hall names listed in the prompt as the allowed values for each memory's "hall" field. */
hallTypes: readonly string[];
}
/**
 * Assemble the instruction text that asks an LLM to extract memories as JSON.
 *
 * The prompt lists the allowed halls, the expected fields of every array
 * element, the extraction rules, and finally the content itself. `Title:` and
 * `Context:` lines are inserted only when the corresponding field is non-empty.
 *
 * @param content - Text the memories should be extracted from
 * @param ctx - Optional title/context plus the hall names the model may use
 * @returns The complete prompt as a single newline-separated string
 */
export function buildExtractionPrompt(content: string, ctx: ExtractionContext): string {
  const allowedHalls = ctx.hallTypes.join(', ');

  const promptLines: string[] = [
    'Extract structured memories from the following content.',
    'For each distinct memory, return a JSON array where each element has:',
    `- "hall": one of [${allowedHalls}]`,
    '- "content": the memory summarized in 1-2 sentences',
    '- "roomSlug": a short kebab-case topic slug (e.g. "auth-migration", "api-design")',
    '- "entities": array of named entities mentioned (people, projects, technologies, places)',
    'Rules:',
    '- Only extract genuinely important or referenceable facts, decisions, or events',
    '- Skip trivial or obvious statements',
    '- Each memory should be self-contained (understandable without the original context)',
    '- Prefer specific details over vague summaries',
    '- Return valid JSON only — no markdown fences, no explanation',
  ];

  // Optional metadata sits between the rules and the content section.
  if (ctx.title) {
    promptLines.push(`Title: ${ctx.title}`);
  }
  if (ctx.context) {
    promptLines.push(`Context: ${ctx.context}`);
  }

  promptLines.push('Content:', content);
  return promptLines.join('\n');
}
/**
 * Convert a JSON scalar to text; anything else becomes ''.
 * Falsy numbers/booleans map to '' and objects/arrays/null are rejected, so
 * "[object Object]" can never leak into a stored memory.
 */
function scalarToText(value: unknown): string {
  if (typeof value === 'string') return value;
  if (typeof value === 'number' || typeof value === 'boolean') {
    return value ? String(value) : '';
  }
  return '';
}

/** JSON.parse that reports failure as `undefined` instead of throwing. */
function tryParseJson(text: string): unknown {
  try {
    return JSON.parse(text) as unknown;
  } catch {
    return undefined;
  }
}

/**
 * Normalize one parsed array element into an ExtractedMemory.
 * Returns null when the element is not an object carrying both `hall` and
 * `content`, or when its content is blank / not a scalar.
 */
function toExtractedMemory(item: unknown): ExtractedMemory | null {
  if (typeof item !== 'object' || item === null) return null;
  if (!('hall' in item) || !('content' in item)) return null;

  const record = item as Record<string, unknown>;
  const content = scalarToText(record.content);
  // A memory without text is useless downstream; drop it instead of storing ''.
  if (content.trim().length === 0) return null;

  const entities: string[] = [];
  if (Array.isArray(record.entities)) {
    for (const entity of record.entities as unknown[]) {
      if (typeof entity === 'string') {
        if (entity.trim().length > 0) entities.push(entity);
      } else if (typeof entity === 'number') {
        entities.push(String(entity));
      }
    }
  }

  return {
    hall: scalarToText(record.hall),
    content,
    // Accept the snake_case key some models emit; fall back to a catch-all room.
    roomSlug: scalarToText(record.roomSlug) || scalarToText(record.room_slug) || 'general',
    entities,
  };
}

/**
 * Parse an LLM extraction response into ExtractedMemory[].
 *
 * The JSON array is looked for, in order, as:
 * 1. the whole (trimmed) response,
 * 2. the body of the first markdown code fence, wherever it appears,
 * 3. the span from the first `[` to the last `]` (covers prose before/after
 *    the array and arrays nested in a wrapper object).
 *
 * Elements are kept only if they are objects with `hall` and `content` keys
 * and non-blank scalar content. `roomSlug` falls back to `room_slug`, then to
 * "general". `entities` keeps non-blank strings and numbers only.
 *
 * @param llmOutput - Raw text returned by the model
 * @returns The normalized memories; an empty array when no JSON array can be
 *          recovered (empty input, malformed JSON, non-array payload)
 */
export function parseExtractionResponse(llmOutput: string): ExtractedMemory[] {
  if (!llmOutput) return [];
  const trimmed = llmOutput.trim();
  if (trimmed.length === 0) return [];

  const candidates: string[] = [trimmed];

  // Fence with optional language tag; the body is captured lazily up to the closing fence.
  const fenced = /```[a-zA-Z0-9_-]*[^\S\n]*\n?([\s\S]*?)```/.exec(trimmed);
  if (fenced && fenced[1] !== undefined) {
    candidates.push(fenced[1].trim());
  }

  const open = trimmed.indexOf('[');
  const close = trimmed.lastIndexOf(']');
  if (open !== -1 && close > open) {
    candidates.push(trimmed.slice(open, close + 1));
  }

  for (const candidate of candidates) {
    const parsed = tryParseJson(candidate);
    if (!Array.isArray(parsed)) continue;

    const memories: ExtractedMemory[] = [];
    for (const item of parsed as unknown[]) {
      const memory = toExtractedMemory(item);
      if (memory !== null) memories.push(memory);
    }
    return memories;
  }
  return [];
}
/** Line-label keywords recognised by the regex fallback, grouped by target hall. */
const FALLBACK_HALL_KEYWORDS: ReadonlyArray<{ hall: string; keywords: readonly string[] }> = [
  {
    hall: 'decisions',
    keywords: ['decision', 'decided', 'resolve', 'resolved', 'resolves', 'todo', 'action', 'task'],
  },
  { hall: 'discoveries', keywords: ['found', 'discovered', 'learned', 'til'] },
  { hall: 'preferences', keywords: ['prefer', 'always', 'never'] },
  { hall: 'events', keywords: ['event', 'happened', 'occurred'] },
  { hall: 'advice', keywords: ['tip', 'note', 'remember', 'important'] },
  { hall: 'errors', keywords: ['error', 'bug', 'issue', 'broken'] },
  { hall: 'patterns', keywords: ['pattern', 'recurring', 'trend'] },
  { hall: 'emotions', keywords: ['feeling', 'mood', 'emotion'] },
  { hall: 'insights', keywords: ['insight', 'observation', 'noticed'] },
];

/** Lower-cased keyword → hall lookup derived from FALLBACK_HALL_KEYWORDS. */
const FALLBACK_KEYWORD_TO_HALL = new Map<string, string>(
  FALLBACK_HALL_KEYWORDS.flatMap(({ hall, keywords }) =>
    keywords.map((keyword): [string, string] => [keyword, hall])
  )
);

/**
 * Leading markdown noise: whitespace, bullets (- * +), quote/heading markers,
 * ordered-list numbers ("1." / "1)") and task checkboxes ("[ ]" / "[x]").
 */
const FALLBACK_LINE_LEADER = /^(?:[\s\-*+>#]+|\d+[.)]\s+|\[[ xX]\]\s*)+/;

/**
 * "<keyword>: <text>" at the start of a line, case-insensitive. Emphasis
 * markers hugging the label ("**Note:** x", "__Note__: x") are skipped so they
 * do not end up in the captured text. Group 1 = keyword, group 2 = text.
 */
const FALLBACK_LABEL_PATTERN = new RegExp(
  `^[*_]{0,3}(${[...FALLBACK_KEYWORD_TO_HALL.keys()].join('|')})[*_]{0,3}:[*_]{0,3}\\s*(.+)`,
  'i'
);

/**
 * Regex-based fallback extraction when LLM is unavailable.
 *
 * Each line is stripped of leading markdown markers and then checked for a
 * "<keyword>:" label (case-insensitive). Recognised labels:
 * - decision / decided / resolve(d|s) / todo / action / task → decisions
 * - found / discovered / learned / til → discoveries
 * - prefer / always / never → preferences
 * - event / happened / occurred → events
 * - tip / note / remember / important → advice
 * - error / bug / issue / broken → errors
 * - pattern / recurring / trend → patterns
 * - feeling / mood / emotion → emotions
 * - insight / observation / noticed → insights
 *
 * Lines without a recognised label are ignored. Every memory is filed under
 * roomSlug "general"; entities come from extractEntities() applied to the text
 * after the label.
 *
 * @param content - Raw text content
 * @returns Array of extracted memories in line order (best-effort)
 */
export function regexFallbackExtraction(content: string): ExtractedMemory[] {
  const memories: ExtractedMemory[] = [];

  for (const rawLine of content.split('\n')) {
    const line = rawLine.replace(FALLBACK_LINE_LEADER, '').trim();
    if (!line) continue;

    const match = FALLBACK_LABEL_PATTERN.exec(line);
    if (!match) continue;

    const hall = FALLBACK_KEYWORD_TO_HALL.get((match[1] ?? '').toLowerCase());
    const text = (match[2] ?? '').trim();
    if (!hall || !text) continue;

    memories.push({
      hall,
      content: text,
      roomSlug: 'general',
      entities: extractEntities(text),
    });
  }

  return memories;
}
/**
 * Extract simple entities from text: `@mentions` and `#tags`.
 *
 * Only these two marker styles are recognised; capitalized phrases are not
 * detected. A marker counts only when it does not continue a word, so e-mail
 * addresses ("bob@example.com") and URL fragments ("/docs#setup") are skipped.
 *
 * @param text - Text to scan
 * @returns Unique names without the leading `@` / `#`; all mentions come
 *          first, then tags, each in order of first appearance
 */
function extractEntities(text: string): string[] {
  const entities = new Set<string>();

  // @mentions — must not be preceded by a word character (rules out e-mail addresses)
  for (const match of text.matchAll(/(?<!\w)@(\w+)/g)) {
    const name = match[1];
    if (name) entities.add(name);
  }

  // #tags — must not be preceded by a word character or '/' (rules out URL fragments)
  for (const match of text.matchAll(/(?<![\w/])#(\w+)/g)) {
    const name = match[1];
    if (name) entities.add(name);
  }

  return Array.from(entities);
}