New shared package: packages/palace/ (@bytelyst/palace)

Modules:
- types.ts — BasePalaceWingDoc, RoomDoc, MemoryDoc, TunnelDoc, KGTripleDoc, DiaryDoc
- halls.ts — HallType union, HALL_PRESETS (notelett/mindlyst/coding), hallFromLabel()
- cosine.ts — cosineSimilarity(), topKByCosine(), normalizeVector()
- dedup.ts — isContentDuplicate(), isExactDuplicate(), findClosestMatch()
- decay.ts — computeDecayedRelevance(), boostRelevance()
- extraction.ts — buildExtractionPrompt(), parseExtractionResponse(), regexFallbackExtraction()
- kg.ts — findContradictions(), mergeTriples(), isTripleCurrent()
- wakeup.ts — buildWakeUpLayers(), truncateToTokenBudget(), WAKEUP_PRESETS
- config.ts — palaceConfigSchema (Zod)

7 test files, 91 tests passing.
Consumed by NoteLett, MindLyst, and future palace-enabled products.
155 lines
5.1 KiB
TypeScript
155 lines
5.1 KiB
TypeScript
/**
 * Memory extraction utilities — prompt building, response parsing, regex fallback.
 *
 * Products call their own LLM provider with the prompt from buildExtractionPrompt(),
 * then pass the response to parseExtractionResponse().
 * If the LLM is unavailable, regexFallbackExtraction() provides basic extraction.
 */
|
|
|
|
import type { ExtractedMemory } from './types.js';
|
|
|
|
/**
 * Caller-supplied context that shapes the prompt built by buildExtractionPrompt().
 */
export interface ExtractionContext {
  /** Title of the source content; added to the prompt as a `Title:` line when non-empty. */
  title?: string;
  /** Extra background text; added to the prompt as a `Context:` line when non-empty. */
  context?: string;
  /** Hall names the LLM may pick from; joined with ", " into the prompt's allowed list. */
  hallTypes: readonly string[];
}
|
|
|
|
/**
 * Build a structured extraction prompt for an LLM.
 *
 * The prompt asks for a bare JSON array whose elements carry `hall`, `content`,
 * `roomSlug` and `entities` — the same keys parseExtractionResponse() reads.
 * Optional `Title:` / `Context:` lines are placed directly after the rules,
 * followed by a blank line, a `Content:` header and the content itself.
 *
 * @param content - The text content to extract memories from
 * @param ctx - Context including title, additional context, and allowed hall types
 * @returns A system/user prompt string ready for LLM chat()
 */
export function buildExtractionPrompt(content: string, ctx: ExtractionContext): string {
  const hallList = ctx.hallTypes.join(', ');

  // One array element per prompt line keeps the exact line breaks explicit
  // instead of depending on the whitespace inside a multi-line template literal.
  const promptLines: string[] = [
    'Extract structured memories from the following content.',
    '',
    'For each distinct memory, return a JSON array where each element has:',
    `- "hall": one of [${hallList}]`,
    '- "content": the memory summarized in 1-2 sentences',
    '- "roomSlug": a short kebab-case topic slug (e.g. "auth-migration", "api-design")',
    '- "entities": array of named entities mentioned (people, projects, technologies, places)',
    '',
    'Rules:',
    '- Only extract genuinely important or referenceable facts, decisions, or events',
    '- Skip trivial or obvious statements',
    '- Each memory should be self-contained (understandable without the original context)',
    '- Prefer specific details over vague summaries',
    '- Return valid JSON only — no markdown fences, no explanation',
  ];

  // Empty or missing title/context contribute no line at all.
  if (ctx.title) promptLines.push(`Title: ${ctx.title}`);
  if (ctx.context) promptLines.push(`Context: ${ctx.context}`);

  promptLines.push('', 'Content:', content);
  return promptLines.join('\n');
}
|
|
|
|
/**
 * Parse an LLM extraction response into ExtractedMemory[].
 *
 * Parse attempts, in order — the first candidate that is valid JSON decides the result:
 * 1. The whole response, with a leading/trailing markdown code fence stripped
 * 2. The body of the first fenced block found anywhere in the response
 * 3. The text between the first `[` and the last `]`
 *
 * Attempts 2 and 3 only run when the previous attempt is not valid JSON, so
 * they rescue replies with prose around the payload ("Here you go: ...").
 *
 * Elements that are not objects with both `hall` and `content` keys are dropped.
 * `roomSlug` falls back to `room_slug`, then to "general"; a non-array
 * `entities` becomes []. Falsy `hall` / `content` values become "".
 *
 * @param llmOutput - Raw text returned by the LLM
 * @returns The extracted memories; [] for empty, malformed or non-array output (never throws)
 */
export function parseExtractionResponse(llmOutput: string): ExtractedMemory[] {
  if (!llmOutput || llmOutput.trim().length === 0) return [];

  const trimmed = llmOutput.trim();

  // Candidate 1: the response itself, minus a wrapping fence with any language tag.
  const candidates: string[] = [
    trimmed.startsWith('```')
      ? trimmed.replace(/^```[\w+-]*\s*\n?/, '').replace(/\n?```\s*$/, '')
      : trimmed,
  ];

  // Candidate 2: a fenced block preceded and/or followed by prose.
  const fencedBody = /```[\w+-]*\s*([\s\S]*?)```/.exec(trimmed)?.[1];
  if (fencedBody) candidates.push(fencedBody.trim());

  // Candidate 3: the outermost bracket pair, for an unfenced array inside prose.
  const open = trimmed.indexOf('[');
  const close = trimmed.lastIndexOf(']');
  if (open !== -1 && close > open) candidates.push(trimmed.slice(open, close + 1));

  for (const candidate of candidates) {
    let parsed: unknown;
    try {
      parsed = JSON.parse(candidate);
    } catch {
      // Not valid JSON — fall through to the next, more lenient candidate.
      continue;
    }

    // Valid JSON that is not an array is a definitive "no memories".
    if (!Array.isArray(parsed)) return [];

    return parsed
      .filter(
        (item: unknown): item is Record<string, unknown> =>
          typeof item === 'object' && item !== null && 'hall' in item && 'content' in item
      )
      .map(item => ({
        hall: String(item.hall || ''),
        content: String(item.content || ''),
        // Accept the snake_case spelling some models emit.
        roomSlug: String(item.roomSlug || item.room_slug || 'general'),
        entities: Array.isArray(item.entities)
          ? item.entities.map((entity: unknown) => String(entity))
          : [],
      }));
  }

  return [];
}
|
|
|
|
/**
 * Label → hall table for the regex fallback. Each pattern expects the label at
 * the start of an already-cleaned line and captures the text after the colon.
 * The regexes carry no `g` flag, so sharing them across calls is stateless.
 */
const FALLBACK_PATTERNS: ReadonlyArray<{ regex: RegExp; hall: string }> = [
  { regex: /^(?:decision|decided|resolve[ds]?):\s*(.+)/i, hall: 'decisions' },
  { regex: /^(?:todo|action|task):\s*(.+)/i, hall: 'decisions' },
  { regex: /^(?:found|discovered|learned|til):\s*(.+)/i, hall: 'discoveries' },
  { regex: /^(?:prefer|always|never):\s*(.+)/i, hall: 'preferences' },
  { regex: /^(?:event|happened|occurred):\s*(.+)/i, hall: 'events' },
  { regex: /^(?:tip|note|remember|important):\s*(.+)/i, hall: 'advice' },
  { regex: /^(?:error|bug|issue|broken):\s*(.+)/i, hall: 'errors' },
  { regex: /^(?:pattern|recurring|trend):\s*(.+)/i, hall: 'patterns' },
  { regex: /^(?:feeling|mood|emotion):\s*(.+)/i, hall: 'emotions' },
  { regex: /^(?:insight|observation|noticed):\s*(.+)/i, hall: 'insights' },
];

/**
 * Markdown structure allowed in front of a label: whitespace, bullets, quote
 * and heading markers, ordered-list numbers ("1." / "2)") and task-list
 * checkboxes ("[ ]" / "[x]").
 */
const LEADING_MARKUP = /^(?:[\s\-*>#]|\d+[.)]\s|\[[ xX]\]\s)+/;

/**
 * Closing `*` / `**` of a bold or italic label, on either side of the colon
 * ("Label**:" or "Label:** "). The opening marker is already removed by
 * LEADING_MARKUP.
 */
const LABEL_EMPHASIS = /^([A-Za-z]+)\*{0,2}:(?:\*{1,2}(?=\s|$))?/;

/**
 * Regex-based fallback extraction when LLM is unavailable.
 *
 * Each line is checked for a leading "Label:" (case-insensitive), after any
 * markdown list/quote/heading/checkbox prefix and label emphasis is removed:
 * - "Decision:" / "Decided:" / "Resolve(d|s):" → decisions
 * - "TODO:" / "Action:" / "Task:" → decisions
 * - "Found:" / "Discovered:" / "Learned:" / "TIL:" → discoveries
 * - "Prefer:" / "Always:" / "Never:" → preferences
 * - "Event:" / "Happened:" / "Occurred:" → events
 * - "Tip:" / "Note:" / "Remember:" / "Important:" → advice
 * - "Error:" / "Bug:" / "Issue:" / "Broken:" → errors
 * - "Pattern:" / "Recurring:" / "Trend:" → patterns
 * - "Feeling:" / "Mood:" / "Emotion:" → emotions
 * - "Insight:" / "Observation:" / "Noticed:" → insights
 *
 * A line yields at most one memory (first matching pattern wins). Every memory
 * gets roomSlug "general" and the entities found in the captured text.
 *
 * @param content - Raw text content
 * @returns Array of extracted memories (best-effort), in line order
 */
export function regexFallbackExtraction(content: string): ExtractedMemory[] {
  const memories: ExtractedMemory[] = [];
  if (!content) return memories;

  for (const line of content.split('\n')) {
    // "- [ ] **TODO:** ship it" → "TODO: ship it"
    const text = line.replace(LEADING_MARKUP, '').trim().replace(LABEL_EMPHASIS, '$1:');
    if (!text) continue;

    for (const { regex, hall } of FALLBACK_PATTERNS) {
      const captured = regex.exec(text)?.[1];
      if (!captured) continue;

      memories.push({
        hall,
        content: captured.trim(),
        roomSlug: 'general',
        entities: extractEntities(captured),
      });
      break;
    }
  }

  return memories;
}
|
|
|
|
/**
 * Extract simple entities from text: `@mention` handles and `#tag` labels.
 *
 * The `@` / `#` must start a token (not directly follow a word character), so
 * the domain part of an e-mail address such as "bob@example.com" is not
 * reported as a mention. Names consist of `\w` characters (ASCII letters,
 * digits, underscore) and are returned without the sigil.
 *
 * @param text - Text to scan
 * @returns Unique names in first-seen order: all mentions first, then all tags
 */
function extractEntities(text: string): string[] {
  const entities = new Set<string>();

  // Add every match of `pattern`, dropping the one-character sigil.
  const collect = (pattern: RegExp): void => {
    for (const hit of text.match(pattern) ?? []) entities.add(hit.slice(1));
  };

  collect(/(?<!\w)@\w+/g); // @mentions
  collect(/(?<!\w)#\w+/g); // #tags

  return Array.from(entities);
}
|