New shared package: packages/palace/ (@bytelyst/palace)

Modules:
- types.ts — BasePalaceWingDoc, RoomDoc, MemoryDoc, TunnelDoc, KGTripleDoc, DiaryDoc
- halls.ts — HallType union, HALL_PRESETS (notelett/mindlyst/coding), hallFromLabel()
- cosine.ts — cosineSimilarity(), topKByCosine(), normalizeVector()
- dedup.ts — isContentDuplicate(), isExactDuplicate(), findClosestMatch()
- decay.ts — computeDecayedRelevance(), boostRelevance()
- extraction.ts — buildExtractionPrompt(), parseExtractionResponse(), regexFallbackExtraction()
- kg.ts — findContradictions(), mergeTriples(), isTripleCurrent()
- wakeup.ts — buildWakeUpLayers(), truncateToTokenBudget(), WAKEUP_PRESETS
- config.ts — palaceConfigSchema (Zod)

7 test files, 91 tests passing. Consumed by NoteLett, MindLyst, and future palace-enabled products.
65 lines
1.9 KiB
TypeScript
65 lines
1.9 KiB
TypeScript
/**
 * Deduplication utilities for palace memories.
 *
 * Detects near-duplicate content using cosine similarity over embeddings,
 * and exact duplicates via normalized (trimmed, lowercased) text comparison.
 * Products handle the Cosmos/DB queries; this module operates on pure data.
 */
|
|
|
|
import { cosineSimilarity } from './cosine.js';
|
|
|
|
/**
 * Report whether a new memory's embedding is a near-duplicate of one already stored.
 *
 * Stored embeddings whose length differs from the candidate's are ignored
 * rather than compared.
 *
 * @param candidate - Embedding of the new memory
 * @param existingEmbeddings - Embeddings of existing memories in the same room/hall
 * @param threshold - Cosine similarity threshold (default: 0.90); a match must be
 *   strictly greater than this value
 * @returns true as soon as one comparable stored embedding scores above the threshold,
 *   false otherwise (including when there are no stored embeddings)
 */
export function isContentDuplicate(
  candidate: number[],
  existingEmbeddings: number[][],
  threshold = 0.9
): boolean {
  return existingEmbeddings.some(
    (stored) =>
      // Length check comes first so mismatched vectors never reach cosineSimilarity.
      stored.length === candidate.length &&
      cosineSimilarity(candidate, stored) > threshold
  );
}
|
|
|
|
/**
 * Compare two strings for equality, ignoring surrounding whitespace and letter case.
 *
 * Each input is trimmed and lowercased before the strict comparison; interior
 * whitespace is left untouched.
 *
 * @param a - First text
 * @param b - Second text
 * @returns true when the normalized forms are identical
 */
export function isExactDuplicate(a: string, b: string): boolean {
  const canonical = (text: string): string => text.trim().toLowerCase();
  return canonical(a) === canonical(b);
}
|
|
|
|
/**
 * Locate the stored embedding most similar to the candidate.
 *
 * Stored embeddings whose length differs from the candidate's are skipped.
 * A score must be strictly greater than `minScore` to count, and on ties the
 * earliest index wins because later scores must strictly beat the current best.
 *
 * @param candidate - Embedding to compare against the stored ones
 * @param existingEmbeddings - Stored embeddings to search
 * @param minScore - Lower bound a score has to exceed (default: 0)
 * @returns the index and cosine score of the best match, or null when there are
 *   no embeddings or none score above `minScore`
 */
export function findClosestMatch(
  candidate: number[],
  existingEmbeddings: number[][],
  minScore = 0
): { index: number; score: number } | null {
  let best: { index: number; score: number } | null = null;

  for (const [position, stored] of existingEmbeddings.entries()) {
    if (stored.length !== candidate.length) continue;

    const similarity = cosineSimilarity(candidate, stored);
    // Until something qualifies, the bar to clear is minScore itself.
    const scoreToBeat = best === null ? minScore : best.score;
    if (similarity > scoreToBeat) {
      best = { index: position, score: similarity };
    }
  }

  return best;
}
|