learning_ai_common_plat/packages/palace/src/dedup.ts
saravanakumardb1 d1c6cf47c8 feat(palace): add @bytelyst/palace shared package — MemPalace primitives (91 tests)
New shared package: packages/palace/ (@bytelyst/palace)

Modules:
- types.ts — BasePalaceWingDoc, RoomDoc, MemoryDoc, TunnelDoc, KGTripleDoc, DiaryDoc
- halls.ts — HallType union, HALL_PRESETS (notelett/mindlyst/coding), hallFromLabel()
- cosine.ts — cosineSimilarity(), topKByCosine(), normalizeVector()
- dedup.ts — isContentDuplicate(), isExactDuplicate(), findClosestMatch()
- decay.ts — computeDecayedRelevance(), boostRelevance()
- extraction.ts — buildExtractionPrompt(), parseExtractionResponse(), regexFallbackExtraction()
- kg.ts — findContradictions(), mergeTriples(), isTripleCurrent()
- wakeup.ts — buildWakeUpLayers(), truncateToTokenBudget(), WAKEUP_PRESETS
- config.ts — palaceConfigSchema (Zod)

7 test files, 91 tests passing.
Consumed by NoteLett, MindLyst, and future palace-enabled products.
2026-04-10 00:57:00 -07:00

65 lines
1.9 KiB
TypeScript

/**
* Deduplication utilities for palace memories.
*
* Detects near-duplicate content using cosine similarity over embeddings.
* Products handle the Cosmos/DB queries; this module operates on pure data.
*/
import { cosineSimilarity } from './cosine.js';
/**
 * Decide whether a new memory's embedding is a near-duplicate of one that
 * already exists.
 *
 * Existing embeddings whose length differs from the candidate's are ignored
 * rather than compared, so a dimension mismatch can never count as a match.
 *
 * @param candidate - Embedding of the new memory
 * @param existingEmbeddings - Embeddings of existing memories in the same room/hall
 * @param threshold - Cosine similarity cutoff (default: 0.90); the comparison
 *   is strict, so a score equal to the threshold is not a duplicate
 * @returns true as soon as one comparable embedding scores above the
 *   threshold, false otherwise (including when the list is empty)
 */
export function isContentDuplicate(
  candidate: number[],
  existingEmbeddings: number[][],
  threshold = 0.9
): boolean {
  const dimensions = candidate.length;
  // `some` stops at the first hit, mirroring an early return from a loop.
  return existingEmbeddings.some(
    (stored) =>
      stored.length === dimensions &&
      cosineSimilarity(candidate, stored) > threshold
  );
}
/**
 * Compare two strings for equality, ignoring letter case and any leading or
 * trailing whitespace.
 *
 * Both inputs are trimmed and lowercased before the comparison; interior
 * whitespace is left untouched and therefore still significant.
 *
 * @param a - First text
 * @param b - Second text
 * @returns true when the two normalized strings are identical
 */
export function isExactDuplicate(a: string, b: string): boolean {
  const canonical = (text: string): string => text.trim().toLowerCase();
  return canonical(a) === canonical(b);
}
/**
 * Locate the existing embedding most similar to the candidate.
 *
 * Embeddings whose length differs from the candidate's are skipped. An
 * embedding only qualifies if its cosine similarity is strictly greater than
 * `minScore`; among equal top scores the earliest index is kept.
 *
 * @param candidate - Embedding to match
 * @param existingEmbeddings - Embeddings to search
 * @param minScore - Score a match must strictly exceed (default: 0)
 * @returns the index and score of the best match, or null when the list is
 *   empty or nothing scores above `minScore`
 */
export function findClosestMatch(
  candidate: number[],
  existingEmbeddings: number[][],
  minScore = 0
): { index: number; score: number } | null {
  let winner: { index: number; score: number } | null = null;

  for (const [position, stored] of existingEmbeddings.entries()) {
    if (stored.length !== candidate.length) continue;

    const similarity = cosineSimilarity(candidate, stored);
    // Until something qualifies, the bar to clear is minScore itself.
    const bar = winner === null ? minScore : winner.score;
    if (similarity > bar) {
      winner = { index: position, score: similarity };
    }
  }

  return winner;
}