New shared package: packages/palace/ (@bytelyst/palace) Modules: - types.ts — BasePalaceWingDoc, RoomDoc, MemoryDoc, TunnelDoc, KGTripleDoc, DiaryDoc - halls.ts — HallType union, HALL_PRESETS (notelett/mindlyst/coding), hallFromLabel() - cosine.ts — cosineSimilarity(), topKByCosine(), normalizeVector() - dedup.ts — isContentDuplicate(), isExactDuplicate(), findClosestMatch() - decay.ts — computeDecayedRelevance(), boostRelevance() - extraction.ts — buildExtractionPrompt(), parseExtractionResponse(), regexFallbackExtraction() - kg.ts — findContradictions(), mergeTriples(), isTripleCurrent() - wakeup.ts — buildWakeUpLayers(), truncateToTokenBudget(), WAKEUP_PRESETS - config.ts — palaceConfigSchema (Zod) 7 test files, 91 tests passing. Consumed by NoteLett, MindLyst, and future palace-enabled products.
71 lines
2.0 KiB
TypeScript
71 lines
2.0 KiB
TypeScript
/**
|
|
* Vector similarity utilities for semantic search and deduplication.
|
|
*/
|
|
|
|
/**
 * Compute cosine similarity between two vectors.
 *
 * The result is guaranteed to lie in the closed interval [-1, 1]
 * (1 = identical direction, -1 = opposite direction). Floating-point rounding
 * in the norm product can otherwise push the raw quotient marginally outside
 * that range (e.g. `[1, 1, 1]` against itself yields 1.0000000000000002),
 * which breaks callers that feed the value to `Math.acos` or compare it
 * against an upper bound of 1.
 *
 * @param a - First vector
 * @param b - Second vector
 * @returns The cosine of the angle between `a` and `b`; 0 if the vectors are
 *   empty, have different lengths, or either has zero magnitude. NaN
 *   components propagate as NaN.
 */
export function cosineSimilarity(a: number[], b: number[]): number {
  if (a.length !== b.length || a.length === 0) return 0;

  let dotProduct = 0;
  let normA = 0;
  let normB = 0;

  for (let i = 0; i < a.length; i++) {
    dotProduct += a[i] * b[i];
    normA += a[i] * a[i];
    normB += b[i] * b[i];
  }

  const denominator = Math.sqrt(normA) * Math.sqrt(normB);
  if (denominator === 0) return 0;

  const similarity = dotProduct / denominator;

  // Clamp rounding overshoot back into the mathematically valid range.
  // Plain comparisons (rather than Math.min/Math.max) leave NaN untouched.
  if (similarity > 1) return 1;
  if (similarity < -1) return -1;
  return similarity;
}
|
|
|
|
/**
 * Scale a vector so that its Euclidean magnitude becomes 1.
 *
 * A new array is returned; the input is not modified. When the input has
 * zero magnitude (all components are 0, or the array is empty) the result is
 * an all-zero vector of the same length, since no direction can be derived.
 *
 * @param v - Vector to normalize
 * @returns A unit-length copy of `v`, or a zero vector if `v` has zero magnitude
 */
export function normalizeVector(v: number[]): number[] {
  let sumOfSquares = 0;
  v.forEach((component) => {
    sumOfSquares += component * component;
  });

  const length = Math.sqrt(sumOfSquares);

  return length === 0
    ? v.map(() => 0)
    : v.map((component) => component / length);
}
|
|
|
|
/**
 * Find the top-K most similar items to a query vector.
 *
 * Each item's embedding is scored against `query` with `cosineSimilarity`.
 * Items whose embedding is missing or empty are skipped, as are items whose
 * score falls below `minScore`. The surviving items are ordered by score,
 * highest first, and at most `k` of them are returned.
 *
 * @param query - The query embedding vector
 * @param items - Array of items to search
 * @param getEmbedding - Function to extract embedding from an item (returns undefined if missing)
 * @param k - Maximum number of results to return; any value that is not
 *   greater than 0 (zero, negative, NaN) yields an empty result
 * @param minScore - Minimum cosine similarity score, inclusive (default: 0)
 * @returns Sorted array of { item, score } pairs, highest score first
 */
export function topKByCosine<T>(
  query: number[],
  items: T[],
  getEmbedding: (item: T) => number[] | undefined,
  k: number,
  minScore = 0
): Array<{ item: T; score: number }> {
  // A negative k would reach Array.prototype.slice as a negative end index,
  // which counts from the end of the array and would return all but the last
  // |k| matches instead of none. Bail out before doing any scoring work.
  if (!(k > 0)) return [];

  const scored: Array<{ item: T; score: number }> = [];

  for (const item of items) {
    const embedding = getEmbedding(item);
    if (!embedding || embedding.length === 0) continue;

    const score = cosineSimilarity(query, embedding);
    if (score >= minScore) {
      scored.push({ item, score });
    }
  }

  scored.sort((a, b) => b.score - a.score);
  return scored.slice(0, k);
}
|