learning_ai_common_plat/packages/palace/src/cosine.ts
saravanakumardb1 d1c6cf47c8 feat(palace): add @bytelyst/palace shared package — MemPalace primitives (91 tests)
New shared package: packages/palace/ (@bytelyst/palace)

Modules:
- types.ts — BasePalaceWingDoc, RoomDoc, MemoryDoc, TunnelDoc, KGTripleDoc, DiaryDoc
- halls.ts — HallType union, HALL_PRESETS (notelett/mindlyst/coding), hallFromLabel()
- cosine.ts — cosineSimilarity(), topKByCosine(), normalizeVector()
- dedup.ts — isContentDuplicate(), isExactDuplicate(), findClosestMatch()
- decay.ts — computeDecayedRelevance(), boostRelevance()
- extraction.ts — buildExtractionPrompt(), parseExtractionResponse(), regexFallbackExtraction()
- kg.ts — findContradictions(), mergeTriples(), isTripleCurrent()
- wakeup.ts — buildWakeUpLayers(), truncateToTokenBudget(), WAKEUP_PRESETS
- config.ts — palaceConfigSchema (Zod)

7 test files, 91 tests passing.
Consumed by NoteLett, MindLyst, and future palace-enabled products.
2026-04-10 00:57:00 -07:00

71 lines
2.0 KiB
TypeScript

/**
* Vector similarity utilities for semantic search and deduplication.
*/
/**
 * Compute the cosine similarity between two vectors.
 *
 * The result is clamped to the closed interval [-1, 1]. Rounding in the square
 * roots and the final division can otherwise push the raw quotient marginally
 * past ±1 for (anti-)parallel vectors, which breaks callers that pass the
 * score to `Math.acos` or compare it against an upper bound of 1.
 *
 * @param a - First vector
 * @param b - Second vector; must have the same number of components as `a`
 * @returns The cosine of the angle between `a` and `b` (1 = same direction,
 *   -1 = opposite direction). Returns 0 when the vectors are empty, differ in
 *   dimension, or either one has zero magnitude. A `NaN` component in either
 *   vector yields `NaN`.
 */
export function cosineSimilarity(a: readonly number[], b: readonly number[]): number {
  if (a.length !== b.length || a.length === 0) return 0;

  let dotProduct = 0;
  let normA = 0;
  let normB = 0;
  for (let i = 0; i < a.length; i++) {
    const x = a[i];
    const y = b[i];
    dotProduct += x * y;
    normA += x * x;
    normB += y * y;
  }

  const denominator = Math.sqrt(normA) * Math.sqrt(normB);
  // A zero denominator means at least one vector has no direction.
  if (denominator === 0) return 0;

  // Math.min/Math.max pass NaN through, so invalid input is still detectable.
  return Math.min(1, Math.max(-1, dotProduct / denominator));
}
/**
 * Scale a vector so that its Euclidean magnitude is 1.
 *
 * Always returns a new array; the input is left untouched. When the input has
 * zero magnitude (which includes the empty array) the result is an all-zero
 * vector with the same number of components.
 *
 * @param v - The vector to normalize
 * @returns A unit-length copy of `v`, or zeros if `v` has no direction
 */
export function normalizeVector(v: number[]): number[] {
  let sumOfSquares = 0;
  v.forEach((component) => {
    sumOfSquares += component * component;
  });

  const length = Math.sqrt(sumOfSquares);
  return length === 0
    ? v.map(() => 0)
    : v.map((component) => component / length);
}
/**
 * Find the top-K items most similar to a query vector by cosine similarity.
 *
 * Items whose embedding is missing or empty are skipped. Every remaining item
 * is scored with `cosineSimilarity`, and only scores `>= minScore` are kept.
 * Because a `NaN` score never satisfies that comparison, such items are
 * dropped as well.
 *
 * @param query - The query embedding vector
 * @param items - Array of items to search; not modified
 * @param getEmbedding - Function to extract the embedding from an item
 *   (returns undefined if missing)
 * @param k - Maximum number of results to return; a value that is zero,
 *   negative, or `NaN` produces an empty result
 * @param minScore - Minimum cosine similarity score, inclusive (default: 0)
 * @returns Array of { item, score } pairs sorted by descending score, with at
 *   most `k` entries
 */
export function topKByCosine<T>(
  query: number[],
  items: readonly T[],
  getEmbedding: (item: T) => number[] | undefined,
  k: number,
  minScore = 0
): Array<{ item: T; score: number }> {
  // `slice(0, k)` interprets a negative `k` as an offset from the end, which
  // would return all but the last |k| matches instead of none. Bail out first;
  // `!(k > 0)` also covers NaN.
  if (!(k > 0)) return [];

  const scored: Array<{ item: T; score: number }> = [];
  for (const item of items) {
    const embedding = getEmbedding(item);
    if (!embedding || embedding.length === 0) continue;

    const score = cosineSimilarity(query, embedding);
    if (score >= minScore) {
      scored.push({ item, score });
    }
  }

  // `scored` is a local array, so sorting it in place does not touch `items`.
  scored.sort((left, right) => right.score - left.score);
  return scored.slice(0, k);
}