/**
 * Embedding utilities for note intelligence — duplicate detection, related notes, knowledge gaps.
 *
 * Uses @bytelyst/llm embed() when available, falls back gracefully.
 */

import { llm } from './llm.js';
import { config } from './config.js';

/**
 * Maximum number of UTF-16 code units sent to the embedding provider.
 *
 * This is a character budget, not a token budget: it is a cheap guard meant to
 * keep inputs under the token limits of typical embedding models without
 * running a tokenizer.
 */
const MAX_EMBEDDING_INPUT_CHARS = 8000;

/**
 * Truncate `text` to at most `maxChars` UTF-16 code units without cutting a
 * surrogate pair in half (which would leave a lone, invalid surrogate at the
 * end of the string).
 */
function truncateForEmbedding(text: string, maxChars: number): string {
  if (text.length <= maxChars) return text;

  let end = maxChars;
  const lastUnit = text.charCodeAt(end - 1);
  // A high surrogate at the cut point means its low-surrogate partner would be
  // dropped; back off by one unit so the output stays well-formed.
  if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) end -= 1;

  return text.slice(0, end);
}

/**
 * Generate an embedding vector for a text string.
 *
 * The input is truncated to {@link MAX_EMBEDDING_INPUT_CHARS} characters before
 * being sent to the provider, using the model named by
 * `config.LLM_EMBEDDING_MODEL`.
 *
 * @param text - Plain text to embed.
 * @returns The first embedding returned by the provider, or `null` if the LLM
 *   provider doesn't support embeddings or returned no vectors.
 */
export async function embedText(text: string): Promise<number[] | null> {
  const provider = llm();
  if (!provider.embed) return null;

  const trimmed = truncateForEmbedding(text, MAX_EMBEDDING_INPUT_CHARS);
  const res = await provider.embed({
    input: trimmed,
    model: config.LLM_EMBEDDING_MODEL,
  });
  return res.embeddings[0] ?? null;
}

/**
 * Compute cosine similarity between two embedding vectors.
 *
 * @param a - First vector.
 * @param b - Second vector; must have the same length as `a`.
 * @returns A value between -1 and 1 (1 = same direction). Returns 0 when the
 *   vectors differ in length, are empty, or either has zero magnitude.
 */
export function cosineSimilarity(a: readonly number[], b: readonly number[]): number {
  if (a.length !== b.length || a.length === 0) return 0;

  let dot = 0;
  let magA = 0;
  let magB = 0;
  for (let i = 0; i < a.length; i++) {
    // Missing slots (sparse arrays) are treated as 0 rather than poisoning the
    // sums with NaN.
    const x = a[i] ?? 0;
    const y = b[i] ?? 0;
    dot += x * y;
    magA += x * x;
    magB += y * y;
  }

  const denom = Math.sqrt(magA) * Math.sqrt(magB);
  if (denom === 0) return 0;

  // Floating-point rounding can push the ratio marginally outside [-1, 1]
  // (e.g. 1.0000000000000002 for identical vectors); clamp to the documented range.
  return Math.max(-1, Math.min(1, dot / denom));
}

/**
 * Strip HTML and normalize whitespace for embedding input.
 *
 * Every `<...>` tag is replaced by a space, runs of whitespace are collapsed to
 * a single space, and the result is trimmed. Only the tags themselves are
 * removed: text between tags is kept as-is, and HTML entities (e.g. `&amp;`)
 * are not decoded.
 *
 * @param html - HTML markup.
 * @returns The text content with tags removed and whitespace normalized.
 */
export function stripHtmlForEmbedding(html: string): string {
  return html.replace(/<[^>]*>/g, ' ').replace(/\s+/g, ' ').trim();
}