learning_ai_notes/backend/src/lib/embeddings.ts
saravanakumardb1 fe3b0f9b3e feat(backend): add note intelligence — embeddings, auto-summarize, duplicates, suggest-links, knowledge gaps
Phase 2 of Smart Actions Roadmap:

- Create lib/embeddings.ts: embedText(), cosineSimilarity(), stripHtmlForEmbedding()
- Add embedding + summaryArtifactId fields to NoteDoc
- Create lib/note-hooks.ts: runPostSaveHooks() for background AI enrichment
  - backgroundEmbed: compute and store note embedding vectors
  - backgroundAutoSummarize: generate summary artifact for notes > 300 words
  - Both gated behind feature flags (notelett_auto_embed_enabled, notelett_auto_summarize_enabled)
- Add intelligence endpoints to note-prompts routes:
  - POST /api/notes/:id/suggest-tags (F5) — LLM-generated tag suggestions
  - POST /api/notes/:id/check-duplicates (F8) — cosine similarity duplicate detection
  - POST /api/notes/:id/suggest-links (F9) — related note suggestions
  - POST /api/workspaces/:wsId/knowledge-gaps (F12) — workspace gap analysis
  - POST /api/notes/compare (F14) — multi-note comparison
  - POST /api/notes/merge (F13) — multi-note merge
- Add 4 feature flags for intelligence features
- 9 new tests in embeddings.test.ts (cosine similarity, HTML stripping, embedText)
2026-04-06 08:10:26 -07:00

51 lines
1.4 KiB
TypeScript

/**
* Embedding utilities for note intelligence — duplicate detection, related notes, knowledge gaps.
*
* Uses @bytelyst/llm embed() when available, falls back gracefully.
*/
import { llm } from './llm.js';
import { config } from './config.js';
/**
 * Request an embedding vector for `text` from the configured LLM provider.
 *
 * The input is cut to its first 8000 UTF-16 code units before being sent.
 * Note this is a character count, not a token count; it only serves as a
 * rough guard against oversized requests.
 *
 * @param text - Plain text to embed.
 * @returns The first embedding in the provider response, or `null` when the
 *   provider exposes no `embed` method or the response has no first embedding.
 *   Errors raised by the provider's `embed` call are not caught here and
 *   propagate to the caller.
 */
export async function embedText(text: string): Promise<number[] | null> {
  const client = llm();

  // Embedding support is optional on the provider object.
  if (!client.embed) {
    return null;
  }

  const input = text.slice(0, 8000);
  const { embeddings } = await client.embed({
    input,
    model: config.LLM_EMBEDDING_MODEL,
  });

  const [first] = embeddings;
  return first ?? null;
}
/**
 * Compute the cosine similarity between two embedding vectors.
 *
 * @param a - First vector.
 * @param b - Second vector; must have the same length as `a`.
 * @returns A value in the closed range [-1, 1] (1 = same direction). Returns 0
 *   when the similarity is undefined: the lengths differ, the vectors are
 *   empty, either vector has zero magnitude, or the computation is not finite
 *   (for example because a component is `NaN` or `Infinity`).
 */
export function cosineSimilarity(a: number[], b: number[]): number {
  if (a.length !== b.length || a.length === 0) return 0;

  let dot = 0;
  let magA = 0;
  let magB = 0;
  for (let i = 0; i < a.length; i++) {
    dot += a[i] * b[i];
    magA += a[i] * a[i];
    magB += b[i] * b[i];
  }

  const denom = Math.sqrt(magA) * Math.sqrt(magB);
  if (denom === 0) return 0;

  const similarity = dot / denom;

  // NaN/Infinity components (or overflow) would otherwise leak NaN to callers,
  // which silently breaks sorting and threshold comparisons.
  if (!Number.isFinite(similarity)) return 0;

  // Floating-point rounding can push the ratio a few ULPs past +/-1 for
  // (anti-)parallel vectors; clamp so the documented range always holds.
  return Math.min(1, Math.max(-1, similarity));
}
/**
 * Reduce an HTML fragment to a single line of plain text for embedding.
 *
 * Every `<...>` span is replaced by a space, runs of whitespace are collapsed
 * to one space, and the result is trimmed. Character entities such as
 * `&amp;` are left as-is.
 *
 * @param html - Markup (or plain text) to clean.
 * @returns The whitespace-normalized text with tag-like spans removed.
 */
export function stripHtmlForEmbedding(html: string): string {
  // A space (not an empty string) keeps words on either side of a tag apart.
  const withoutTags = html.replace(/<[^>]*>/g, ' ');
  const singleSpaced = withoutTags.replace(/\s+/g, ' ');
  return singleSpaced.trim();
}