Phase 2 of Smart Actions Roadmap: - Create lib/embeddings.ts: embedText(), cosineSimilarity(), stripHtmlForEmbedding() - Add embedding + summaryArtifactId fields to NoteDoc - Create lib/note-hooks.ts: runPostSaveHooks() for background AI enrichment - backgroundEmbed: compute and store note embedding vectors - backgroundAutoSummarize: generate summary artifact for notes > 300 words - Both gated behind feature flags (notelett_auto_embed_enabled, notelett_auto_summarize_enabled) - Add intelligence endpoints to note-prompts routes: - POST /api/notes/:id/suggest-tags (F5) — LLM-generated tag suggestions - POST /api/notes/:id/check-duplicates (F8) — cosine similarity duplicate detection - POST /api/notes/:id/suggest-links (F9) — related note suggestions - POST /api/workspaces/:wsId/knowledge-gaps (F12) — workspace gap analysis - POST /api/notes/compare (F14) — multi-note comparison - POST /api/notes/merge (F13) — multi-note merge - Add 4 feature flags for intelligence features - 9 new tests in embeddings.test.ts (cosine similarity, HTML stripping, embedText)
51 lines
1.4 KiB
TypeScript
51 lines
1.4 KiB
TypeScript
/**
 * Embedding utilities for note intelligence — duplicate detection, related notes, knowledge gaps.
 *
 * Uses @bytelyst/llm embed() when the provider exposes it; otherwise embedText() returns null.
 */
|
|
|
|
import { llm } from './llm.js';
|
|
import { config } from './config.js';
|
|
|
|
/**
 * Generate an embedding vector for a text string.
 *
 * The input is capped at 8000 UTF-16 code units before it is sent to the
 * provider. Note that this counts characters, not tokens; it is a cheap
 * stand-in for the model's input limit (the original note cites ~8k tokens
 * for most embedding models).
 *
 * Rejections from `provider.embed` are not caught here and propagate to the
 * caller.
 *
 * @param text - Text to embed.
 * @returns The first embedding returned by the provider, or `null` when the
 *   provider has no `embed` method or returns no embeddings.
 */
export async function embedText(text: string): Promise<number[] | null> {
  const provider = llm();
  if (!provider.embed) return null;

  const maxChars = 8000;
  let trimmed = text.slice(0, maxChars);

  // slice() works on UTF-16 code units, so the cut can land between the two
  // halves of a surrogate pair (e.g. an emoji). Drop the dangling high
  // surrogate rather than sending malformed text to the provider.
  if (text.length > maxChars) {
    const last = text.charCodeAt(maxChars - 1);
    const next = text.charCodeAt(maxChars);
    const splitsPair =
      last >= 0xd800 && last <= 0xdbff && next >= 0xdc00 && next <= 0xdfff;
    if (splitsPair) trimmed = trimmed.slice(0, -1);
  }

  const res = await provider.embed({
    input: trimmed,
    model: config.LLM_EMBEDDING_MODEL,
  });
  return res.embeddings[0] ?? null;
}
|
|
|
|
/**
 * Compute the cosine similarity between two embedding vectors.
 *
 * @param a - First vector.
 * @param b - Second vector; expected to have the same length as `a`.
 * @returns A value in [-1, 1] (1 = same direction). Returns 0 whenever the
 *   similarity is undefined: mismatched lengths, empty vectors, a
 *   zero-magnitude vector, or a non-finite result (NaN entries or overflow).
 */
export function cosineSimilarity(a: readonly number[], b: readonly number[]): number {
  if (a.length !== b.length || a.length === 0) return 0;

  let dot = 0;
  let magA = 0;
  let magB = 0;
  for (let i = 0; i < a.length; i++) {
    dot += a[i] * b[i];
    magA += a[i] * a[i];
    magB += b[i] * b[i];
  }

  const denom = Math.sqrt(magA) * Math.sqrt(magB);
  if (denom === 0) return 0;

  const similarity = dot / denom;
  // NaN entries or overflowing magnitudes make the ratio meaningless; use the
  // same "undefined => 0" convention as the guards above.
  if (!Number.isFinite(similarity)) return 0;

  // Floating-point rounding can push the ratio a hair past +/-1 (for example
  // 1.0000000000000002 for identical vectors); clamp to honor the documented range.
  return Math.max(-1, Math.min(1, similarity));
}
|
|
|
|
/**
 * Reduce an HTML fragment to plain text suitable for embedding.
 *
 * Every `<...>` tag is replaced with a space, runs of whitespace are collapsed
 * to a single space, and the result is trimmed. HTML entities are left as-is.
 *
 * @param html - HTML (or plain text) to clean.
 * @returns The tag-free, whitespace-normalized text.
 */
export function stripHtmlForEmbedding(html: string): string {
  // Swap tags for spaces (not empty strings) so adjacent words stay separated.
  const withoutTags = html.replace(/<[^>]*>/g, ' ');
  const singleSpaced = withoutTags.replace(/\s+/g, ' ');
  return singleSpaced.trim();
}
|