Phase 2 of Smart Actions Roadmap:
- Create lib/embeddings.ts: embedText(), cosineSimilarity(), stripHtmlForEmbedding()
- Add embedding + summaryArtifactId fields to NoteDoc
- Create lib/note-hooks.ts: runPostSaveHooks() for background AI enrichment
  - backgroundEmbed: compute and store note embedding vectors
  - backgroundAutoSummarize: generate summary artifact for notes > 300 words
  - Both gated behind feature flags (notelett_auto_embed_enabled, notelett_auto_summarize_enabled)
- Add intelligence endpoints to note-prompts routes:
  - POST /api/notes/:id/suggest-tags (F5) — LLM-generated tag suggestions
  - POST /api/notes/:id/check-duplicates (F8) — cosine similarity duplicate detection
  - POST /api/notes/:id/suggest-links (F9) — related note suggestions
  - POST /api/workspaces/:wsId/knowledge-gaps (F12) — workspace gap analysis
  - POST /api/notes/compare (F14) — multi-note comparison
  - POST /api/notes/merge (F13) — multi-note merge
- Add 4 feature flags for intelligence features
- 9 new tests in embeddings.test.ts (cosine similarity, HTML stripping, embedText)
82 lines
2.4 KiB
TypeScript
82 lines
2.4 KiB
TypeScript
/**
 * Tests for the embeddings utility: embedText, cosineSimilarity, and stripHtmlForEmbedding.
 */
|
|
|
|
import { describe, it, expect, vi } from 'vitest';
|
|
import { cosineSimilarity, stripHtmlForEmbedding } from './embeddings.js';
|
|
|
|
// Stub the LLM client module. Everything the stub needs is built inside the
// factory: vitest hoists vi.mock() calls above the imports, so the factory must
// not depend on bindings declared elsewhere in this file.
vi.mock('./llm.js', () => {
  // Produces a fresh canned embedding response on every embed() call.
  const cannedEmbedResponse = async () => ({
    embeddings: [[0.1, 0.2, 0.3, 0.4, 0.5]],
    model: 'mock-embed',
    usage: { promptTokens: 5, completionTokens: 0, totalTokens: 5 },
  });

  // Each llm() call hands back a new client object carrying its own embed spy.
  const createClient = () => ({ embed: vi.fn(cannedEmbedResponse) });

  return { llm: vi.fn(createClient) };
});

// Stub the config module so that only the embedding model name is defined.
vi.mock('./config.js', () => {
  const config = { LLM_EMBEDDING_MODEL: 'text-embedding-3-small' };
  return { config };
});
|
|
|
|
/**
 * cosineSimilarity: checks the canonical values (1, 0, -1), the inputs for
 * which the suite expects an exact 0 (empty, mismatched length, zero vector),
 * and one hand-computed case.
 */
describe('cosineSimilarity', () => {
  it('returns 1 for identical vectors', () => {
    const vector = [1, 2, 3, 4];
    const similarity = cosineSimilarity(vector, vector);
    expect(similarity).toBeCloseTo(1, 5);
  });

  it('returns 0 for orthogonal vectors', () => {
    const xAxis = [1, 0];
    const yAxis = [0, 1];
    expect(cosineSimilarity(xAxis, yAxis)).toBeCloseTo(0, 5);
  });

  it('returns -1 for opposite vectors', () => {
    const forward = [1, 0];
    const backward = [-1, 0];
    expect(cosineSimilarity(forward, backward)).toBeCloseTo(-1, 5);
  });

  // The next three cases use toBe: the result must be exactly 0, not merely close to it.
  it('returns 0 for empty vectors', () => {
    const similarity = cosineSimilarity([], []);
    expect(similarity).toBe(0);
  });

  it('returns 0 for mismatched lengths', () => {
    const twoDims = [1, 2];
    const threeDims = [1, 2, 3];
    expect(cosineSimilarity(twoDims, threeDims)).toBe(0);
  });

  it('returns 0 for zero vectors', () => {
    const zeroVector = [0, 0, 0];
    const nonZeroVector = [1, 2, 3];
    expect(cosineSimilarity(zeroVector, nonZeroVector)).toBe(0);
  });

  it('computes correct similarity for known vectors', () => {
    const left = [1, 2, 3];
    const right = [4, 5, 6];
    // Worked by hand: dot = 1*4 + 2*5 + 3*6 = 32,
    // |left| = sqrt(1 + 4 + 9) = sqrt(14), |right| = sqrt(16 + 25 + 36) = sqrt(77).
    const dotProduct = 32;
    const magnitudeProduct = Math.sqrt(14) * Math.sqrt(77);
    expect(cosineSimilarity(left, right)).toBeCloseTo(dotProduct / magnitudeProduct, 5);
  });
});
|
|
|
|
/**
 * stripHtmlForEmbedding: each case feeds a raw string and compares the result
 * against the exact plain-text string the suite expects back.
 */
describe('stripHtmlForEmbedding', () => {
  it('strips HTML tags', () => {
    const markup = '<p>Hello <b>world</b></p>';
    const plainText = stripHtmlForEmbedding(markup);
    expect(plainText).toBe('Hello world');
  });

  it('collapses whitespace', () => {
    // Input mixes spaces and newlines between the two words.
    const spaced = 'Hello \n\n world';
    const plainText = stripHtmlForEmbedding(spaced);
    expect(plainText).toBe('Hello world');
  });

  it('handles empty string', () => {
    const plainText = stripHtmlForEmbedding('');
    expect(plainText).toBe('');
  });

  it('strips complex HTML', () => {
    // Nested elements with attributes; the expected text has a single space
    // between the heading text and the paragraph text.
    const html = '<div class="note"><h1>Title</h1><p>Content with <a href="#">link</a></p></div>';
    const plainText = stripHtmlForEmbedding(html);
    expect(plainText).toBe('Title Content with link');
  });
});
|
|
|
|
/**
 * embedText: resolved through a dynamic import inside the test body, then
 * called with a short string.
 */
describe('embedText', () => {
  it('returns embedding vector', async () => {
    const embeddingsModule = await import('./embeddings.js');
    const vector = await embeddingsModule.embedText('hello world');

    // Expected values equal the single row in the mocked embed() response
    // declared at the top of this file — presumably embedText returns that
    // first row; confirm against lib/embeddings.
    expect(vector).toEqual([0.1, 0.2, 0.3, 0.4, 0.5]);
  });
});
|