From fe3b0f9b3ed8b6caf6295e4d07c1f5217e64dc10 Mon Sep 17 00:00:00 2001 From: saravanakumardb1 Date: Mon, 6 Apr 2026 08:10:26 -0700 Subject: [PATCH] =?UTF-8?q?feat(backend):=20add=20note=20intelligence=20?= =?UTF-8?q?=E2=80=94=20embeddings,=20auto-summarize,=20duplicates,=20sugge?= =?UTF-8?q?st-links,=20knowledge=20gaps?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Phase 2 of Smart Actions Roadmap: - Create lib/embeddings.ts: embedText(), cosineSimilarity(), stripHtmlForEmbedding() - Add embedding + summaryArtifactId fields to NoteDoc - Create lib/note-hooks.ts: runPostSaveHooks() for background AI enrichment - backgroundEmbed: compute and store note embedding vectors - backgroundAutoSummarize: generate summary artifact for notes > 300 words - Both gated behind feature flags (notelett_auto_embed_enabled, notelett_auto_summarize_enabled) - Add intelligence endpoints to note-prompts routes: - POST /api/notes/:id/suggest-tags (F5) — LLM-generated tag suggestions - POST /api/notes/:id/check-duplicates (F8) — cosine similarity duplicate detection - POST /api/notes/:id/suggest-links (F9) — related note suggestions - POST /api/workspaces/:wsId/knowledge-gaps (F12) — workspace gap analysis - POST /api/notes/compare (F14) — multi-note comparison - POST /api/notes/merge (F13) — multi-note merge - Add 4 feature flags for intelligence features - 9 new tests in embeddings.test.ts (cosine similarity, HTML stripping, embedText) --- backend/src/lib/embeddings.test.ts | 81 ++++++ backend/src/lib/embeddings.ts | 50 ++++ backend/src/lib/feature-flags.ts | 4 + backend/src/lib/note-hooks.ts | 138 ++++++++++ backend/src/modules/note-prompts/routes.ts | 284 +++++++++++++++++++++ backend/src/modules/notes/types.ts | 2 + 6 files changed, 559 insertions(+) create mode 100644 backend/src/lib/embeddings.test.ts create mode 100644 backend/src/lib/embeddings.ts create mode 100644 backend/src/lib/note-hooks.ts diff --git 
a/backend/src/lib/embeddings.test.ts b/backend/src/lib/embeddings.test.ts
new file mode 100644
index 0000000..092037f
--- /dev/null
+++ b/backend/src/lib/embeddings.test.ts
@@ -0,0 +1,81 @@
+/**
+ * Tests for embeddings utility — embedText + cosineSimilarity + stripHtmlForEmbedding.
+ */
+
+import { describe, it, expect, vi } from 'vitest';
+import { cosineSimilarity, stripHtmlForEmbedding } from './embeddings.js';
+
+vi.mock('./llm.js', () => ({
+  llm: vi.fn(() => ({
+    embed: vi.fn(async () => ({
+      embeddings: [[0.1, 0.2, 0.3, 0.4, 0.5]],
+      model: 'mock-embed',
+      usage: { promptTokens: 5, completionTokens: 0, totalTokens: 5 },
+    })),
+  })),
+}));
+vi.mock('./config.js', () => ({
+  config: { LLM_EMBEDDING_MODEL: 'text-embedding-3-small' },
+}));
+
+describe('cosineSimilarity', () => {
+  it('returns 1 for identical vectors', () => {
+    const v = [1, 2, 3, 4];
+    expect(cosineSimilarity(v, v)).toBeCloseTo(1, 5);
+  });
+
+  it('returns 0 for orthogonal vectors', () => {
+    expect(cosineSimilarity([1, 0], [0, 1])).toBeCloseTo(0, 5);
+  });
+
+  it('returns -1 for opposite vectors', () => {
+    expect(cosineSimilarity([1, 0], [-1, 0])).toBeCloseTo(-1, 5);
+  });
+
+  it('returns 0 for empty vectors', () => {
+    expect(cosineSimilarity([], [])).toBe(0);
+  });
+
+  it('returns 0 for mismatched lengths', () => {
+    expect(cosineSimilarity([1, 2], [1, 2, 3])).toBe(0);
+  });
+
+  it('returns 0 for zero vectors', () => {
+    expect(cosineSimilarity([0, 0, 0], [1, 2, 3])).toBe(0);
+  });
+
+  it('computes correct similarity for known vectors', () => {
+    const a = [1, 2, 3];
+    const b = [4, 5, 6];
+    // dot = 4+10+18 = 32, |a| = sqrt(14), |b| = sqrt(77)
+    const expected = 32 / (Math.sqrt(14) * Math.sqrt(77));
+    expect(cosineSimilarity(a, b)).toBeCloseTo(expected, 5);
+  });
+});
+
+describe('stripHtmlForEmbedding', () => {
+  it('strips HTML tags', () => {
+    expect(stripHtmlForEmbedding('<p>Hello <b>world</b></p>')).toBe('Hello world');
+  });
+
+  it('collapses whitespace', () => {
+    expect(stripHtmlForEmbedding('Hello \n\n world')).toBe('Hello world');
+  });
+
+  it('handles empty string', () => {
+    expect(stripHtmlForEmbedding('')).toBe('');
+  });
+
+  it('strips complex HTML', () => {
+    const html = '<div><h1>Title</h1><p>Content with <a href="#">link</a></p></div>';
+    expect(stripHtmlForEmbedding(html)).toBe('Title Content with link');
+  });
+});
+
+describe('embedText', () => {
+  it('returns embedding vector', async () => {
+    const { embedText } = await import('./embeddings.js');
+    const result = await embedText('hello world');
+    expect(result).toEqual([0.1, 0.2, 0.3, 0.4, 0.5]);
+  });
+});
diff --git a/backend/src/lib/embeddings.ts b/backend/src/lib/embeddings.ts
new file mode 100644
index 0000000..0dcd448
--- /dev/null
+++ b/backend/src/lib/embeddings.ts
@@ -0,0 +1,65 @@
+/**
+ * Embedding utilities for note intelligence — duplicate detection, related notes, knowledge gaps.
+ *
+ * Uses @bytelyst/llm embed() when available, falls back gracefully.
+ */
+
+import { llm } from './llm.js';
+import { config } from './config.js';
+
+/**
+ * Upper bound on the characters sent to the embedding provider. This is a
+ * character count, not a token count, so it is only a rough proxy for the
+ * model's token limit.
+ */
+const MAX_EMBED_INPUT_CHARS = 8000;
+
+/**
+ * Generate an embedding vector for a text string.
+ *
+ * @param text - Text to embed; only the first MAX_EMBED_INPUT_CHARS characters are sent.
+ * @returns The first vector returned by the provider, or null when the provider
+ *   has no embed() method, the text is blank, or no vector came back.
+ */
+export async function embedText(text: string): Promise<number[] | null> {
+  const provider = llm();
+  if (!provider.embed) return null;
+
+  const trimmed = text.slice(0, MAX_EMBED_INPUT_CHARS);
+  // Blank input has nothing to embed; skip the provider round-trip.
+  if (trimmed.trim().length === 0) return null;
+
+  const res = await provider.embed({
+    input: trimmed,
+    model: config.LLM_EMBEDDING_MODEL,
+  });
+  return res.embeddings[0] ?? null;
+}
+
+/**
+ * Compute cosine similarity between two embedding vectors.
+ *
+ * @returns A value between -1 and 1 (1 = same direction). Returns 0 when the
+ *   vectors are empty, differ in length, or either has zero magnitude.
+ */
+export function cosineSimilarity(a: number[], b: number[]): number {
+  if (a.length !== b.length || a.length === 0) return 0;
+  let dot = 0;
+  let magA = 0;
+  let magB = 0;
+  for (let i = 0; i < a.length; i++) {
+    dot += a[i] * b[i];
+    magA += a[i] * a[i];
+    magB += b[i] * b[i];
+  }
+  const denom = Math.sqrt(magA) * Math.sqrt(magB);
+  if (denom === 0) return 0;
+  return dot / denom;
+}
+
+/**
+ * Strip HTML and normalize whitespace for embedding input.
+ */ +export function stripHtmlForEmbedding(html: string): string { + return html.replace(/<[^>]*>/g, ' ').replace(/\s+/g, ' ').trim(); +} diff --git a/backend/src/lib/feature-flags.ts b/backend/src/lib/feature-flags.ts index bd021b5..047b706 100644 --- a/backend/src/lib/feature-flags.ts +++ b/backend/src/lib/feature-flags.ts @@ -13,6 +13,10 @@ const registry = createFlagRegistry({ 'copilot.enabled': true, 'chat.rag_enabled': true, 'onboarding.seed_enabled': true, + 'notelett_auto_summarize_enabled': false, + 'notelett_auto_embed_enabled': false, + 'notelett_duplicate_check_enabled': true, + 'notelett_suggest_links_enabled': true, }, enabled: config.FEATURE_FLAGS_ENABLED, }); diff --git a/backend/src/lib/note-hooks.ts b/backend/src/lib/note-hooks.ts new file mode 100644 index 0000000..e859c4e --- /dev/null +++ b/backend/src/lib/note-hooks.ts @@ -0,0 +1,138 @@ +/** + * Note lifecycle hooks — background AI enrichment triggered after save. + * + * Runs non-blocking (fire-and-forget) so note save is never delayed. + * Gated behind feature flags. + */ + +import { isFeatureEnabled } from './feature-flags.js'; +import { embedText, stripHtmlForEmbedding } from './embeddings.js'; +import { llm } from './llm.js'; +import type { NoteDoc } from '../modules/notes/types.js'; +import { getCollection } from './datastore.js'; +import type { FastifyBaseLogger } from 'fastify'; + +const MIN_WORDS_FOR_SUMMARY = 300; + +/** + * Run after a note is created or updated. + * Triggers background embedding + auto-summarize if enabled. + */ +export function runPostSaveHooks( + note: NoteDoc, + log: FastifyBaseLogger, +): void { + // Fire-and-forget — errors are logged, never thrown + void backgroundEmbed(note, log); + void backgroundAutoSummarize(note, log); +} + +/** + * Compute and store embedding vector for the note. 
+ */ +async function backgroundEmbed( + note: NoteDoc, + log: FastifyBaseLogger, +): Promise { + if (!isFeatureEnabled('notelett_auto_embed_enabled')) return; + + try { + const plainText = stripHtmlForEmbedding(note.body ?? ''); + if (plainText.length < 20) return; // Too short to embed meaningfully + + const embedding = await embedText(plainText); + if (!embedding) return; + + // Update the note document with the embedding (don't overwrite other fields) + const col = getCollection('notes', '/workspaceId'); + const existing = await col.findById(note.id, note.workspaceId); + if (!existing) return; + + await col.upsert({ ...existing, embedding }); + log.debug({ noteId: note.id }, 'note embedding computed'); + } catch (err) { + log.warn({ noteId: note.id, err }, 'background embed failed'); + } +} + +/** + * Auto-generate a summary artifact for long notes. + */ +async function backgroundAutoSummarize( + note: NoteDoc, + log: FastifyBaseLogger, +): Promise { + if (!isFeatureEnabled('notelett_auto_summarize_enabled')) return; + + try { + const plainText = stripHtmlForEmbedding(note.body ?? ''); + const wordCount = plainText.split(/\s+/).filter(Boolean).length; + if (wordCount < MIN_WORDS_FOR_SUMMARY) return; + + // Skip if already has a summary + if (note.summaryArtifactId) return; + + const provider = llm(); + const result = await provider.chatCompletion({ + messages: [ + { role: 'system', content: 'Create a concise summary (2-4 sentences) of the following note. Return only the summary.' 
}, + { role: 'user', content: plainText.slice(0, 8000) }, + ], + temperature: 0.3, + maxTokens: 512, + }); + + const summary = result.content.trim(); + if (!summary) return; + + // Store as artifact + const { createNoteArtifact } = await import('../modules/note-artifacts/repository.js'); + const now = new Date().toISOString(); + const artifact = await createNoteArtifact({ + id: `summary-${note.id}-${Date.now()}`, + productId: note.productId, + workspaceId: note.workspaceId, + userId: note.userId, + noteId: note.id, + artifactType: 'summary', + title: `Auto-summary of ${note.title}`, + description: summary, + createdAt: now, + updatedAt: now, + createdBy: 'system', + updatedBy: 'system', + }); + + // Link artifact back to note + const col = getCollection('notes', '/workspaceId'); + const existing = await col.findById(note.id, note.workspaceId); + if (existing) { + await col.upsert({ ...existing, summaryArtifactId: artifact.id }); + } + + // Record agent action + const { createNoteAgentAction } = await import('../modules/note-agent-actions/repository.js'); + await createNoteAgentAction({ + id: `auto-summary-${note.id}-${Date.now()}`, + productId: note.productId, + workspaceId: note.workspaceId, + userId: note.userId, + noteId: note.id, + actorId: 'system', + actorType: 'agent', + toolName: 'auto_summarize', + actionType: 'auto_enrich', + state: 'applied', + reason: `Auto-generated summary for note with ${wordCount} words`, + afterSummary: summary.slice(0, 200), + createdAt: now, + updatedAt: now, + createdBy: 'system', + updatedBy: 'system', + }); + + log.info({ noteId: note.id, artifactId: artifact.id }, 'auto-summary generated'); + } catch (err) { + log.warn({ noteId: note.id, err }, 'background auto-summarize failed'); + } +} diff --git a/backend/src/modules/note-prompts/routes.ts b/backend/src/modules/note-prompts/routes.ts index 73a33c7..edab652 100644 --- a/backend/src/modules/note-prompts/routes.ts +++ b/backend/src/modules/note-prompts/routes.ts @@ -3,8 +3,11 
@@
  */
 
 import type { FastifyInstance } from 'fastify';
+import { z } from 'zod';
 import { getUserId, getRequestProductId } from '../../lib/request-context.js';
 import { BadRequestError, NotFoundError } from '@bytelyst/errors';
+import { embedText, cosineSimilarity, stripHtmlForEmbedding } from '../../lib/embeddings.js';
+import { llm } from '../../lib/llm.js';
 import {
   CreatePromptTemplateSchema,
   UpdatePromptTemplateSchema,
@@ -121,4 +124,329 @@ export async function notePromptRoutes(app: FastifyInstance): Promise<void> {
 
     return { wordCount, readingTimeMinutes };
   });
+
+  // ── Shared helpers for the note-intelligence endpoints ─────────
+
+  /** One ranked match returned by the similarity endpoints. */
+  type SimilarNote = { id: string; title: string; similarity: number };
+
+  /**
+   * Parse JSON from an LLM reply, tolerating a surrounding Markdown code fence.
+   * Throws (SyntaxError from JSON.parse) when the remaining text is not JSON.
+   */
+  const parseLlmJson = (raw: string): unknown => {
+    const unfenced = raw.trim().replace(/^```[a-zA-Z]*\s*/, '').replace(/\s*```$/, '');
+    return JSON.parse(unfenced);
+  };
+
+  /**
+   * Score the caller's notes in a workspace against a source embedding.
+   *
+   * Only the first 100 notes returned by listNotes are considered. Notes without
+   * a stored embedding are embedded on the fly, one request at a time; bodies
+   * shorter than 20 characters after HTML stripping are skipped.
+   *
+   * @returns Matches at or above `threshold`, best first, capped at `limit`.
+   *   Similarity is rounded to two decimals.
+   */
+  const findSimilarNotes = async (args: {
+    userId: ReturnType<typeof getUserId>;
+    productId: ReturnType<typeof getRequestProductId>;
+    workspaceId: string;
+    sourceEmbedding: number[];
+    excludeIds: ReadonlySet<string>;
+    threshold: number;
+    limit: number;
+  }): Promise<SimilarNote[]> => {
+    const { items: candidates } = await noteRepo.listNotes(args.userId, args.productId, {
+      workspaceId: args.workspaceId,
+      limit: 100,
+      offset: 0,
+    });
+
+    const matches: SimilarNote[] = [];
+    for (const other of candidates) {
+      if (args.excludeIds.has(other.id)) continue;
+
+      let otherEmbedding = other.embedding;
+      if (!otherEmbedding) {
+        const otherPlain = stripHtmlForEmbedding(other.body ?? '');
+        if (otherPlain.length < 20) continue;
+        otherEmbedding = (await embedText(otherPlain)) ?? undefined;
+      }
+      if (!otherEmbedding) continue;
+
+      const similarity = cosineSimilarity(args.sourceEmbedding, otherEmbedding);
+      if (similarity >= args.threshold) {
+        matches.push({ id: other.id, title: other.title, similarity: Math.round(similarity * 100) / 100 });
+      }
+    }
+
+    matches.sort((a, b) => b.similarity - a.similarity);
+    return matches.slice(0, args.limit);
+  };
+
+  /**
+   * Load the requested notes and keep only those owned by the caller.
+   * Ids are de-duplicated first so a repeated id cannot count as two notes.
+   */
+  const loadOwnedNotes = async (
+    noteIds: readonly string[],
+    workspaceId: string,
+    userId: ReturnType<typeof getUserId>,
+    productId: ReturnType<typeof getRequestProductId>,
+  ) => {
+    const uniqueIds = Array.from(new Set(noteIds));
+    const fetched = await Promise.all(uniqueIds.map((nid) => noteRepo.getNote(nid, workspaceId)));
+    return fetched.filter(
+      (n): n is NonNullable<typeof n> => n != null && n.userId === userId && n.productId === productId,
+    );
+  };
+
+  // ── Suggest tags via LLM (F5) ──────────────────────────────────
+  const SuggestTagsSchema = z.object({
+    workspaceId: z.string().min(1).max(128),
+  });
+
+  app.post('/notes/:id/suggest-tags', async (req) => {
+    const userId = getUserId(req);
+    const productId = getRequestProductId(req);
+    const { id } = req.params as { id: string };
+    // Validate rather than cast: a missing body would otherwise throw on destructuring.
+    const body = SuggestTagsSchema.safeParse(req.body);
+    if (!body.success) throw new BadRequestError('workspaceId required');
+    const { workspaceId } = body.data;
+
+    const note = await noteRepo.getNote(id, workspaceId);
+    if (!note || note.userId !== userId || note.productId !== productId) {
+      throw new NotFoundError('Note not found');
+    }
+
+    const plain = stripHtmlForEmbedding(note.body ?? '');
+    const provider = llm();
+    const result = await provider.chatCompletion({
+      messages: [
+        { role: 'system', content: 'Suggest 3-5 tags for this note. Return ONLY a JSON array of lowercase tag strings, e.g. ["tag1","tag2"]. No other text.' },
+        { role: 'user', content: `Title: ${note.title}\n\n${plain.slice(0, 4000)}` },
+      ],
+      temperature: 0.3,
+      maxTokens: 128,
+    });
+
+    try {
+      const parsed = parseLlmJson(result.content);
+      if (!Array.isArray(parsed)) return { tags: [] };
+      return { tags: parsed.filter((t): t is string => typeof t === 'string').slice(0, 5) };
+    } catch {
+      // Unparseable model output is not an error for the caller.
+      return { tags: [] };
+    }
+  });
+
+  // ── Duplicate/similar note detection (F8) ───────────────────────
+  // NOTE(review): feature-flags.ts registers 'notelett_duplicate_check_enabled'
+  // and 'notelett_suggest_links_enabled', but neither route consults them —
+  // confirm whether F8/F9 are meant to be gated.
+  const CheckDuplicatesSchema = z.object({
+    workspaceId: z.string().min(1).max(128),
+    threshold: z.coerce.number().min(0).max(1).default(0.85),
+    limit: z.coerce.number().int().min(1).max(20).default(5),
+  });
+
+  app.post('/notes/:id/check-duplicates', async (req) => {
+    const userId = getUserId(req);
+    const productId = getRequestProductId(req);
+    const { id } = req.params as { id: string };
+    const input = CheckDuplicatesSchema.parse(req.body);
+
+    const note = await noteRepo.getNote(id, input.workspaceId);
+    if (!note || note.userId !== userId || note.productId !== productId) {
+      throw new NotFoundError('Note not found');
+    }
+
+    const noteEmbedding = await embedText(stripHtmlForEmbedding(note.body ?? ''));
+    if (!noteEmbedding) {
+      return { duplicates: [], message: 'Embedding not available' };
+    }
+
+    const duplicates = await findSimilarNotes({
+      userId,
+      productId,
+      workspaceId: input.workspaceId,
+      sourceEmbedding: noteEmbedding,
+      excludeIds: new Set([id]),
+      threshold: input.threshold,
+      limit: input.limit,
+    });
+    return { duplicates };
+  });
+
+  // ── Suggest related notes to link (F9) ──────────────────────────
+  const SuggestLinksSchema = z.object({
+    workspaceId: z.string().min(1).max(128),
+    threshold: z.coerce.number().min(0).max(1).default(0.6),
+    limit: z.coerce.number().int().min(1).max(10).default(5),
+  });
+
+  app.post('/notes/:id/suggest-links', async (req) => {
+    const userId = getUserId(req);
+    const productId = getRequestProductId(req);
+    const { id } = req.params as { id: string };
+    const input = SuggestLinksSchema.parse(req.body);
+
+    const note = await noteRepo.getNote(id, input.workspaceId);
+    if (!note || note.userId !== userId || note.productId !== productId) {
+      throw new NotFoundError('Note not found');
+    }
+
+    const noteEmbedding = await embedText(stripHtmlForEmbedding(note.body ?? ''));
+    if (!noteEmbedding) {
+      return { suggestions: [], message: 'Embedding not available' };
+    }
+
+    const suggestions = await findSimilarNotes({
+      userId,
+      productId,
+      workspaceId: input.workspaceId,
+      sourceEmbedding: noteEmbedding,
+      // Skip the note itself and anything it already links to.
+      excludeIds: new Set([id, ...(note.links ?? [])]),
+      threshold: input.threshold,
+      limit: input.limit,
+    });
+    return { suggestions };
+  });
+
+  // ── Knowledge gap detection (F12) ───────────────────────────────
+  app.post('/workspaces/:wsId/knowledge-gaps', async (req) => {
+    const userId = getUserId(req);
+    const productId = getRequestProductId(req);
+    const { wsId } = req.params as { wsId: string };
+
+    const { items: notes } = await noteRepo.listNotes(userId, productId, {
+      workspaceId: wsId,
+      limit: 100,
+      offset: 0,
+    });
+
+    if (notes.length === 0) {
+      return { gaps: [], topicMap: {} };
+    }
+
+    // Build topic summary from note titles and tags
+    const topicLines = notes.map((n) => {
+      const tags = (n.tags ?? []).join(', ');
+      return `- "${n.title}"${tags ? ` [tags: ${tags}]` : ''}`;
+    }).join('\n');
+
+    const provider = llm();
+    const result = await provider.chatCompletion({
+      messages: [
+        {
+          role: 'system',
+          content: `You analyze a workspace of notes and identify knowledge gaps.
+Return a JSON object with:
+- "gaps": array of { "topic": string, "description": string, "suggestedTitle": string }
+- "topicMap": object mapping topic names to counts
+Return ONLY valid JSON, no other text.`,
+        },
+        {
+          role: 'user',
+          content: `This workspace has ${notes.length} notes:\n${topicLines}\n\nIdentify 3-5 topics that are mentioned but under-covered, or important related topics that are missing entirely.`,
+        },
+      ],
+      temperature: 0.5,
+      maxTokens: 1024,
+    });
+
+    try {
+      const parsed = parseLlmJson(result.content);
+      if (typeof parsed !== 'object' || parsed === null) throw new TypeError('expected a JSON object');
+      const { gaps, topicMap } = parsed as { gaps?: unknown; topicMap?: unknown };
+      return {
+        gaps: Array.isArray(gaps) ? gaps : [],
+        topicMap: typeof topicMap === 'object' && topicMap !== null ? topicMap : {},
+      };
+    } catch {
+      return { gaps: [], topicMap: {}, raw: result.content };
+    }
+  });
+
+  // ── Compare notes (F14) ─────────────────────────────────────────
+  const CompareNotesSchema = z.object({
+    noteIds: z.array(z.string().min(1)).min(2).max(5),
+    workspaceId: z.string().min(1).max(128),
+  });
+
+  app.post('/notes/compare', async (req) => {
+    const userId = getUserId(req);
+    const productId = getRequestProductId(req);
+    const input = CompareNotesSchema.parse(req.body);
+
+    const validNotes = await loadOwnedNotes(input.noteIds, input.workspaceId, userId, productId);
+    if (validNotes.length < 2) {
+      throw new BadRequestError('Need at least 2 accessible notes to compare');
+    }
+
+    const noteSummaries = validNotes.map((n) => {
+      const plain = stripHtmlForEmbedding(n.body ?? '').slice(0, 2000);
+      return `## ${n.title}\n${plain}`;
+    }).join('\n\n---\n\n');
+
+    const provider = llm();
+    const result = await provider.chatCompletion({
+      messages: [
+        { role: 'system', content: 'Compare the following notes. Identify similarities, differences, contradictions, and complementary information. Structure your response with clear headings.' },
+        { role: 'user', content: noteSummaries },
+      ],
+      temperature: 0.4,
+      maxTokens: 2048,
+    });
+
+    return {
+      content: result.content,
+      model: result.model,
+      usage: result.usage,
+      noteCount: validNotes.length,
+    };
+  });
+
+  // ── Merge notes (F13) ──────────────────────────────────────────
+  const MergeNotesSchema = z.object({
+    noteIds: z.array(z.string().min(1)).min(2).max(10),
+    workspaceId: z.string().min(1).max(128),
+  });
+
+  app.post('/notes/merge', async (req) => {
+    const userId = getUserId(req);
+    const productId = getRequestProductId(req);
+    const input = MergeNotesSchema.parse(req.body);
+
+    const validNotes = await loadOwnedNotes(input.noteIds, input.workspaceId, userId, productId);
+    if (validNotes.length < 2) {
+      throw new BadRequestError('Need at least 2 accessible notes to merge');
+    }
+
+    const noteContents = validNotes.map((n) => {
+      const plain = stripHtmlForEmbedding(n.body ?? '').slice(0, 3000);
+      return `## ${n.title}\n${plain}`;
+    }).join('\n\n---\n\n');
+
+    const provider = llm();
+    const result = await provider.chatCompletion({
+      messages: [
+        { role: 'system', content: 'Merge the following notes into a single coherent document. Combine information logically, remove redundancy, preserve all unique facts. Use clear headings and structure.' },
+        { role: 'user', content: noteContents },
+      ],
+      temperature: 0.3,
+      maxTokens: 4096,
+    });
+
+    return {
+      content: result.content,
+      model: result.model,
+      usage: result.usage,
+      sourceNoteIds: validNotes.map((n) => n.id),
+    };
+  });
 }
diff --git a/backend/src/modules/notes/types.ts b/backend/src/modules/notes/types.ts
index 0a7943f..2797151 100644
--- a/backend/src/modules/notes/types.ts
+++ b/backend/src/modules/notes/types.ts
@@ -20,6 +20,10 @@ export interface NoteDoc {
   createdBy: string;
   updatedBy: string;
   agentId?: string;
+  /** Embedding vector for the note body, stored by the background embed hook. */
+  embedding?: number[];
+  /** Id of the auto-generated summary artifact linked by the auto-summarize hook. */
+  summaryArtifactId?: string;
   _ts?: number;
   _etag?: string;
 }