diff --git a/backend/src/lib/embeddings.test.ts b/backend/src/lib/embeddings.test.ts
new file mode 100644
index 0000000..092037f
--- /dev/null
+++ b/backend/src/lib/embeddings.test.ts
@@ -0,0 +1,81 @@
+/**
+ * Tests for embeddings utility — embedText + cosineSimilarity + stripHtmlForEmbedding.
+ */
+
+import { describe, it, expect, vi } from 'vitest';
+import { cosineSimilarity, stripHtmlForEmbedding } from './embeddings.js';
+
+// Stubs: every llm() provider's embed() resolves to one fixed vector; config has only the model name.
+vi.mock('./llm.js', () => {
+  const reply = () => ({
+    embeddings: [[0.1, 0.2, 0.3, 0.4, 0.5]],
+    model: 'mock-embed',
+    usage: { promptTokens: 5, completionTokens: 0, totalTokens: 5 },
+  });
+  return { llm: vi.fn(() => ({ embed: vi.fn(async () => reply()) })) };
+});
+vi.mock('./config.js', () => {
+  return { config: { LLM_EMBEDDING_MODEL: 'text-embedding-3-small' } };
+});
+
+describe('cosineSimilarity', () => {
+  const DIGITS = 5; // decimal digits compared by toBeCloseTo, absorbing float rounding
+
+  it('returns 1 for identical vectors', () => {
+    const sample = [1, 2, 3, 4];
+    expect(cosineSimilarity(sample, sample)).toBeCloseTo(1, DIGITS);
+  });
+
+  it('returns 0 for orthogonal vectors', () => {
+    expect(cosineSimilarity([1, 0], [0, 1])).toBeCloseTo(0, DIGITS);
+  });
+
+  it('returns -1 for opposite vectors', () => {
+    expect(cosineSimilarity([1, 0], [-1, 0])).toBeCloseTo(-1, DIGITS);
+  });
+
+  it('returns 0 for empty vectors', () => {
+    expect(cosineSimilarity([], [])).toBe(0);
+  });
+
+  it('returns 0 for mismatched lengths', () => {
+    expect(cosineSimilarity([1, 2], [1, 2, 3])).toBe(0);
+  });
+
+  it('returns 0 for zero vectors', () => {
+    expect(cosineSimilarity([0, 0, 0], [1, 2, 3])).toBe(0);
+  });
+
+  it('computes correct similarity for known vectors', () => {
+    // dot([1,2,3], [4,5,6]) = 4+10+18 = 32; the norms are sqrt(14) and sqrt(77).
+    const expected = 32 / (Math.sqrt(14) * Math.sqrt(77));
+    expect(cosineSimilarity([1, 2, 3], [4, 5, 6])).toBeCloseTo(expected, DIGITS);
+  });
+});
+
+describe('stripHtmlForEmbedding', () => { // inputs carry real markup so the tag regex is exercised
+  it('strips HTML tags', () => {
+    expect(stripHtmlForEmbedding('<p>Hello <b>world</b></p>')).toBe('Hello world');
+  });
+
+  it('collapses whitespace', () => {
+    expect(stripHtmlForEmbedding('Hello \n\n world')).toBe('Hello world');
+  });
+
+  it('handles empty string', () => {
+    expect(stripHtmlForEmbedding('')).toBe('');
+  });
+
+  it('strips complex HTML', () => { // nested tags each become a space, then runs collapse
+    const html = '<div><h1>Title</h1><p>Content with <a href="#">link</a></p></div>';
+    expect(stripHtmlForEmbedding(html)).toBe('Title Content with link');
+  });
+});
+
+describe('embedText', () => {
+  it('returns embedding vector', async () => {
+    // Expect the first (only) vector of the mocked embed() response from this file.
+    const mod = await import('./embeddings.js');
+    expect(await mod.embedText('hello world')).toEqual([0.1, 0.2, 0.3, 0.4, 0.5]);
+  });
+});
diff --git a/backend/src/lib/embeddings.ts b/backend/src/lib/embeddings.ts
new file mode 100644
index 0000000..0dcd448
--- /dev/null
+++ b/backend/src/lib/embeddings.ts
@@ -0,0 +1,50 @@
+/**
+ * Embedding utilities for note intelligence — duplicate detection, related notes, knowledge gaps.
+ *
+ * Uses @bytelyst/llm embed() when available, falls back gracefully.
+ */
+
+import { llm } from './llm.js';
+import { config } from './config.js';
+
+/**
+ * Generate an embedding vector for a text string.
+ * @param text - Plain text to embed; only its first 8000 characters are sent.
+ * @returns The first vector of the response, or null when the provider has no
+ *   embed() or the response carries no vectors.
+ */
+export async function embedText(text: string): Promise<number[] | null> {
+  const provider = llm();
+  if (!provider.embed) return null;
+
+  // Keeps the first 8000 characters (a character cap, not a token count) to bound request size.
+  const input = text.slice(0, 8000);
+  const res = await provider.embed({ input, model: config.LLM_EMBEDDING_MODEL });
+  return res.embeddings[0] ?? null;
+}
+
+/**
+ * Cosine of the angle between two vectors: dot(a, b) / (|a| * |b|).
+ * Yields 0 when lengths differ, either vector is empty, or a norm is zero.
+ */
+export function cosineSimilarity(a: number[], b: number[]): number {
+  if (a.length === 0 || a.length !== b.length) return 0;
+  let dotProduct = 0;
+  let sumSqA = 0;
+  let sumSqB = 0;
+  for (const [idx, x] of a.entries()) {
+    const y = b[idx];
+    dotProduct += x * y;
+    sumSqA += x * x;
+    sumSqB += y * y;
+  }
+  const normProduct = Math.sqrt(sumSqA) * Math.sqrt(sumSqB);
+  return normProduct === 0 ? 0 : dotProduct / normProduct;
+}
+
+/** Strip HTML for embedding input: every tag becomes a space, then whitespace is normalized. */
+export function stripHtmlForEmbedding(html: string): string {
+  const withoutTags = html.replace(/<[^>]*>/g, ' ');
+  const singleSpaced = withoutTags.replace(/\s+/g, ' ');
+  return singleSpaced.trim();
+}
diff --git a/backend/src/lib/feature-flags.ts b/backend/src/lib/feature-flags.ts
index bd021b5..047b706 100644
--- a/backend/src/lib/feature-flags.ts
+++ b/backend/src/lib/feature-flags.ts
@@ -13,6 +13,10 @@ const registry = createFlagRegistry({
'copilot.enabled': true,
'chat.rag_enabled': true,
'onboarding.seed_enabled': true,
+ 'notelett_auto_summarize_enabled': false,
+ 'notelett_auto_embed_enabled': false,
+ 'notelett_duplicate_check_enabled': true,
+ 'notelett_suggest_links_enabled': true,
},
enabled: config.FEATURE_FLAGS_ENABLED,
});
diff --git a/backend/src/lib/note-hooks.ts b/backend/src/lib/note-hooks.ts
new file mode 100644
index 0000000..e859c4e
--- /dev/null
+++ b/backend/src/lib/note-hooks.ts
@@ -0,0 +1,138 @@
+/**
+ * Note lifecycle hooks — background AI enrichment triggered after save.
+ *
+ * Runs non-blocking (fire-and-forget) so note save is never delayed.
+ * Gated behind feature flags.
+ */
+
+import { isFeatureEnabled } from './feature-flags.js';
+import { embedText, stripHtmlForEmbedding } from './embeddings.js';
+import { llm } from './llm.js';
+import type { NoteDoc } from '../modules/notes/types.js';
+import { getCollection } from './datastore.js';
+import type { FastifyBaseLogger } from 'fastify';
+
+const MIN_WORDS_FOR_SUMMARY = 300; // notes with fewer words are skipped by backgroundAutoSummarize
+
+/**
+ * Kick off background enrichment (embedding, then auto-summary) after a note save.
+ * Returns at once. The hooks run one after another because each re-reads the note
+ * and upserts the whole document; in parallel, one write could drop the other's field.
+ */
+export function runPostSaveHooks(note: NoteDoc, log: FastifyBaseLogger): void {
+  // Fire-and-forget — errors are logged, never thrown
+  void (async () => {
+    await backgroundEmbed(note, log);
+    await backgroundAutoSummarize(note, log);
+  })().catch((err: unknown) => log.warn({ noteId: note.id, err }, 'post-save hooks failed'));
+}
+
+/**
+ * Compute the note's embedding and store it on the persisted note document.
+ * No-op when the auto-embed flag is off, the stripped body is under 20 characters,
+ * no embedding is produced, or the note can no longer be found.
+ * Failures are logged at warn level and never propagate to the caller.
+ */
+async function backgroundEmbed(note: NoteDoc, log: FastifyBaseLogger): Promise<void> {
+  if (!isFeatureEnabled('notelett_auto_embed_enabled')) return;
+
+  try {
+    const plainText = stripHtmlForEmbedding(note.body ?? '');
+    if (plainText.length < 20) return; // Too short to embed meaningfully
+
+    const embedding = await embedText(plainText);
+    if (!embedding) return;
+
+    // Re-read the stored note so the upsert carries its current fields plus the embedding.
+    const col = getCollection('notes', '/workspaceId');
+    const existing = await col.findById(note.id, note.workspaceId);
+    if (!existing) return;
+
+    await col.upsert({ ...existing, embedding });
+    log.debug({ noteId: note.id }, 'note embedding computed');
+  } catch (err) {
+    log.warn({ noteId: note.id, err }, 'background embed failed');
+  }
+}
+
+/**
+ * Auto-generate a summary artifact for long notes.
+ * Skips when the flag is off, the note already links a summary, or the stripped body
+ * has fewer than MIN_WORDS_FOR_SUMMARY words. On success it creates the artifact,
+ * links it on the stored note, and records an agent action. Failures are only logged.
+ */
+async function backgroundAutoSummarize(note: NoteDoc, log: FastifyBaseLogger): Promise<void> {
+  if (!isFeatureEnabled('notelett_auto_summarize_enabled')) return;
+
+  try {
+    // Skip if already has a summary (checked first: it needs no text processing)
+    if (note.summaryArtifactId) return;
+
+    const plainText = stripHtmlForEmbedding(note.body ?? '');
+    const wordCount = plainText.split(/\s+/).filter(Boolean).length;
+    if (wordCount < MIN_WORDS_FOR_SUMMARY) return;
+
+    const provider = llm();
+    const result = await provider.chatCompletion({
+      messages: [
+        { role: 'system', content: 'Create a concise summary (2-4 sentences) of the following note. Return only the summary.' },
+        { role: 'user', content: plainText.slice(0, 8000) },
+      ],
+      temperature: 0.3,
+      maxTokens: 512,
+    });
+
+    const summary = result.content.trim();
+    if (!summary) return;
+
+    // Store as artifact
+    const { createNoteArtifact } = await import('../modules/note-artifacts/repository.js');
+    const now = new Date().toISOString();
+    const artifact = await createNoteArtifact({
+      id: `summary-${note.id}-${Date.now()}`,
+      productId: note.productId,
+      workspaceId: note.workspaceId,
+      userId: note.userId,
+      noteId: note.id,
+      artifactType: 'summary',
+      title: `Auto-summary of ${note.title}`,
+      description: summary,
+      createdAt: now,
+      updatedAt: now,
+      createdBy: 'system',
+      updatedBy: 'system',
+    });
+
+    // Link artifact back to note (re-read first so the upsert carries the stored fields)
+    const col = getCollection('notes', '/workspaceId');
+    const existing = await col.findById(note.id, note.workspaceId);
+    if (existing) {
+      await col.upsert({ ...existing, summaryArtifactId: artifact.id });
+    }
+
+    // Record agent action
+    const { createNoteAgentAction } = await import('../modules/note-agent-actions/repository.js');
+    await createNoteAgentAction({
+      id: `auto-summary-${note.id}-${Date.now()}`,
+      productId: note.productId,
+      workspaceId: note.workspaceId,
+      userId: note.userId,
+      noteId: note.id,
+      actorId: 'system',
+      actorType: 'agent',
+      toolName: 'auto_summarize',
+      actionType: 'auto_enrich',
+      state: 'applied',
+      reason: `Auto-generated summary for note with ${wordCount} words`,
+      afterSummary: summary.slice(0, 200),
+      createdAt: now,
+      updatedAt: now,
+      createdBy: 'system',
+      updatedBy: 'system',
+    });
+
+    log.info({ noteId: note.id, artifactId: artifact.id }, 'auto-summary generated');
+  } catch (err) {
+    log.warn({ noteId: note.id, err }, 'background auto-summarize failed');
+  }
+}
diff --git a/backend/src/modules/note-prompts/routes.ts b/backend/src/modules/note-prompts/routes.ts
index 73a33c7..edab652 100644
--- a/backend/src/modules/note-prompts/routes.ts
+++ b/backend/src/modules/note-prompts/routes.ts
@@ -3,8 +3,11 @@
*/
import type { FastifyInstance } from 'fastify';
+import { z } from 'zod';
import { getUserId, getRequestProductId } from '../../lib/request-context.js';
import { BadRequestError, NotFoundError } from '@bytelyst/errors';
+import { embedText, cosineSimilarity, stripHtmlForEmbedding } from '../../lib/embeddings.js';
+import { llm } from '../../lib/llm.js';
import {
CreatePromptTemplateSchema,
UpdatePromptTemplateSchema,
@@ -121,4 +124,285 @@ export async function notePromptRoutes(app: FastifyInstance): Promise {
return { wordCount, readingTimeMinutes };
});
+
+  // ── Suggest tags via LLM (F5) ──────────────────────────────────
+  // Body: { workspaceId }. Responds { tags }: up to 5 strings, [] on unusable model output.
+  app.post('/notes/:id/suggest-tags', async (req) => {
+    const userId = getUserId(req);
+    const productId = getRequestProductId(req);
+    const { id } = req.params as { id: string };
+    // "?? {}" lets a missing body reach the 400 below instead of a destructuring TypeError.
+    const { workspaceId } = (req.body ?? {}) as { workspaceId?: unknown };
+    if (typeof workspaceId !== 'string' || !workspaceId) throw new BadRequestError('workspaceId required');
+
+    const note = await noteRepo.getNote(id, workspaceId);
+    if (!note || note.userId !== userId || note.productId !== productId) {
+      throw new NotFoundError('Note not found');
+    }
+    const plain = stripHtmlForEmbedding(note.body ?? '');
+    const result = await llm().chatCompletion({
+      messages: [
+        { role: 'system', content: 'Suggest 3-5 tags for this note. Return ONLY a JSON array of lowercase tag strings, e.g. ["tag1","tag2"]. No other text.' },
+        { role: 'user', content: `Title: ${note.title}\n\n${plain.slice(0, 4000)}` },
+      ],
+      temperature: 0.3,
+      maxTokens: 128,
+    });
+    try {
+      const parsed: unknown = JSON.parse(result.content.trim());
+      if (!Array.isArray(parsed)) return { tags: [] };
+      return { tags: parsed.filter((t): t is string => typeof t === 'string').slice(0, 5) };
+    } catch {
+      return { tags: [] };
+    }
+  });
+
+  // ── Duplicate/similar note detection (F8) ───────────────────────
+  // Body: workspaceId plus optional threshold (default 0.85) and limit (default 5).
+  const CheckDuplicatesSchema = z.object({
+    workspaceId: z.string().min(1).max(128),
+    threshold: z.coerce.number().min(0).max(1).default(0.85),
+    limit: z.coerce.number().int().min(1).max(20).default(5),
+  });
+
+  app.post('/notes/:id/check-duplicates', async (req) => {
+    const userId = getUserId(req);
+    const productId = getRequestProductId(req);
+    const { id } = req.params as { id: string };
+    const { workspaceId, threshold, limit } = CheckDuplicatesSchema.parse(req.body);
+
+    const source = await noteRepo.getNote(id, workspaceId);
+    if (!(source && source.userId === userId && source.productId === productId)) {
+      throw new NotFoundError('Note not found');
+    }
+
+    const sourceVector = await embedText(stripHtmlForEmbedding(source.body ?? ''));
+    if (!sourceVector) return { duplicates: [], message: 'Embedding not available' };
+
+    // Candidates: the first 100 notes listNotes returns for this workspace.
+    const listing = await noteRepo.listNotes(userId, productId, {
+      workspaceId,
+      limit: 100,
+      offset: 0,
+    });
+
+    const matches: Array<{ id: string; title: string; similarity: number }> = [];
+    for (const candidate of listing.items) {
+      if (candidate.id === id) continue;
+
+      // Prefer the stored vector; otherwise embed on the fly (bodies under 20 chars are skipped).
+      let vector = candidate.embedding;
+      if (!vector) {
+        const text = stripHtmlForEmbedding(candidate.body ?? '');
+        if (text.length < 20) continue;
+        vector = (await embedText(text)) ?? undefined;
+      }
+      if (!vector) continue;
+
+      const score = cosineSimilarity(sourceVector, vector);
+      if (score >= threshold) {
+        // The reported similarity is rounded to two decimals.
+        matches.push({ id: candidate.id, title: candidate.title, similarity: Math.round(score * 100) / 100 });
+      }
+    }
+
+    // Highest similarity first, truncated to the requested limit.
+    matches.sort((x, y) => y.similarity - x.similarity);
+    return { duplicates: matches.slice(0, limit) };
+  });
+
+  // ── Suggest related notes to link (F9) ──────────────────────────
+  // Body: workspaceId plus optional threshold (default 0.6) and limit (default 5).
+  const SuggestLinksSchema = z.object({
+    workspaceId: z.string().min(1).max(128),
+    threshold: z.coerce.number().min(0).max(1).default(0.6),
+    limit: z.coerce.number().int().min(1).max(10).default(5),
+  });
+
+  app.post('/notes/:id/suggest-links', async (req) => {
+    const userId = getUserId(req);
+    const productId = getRequestProductId(req);
+    const { id } = req.params as { id: string };
+    const { workspaceId, threshold, limit } = SuggestLinksSchema.parse(req.body);
+
+    const source = await noteRepo.getNote(id, workspaceId);
+    if (!(source && source.userId === userId && source.productId === productId)) {
+      throw new NotFoundError('Note not found');
+    }
+
+    const sourceVector = await embedText(stripHtmlForEmbedding(source.body ?? ''));
+    if (!sourceVector) return { suggestions: [], message: 'Embedding not available' };
+
+    const listing = await noteRepo.listNotes(userId, productId, {
+      workspaceId,
+      limit: 100,
+      offset: 0,
+    });
+
+    // Notes already linked from the source are never suggested again.
+    const alreadyLinked = new Set(source.links ?? []);
+
+    const ranked: Array<{ id: string; title: string; similarity: number }> = [];
+
+    for (const candidate of listing.items) {
+      if (candidate.id === id || alreadyLinked.has(candidate.id)) continue;
+
+      // Prefer the stored vector; otherwise embed on the fly (bodies under 20 chars are skipped).
+      let vector = candidate.embedding;
+      if (!vector) {
+        const text = stripHtmlForEmbedding(candidate.body ?? '');
+        if (text.length < 20) continue;
+        vector = (await embedText(text)) ?? undefined;
+      }
+      if (!vector) continue;
+
+      const score = cosineSimilarity(sourceVector, vector);
+      if (score >= threshold) {
+        ranked.push({ id: candidate.id, title: candidate.title, similarity: Math.round(score * 100) / 100 });
+      }
+    }
+
+    // Highest similarity first, truncated to the requested limit.
+    ranked.sort((x, y) => y.similarity - x.similarity);
+    return { suggestions: ranked.slice(0, limit) };
+  });
+
+  // ── Knowledge gap detection (F12) ───────────────────────────────
+  // Asks the LLM which topics a workspace under-covers, judging only by note titles
+  // and tags. Responds { gaps, topicMap }, plus the raw model text when the reply is unusable.
+  app.post('/workspaces/:wsId/knowledge-gaps', async (req) => {
+    const userId = getUserId(req);
+    const productId = getRequestProductId(req);
+    const { wsId } = req.params as { wsId: string };
+
+    // Only the first 100 notes listNotes returns are considered.
+    const listing = await noteRepo.listNotes(userId, productId, {
+      workspaceId: wsId,
+      limit: 100,
+      offset: 0,
+    });
+    const notes = listing.items;
+    if (notes.length === 0) {
+      return { gaps: [], topicMap: {} };
+    }
+
+    // One bullet per note: quoted title, then its tags when it has any.
+    const bullets: string[] = [];
+    for (const entry of notes) {
+      const tagList = (entry.tags ?? []).join(', ');
+      const suffix = tagList ? ` [tags: ${tagList}]` : '';
+      bullets.push(`- "${entry.title}"${suffix}`);
+    }
+    const topicLines = bullets.join('\n');
+
+    const systemPrompt = `You analyze a workspace of notes and identify knowledge gaps.
+Return a JSON object with:
+- "gaps": array of { "topic": string, "description": string, "suggestedTitle": string }
+- "topicMap": object mapping topic names to counts
+Return ONLY valid JSON, no other text.`;
+    const userPrompt = `This workspace has ${notes.length} notes:\n${topicLines}\n\nIdentify 3-5 topics that are mentioned but under-covered, or important related topics that are missing entirely.`;
+
+    const result = await llm().chatCompletion({
+      messages: [
+        { role: 'system', content: systemPrompt },
+        { role: 'user', content: userPrompt },
+      ],
+      temperature: 0.5,
+      maxTokens: 1024,
+    });
+
+    try {
+      const { gaps, topicMap } = JSON.parse(result.content.trim());
+      return { gaps: gaps ?? [], topicMap: topicMap ?? {} };
+    } catch {
+      // Reply could not be read as a JSON object: return empty results and echo the raw text.
+      return { gaps: [], topicMap: {}, raw: result.content };
+    }
+  });
+
+  // ── Compare notes (F14) ─────────────────────────────────────────
+  // Body: workspaceId and 2-5 noteIds. Notes that are missing or fail the user/product
+  // check are left out; fewer than 2 remaining notes is a 400.
+  const CompareNotesSchema = z.object({
+    noteIds: z.array(z.string().min(1)).min(2).max(5),
+    workspaceId: z.string().min(1).max(128),
+  });
+
+  app.post('/notes/compare', async (req) => {
+    const userId = getUserId(req);
+    const productId = getRequestProductId(req);
+    const { noteIds, workspaceId } = CompareNotesSchema.parse(req.body);
+
+    const fetched = await Promise.all(
+      noteIds.map((nid) => noteRepo.getNote(nid, workspaceId)),
+    );
+    const usable = fetched.filter(
+      (n): n is NonNullable<typeof n> => !!n && n.userId === userId && n.productId === productId,
+    );
+    if (usable.length < 2) {
+      throw new BadRequestError('Need at least 2 accessible notes to compare');
+    }
+
+    // Per note: a "## title" heading followed by at most 2000 characters of stripped body.
+    const sections = usable.map((n) => {
+      const text = stripHtmlForEmbedding(n.body ?? '').slice(0, 2000);
+      return `## ${n.title}\n${text}`;
+    });
+    const prompt = sections.join('\n\n---\n\n');
+
+    const { content, model, usage } = await llm().chatCompletion({
+      messages: [
+        { role: 'system', content: 'Compare the following notes. Identify similarities, differences, contradictions, and complementary information. Structure your response with clear headings.' },
+        { role: 'user', content: prompt },
+      ],
+      temperature: 0.4,
+      maxTokens: 2048,
+    });
+
+    // Pass the model's answer through, plus how many notes were actually compared.
+    return { content, model, usage, noteCount: usable.length };
+  });
+
+  // ── Merge notes (F13) ──────────────────────────────────────────
+  // Body: workspaceId and 2-10 noteIds. Responds with the LLM-written merged text and the
+  // ids of the notes actually used; this handler itself performs no datastore writes.
+  const MergeNotesSchema = z.object({
+    noteIds: z.array(z.string().min(1)).min(2).max(10),
+    workspaceId: z.string().min(1).max(128),
+  });
+
+  app.post('/notes/merge', async (req) => {
+    const userId = getUserId(req);
+    const productId = getRequestProductId(req);
+    const { noteIds, workspaceId } = MergeNotesSchema.parse(req.body);
+
+    const fetched = await Promise.all(
+      noteIds.map((nid) => noteRepo.getNote(nid, workspaceId)),
+    );
+    const usable = fetched.filter(
+      (n): n is NonNullable<typeof n> => !!n && n.userId === userId && n.productId === productId,
+    );
+    if (usable.length < 2) {
+      throw new BadRequestError('Need at least 2 accessible notes to merge');
+    }
+
+    // Per note: a "## title" heading followed by at most 3000 characters of stripped body.
+    const sections = usable.map((n) => {
+      const text = stripHtmlForEmbedding(n.body ?? '').slice(0, 3000);
+      return `## ${n.title}\n${text}`;
+    });
+
+    const { content, model, usage } = await llm().chatCompletion({
+      messages: [
+        { role: 'system', content: 'Merge the following notes into a single coherent document. Combine information logically, remove redundancy, preserve all unique facts. Use clear headings and structure.' },
+        { role: 'user', content: sections.join('\n\n---\n\n') },
+      ],
+      temperature: 0.3,
+      maxTokens: 4096,
+    });
+
+    // Only notes that passed the ownership filter are reported as sources.
+    const sourceNoteIds = usable.map((n) => n.id);
+    return { content, model, usage, sourceNoteIds };
+  });
}
diff --git a/backend/src/modules/notes/types.ts b/backend/src/modules/notes/types.ts
index 0a7943f..2797151 100644
--- a/backend/src/modules/notes/types.ts
+++ b/backend/src/modules/notes/types.ts
@@ -20,6 +20,8 @@ export interface NoteDoc {
createdBy: string;
updatedBy: string;
agentId?: string;
+ embedding?: number[];
+ summaryArtifactId?: string;
_ts?: number;
_etag?: string;
}