diff --git a/services/extraction-service/src/modules/tasks/seed.test.ts b/services/extraction-service/src/modules/tasks/seed.test.ts
new file mode 100644
index 00000000..563e0626
--- /dev/null
+++ b/services/extraction-service/src/modules/tasks/seed.test.ts
@@ -0,0 +1,84 @@
+import { describe, it, expect } from 'vitest';
+
+/**
+ * Seed module tests — schema checks for the built-in extraction task shapes.
+ * The Cosmos upsert itself is not exercised (that would need mocking);
+ * each task shape below is only run through the Zod schema.
+ */
+
+import { ExtractionTaskSchema } from './types.js';
+
+// Built-in task IDs, listed inline here rather than imported from the seed module
+const BUILTIN_TASK_IDS = [
+  'transcript-extraction',
+  'triage',
+  'memory-insight',
+  'reflection-enrichment',
+  'bug-report-extraction',
+];
+
+// Trimmed-down task shapes, one per built-in ID, each parsed by the schema below
+const TASK_SHAPES = [
+  {
+    id: 'transcript-extraction',
+    name: 'Transcript Extraction',
+    prompt: 'Extract action items, decisions, questions, deadlines, people, and topics.',
+    classes: ['action_item', 'decision', 'question', 'deadline', 'person', 'topic'],
+    builtIn: true,
+    productId: 'lysnrai',
+  },
+  {
+    id: 'triage',
+    name: 'MindLyst Triage',
+    prompt: 'Analyze captures and extract topics, entities, actions, emotions, brain signals.',
+    classes: ['topic', 'entity', 'action', 'emotion', 'date_reference', 'brain_signal'],
+    builtIn: true,
+    productId: 'lysnrai',
+  },
+  {
+    id: 'memory-insight',
+    name: 'Memory Insight Extraction',
+    prompt: 'Extract patterns, themes, relationships, milestones.',
+    classes: ['pattern', 'recurring_theme', 'relationship', 'milestone'],
+    builtIn: true,
+    productId: 'lysnrai',
+  },
+  {
+    id: 'reflection-enrichment',
+    name: 'Reflection Enrichment',
+    prompt: 'Extract emotional states, accomplishments, concerns, goal progress.',
+    classes: ['emotional_state', 'accomplishment', 'concern', 'goal_progress'],
+    builtIn: true,
+    productId: 'lysnrai',
+  },
+  {
+    id: 'bug-report-extraction',
+    name: 'Bug Report Extraction',
+    prompt: 'Extract STR, expected, actual, component, severity.',
+    classes: [
+      'steps_to_reproduce',
+      'expected_behavior',
+      'actual_behavior',
+      'affected_component',
+      'severity',
+    ],
+    builtIn: true,
+    productId: 'lysnrai',
+  },
+];
+
+describe('seed built-in tasks', () => {
+  it('defines exactly 5 built-in tasks', () => {
+    expect(BUILTIN_TASK_IDS).toHaveLength(5);
+  });
+
+  it('all task IDs are unique', () => {
+    const distinctIds = new Set(BUILTIN_TASK_IDS);
+    expect(distinctIds.size).toBe(BUILTIN_TASK_IDS.length);
+  });
+
+  it.each(TASK_SHAPES)('$id task validates against schema', (shape) => {
+    const { success } = ExtractionTaskSchema.safeParse(shape);
+    expect(success).toBe(true);
+  });
+});
diff --git a/services/extraction-service/src/modules/tasks/seed.ts b/services/extraction-service/src/modules/tasks/seed.ts
new file mode 100644
index 00000000..9749dd70
--- /dev/null
+++ b/services/extraction-service/src/modules/tasks/seed.ts
@@ -0,0 +1,205 @@
+/**
+ * Seed built-in extraction tasks into Cosmos DB on service startup.
+ * Idempotent — uses upsert so safe to call on every boot.
+ */
+
+import { upsertTask } from './repository.js';
+import { DEFAULT_PRODUCT_ID } from '../../lib/product-config.js';
+import type { ExtractionTaskDoc } from './types.js';
+
+/**
+ * Built-in task definitions.
+ * These match the Python task_registry.py definitions exactly.
+ */
+const BUILTIN_TASKS: Omit<ExtractionTaskDoc, 'createdAt' | 'updatedAt'>[] = [
+  {
+    id: 'transcript-extraction',
+    name: 'Transcript Extraction',
+    description: 'Extract structured entities from meeting transcripts and voice notes.',
+    prompt:
+      'Extract action items, decisions, questions, deadlines, people, and topics ' +
+      'from the following transcript. Each extraction should be verbatim text from ' +
+      'the source with the appropriate classification.',
+    classes: ['action_item', 'decision', 'question', 'deadline', 'person', 'topic'],
+    examples: [
+      {
+        text: 'John said we need to ship the feature by Friday. Sarah agreed to handle the testing.',
+        extractions: [
+          { extraction_class: 'deadline', extraction_text: 'ship the feature by Friday' },
+          { extraction_class: 'person', extraction_text: 'John' },
+          { extraction_class: 'person', extraction_text: 'Sarah' },
+          {
+            extraction_class: 'action_item',
+            extraction_text: 'Sarah agreed to handle the testing',
+          },
+          {
+            extraction_class: 'decision',
+            extraction_text: 'we need to ship the feature by Friday',
+          },
+        ],
+      },
+    ],
+    defaultModelId: 'gemini-2.5-flash',
+    builtIn: true,
+    productId: DEFAULT_PRODUCT_ID,
+  },
+  {
+    id: 'triage',
+    name: 'MindLyst Triage',
+    description:
+      'Extract topics, entities, actions, emotions, and brain routing signals from user captures.',
+    prompt:
+      'Analyze the following user capture and extract: topics, named entities, ' +
+      'action items, emotional signals, date references, and brain routing signals. ' +
+      'Brain signals should include which brain (work, home, money, health, global) ' +
+      'the content belongs to with a confidence score.',
+    classes: ['topic', 'entity', 'action', 'emotion', 'date_reference', 'brain_signal'],
+    examples: [
+      {
+        text: "Remind me to call the dentist tomorrow about my appointment. I'm stressed about the cost.",
+        extractions: [
+          { extraction_class: 'action', extraction_text: 'call the dentist tomorrow' },
+          { extraction_class: 'date_reference', extraction_text: 'tomorrow' },
+          {
+            extraction_class: 'emotion',
+            extraction_text: 'stressed about the cost',
+            attributes: { valence: 'negative' },
+          },
+          {
+            extraction_class: 'brain_signal',
+            extraction_text: 'dentist',
+            attributes: { brain: 'health', confidence: '0.9' },
+          },
+          {
+            extraction_class: 'brain_signal',
+            extraction_text: 'cost',
+            attributes: { brain: 'money', confidence: '0.6' },
+          },
+        ],
+      },
+    ],
+    defaultModelId: 'gemini-2.5-flash',
+    builtIn: true,
+    productId: DEFAULT_PRODUCT_ID,
+  },
+  {
+    id: 'memory-insight',
+    name: 'Memory Insight Extraction',
+    description:
+      'Extract patterns, recurring themes, relationships, and milestones from accumulated brain memories.',
+    prompt:
+      'Analyze the following collection of memory items and extract: recurring patterns, ' +
+      'themes, relationships between items, and milestones.',
+    classes: ['pattern', 'recurring_theme', 'relationship', 'milestone'],
+    examples: [
+      {
+        text: 'Item 1: Skipped gym again. Item 2: Feeling tired at work. Item 3: Had coffee at 4pm.',
+        extractions: [
+          {
+            extraction_class: 'pattern',
+            extraction_text: 'Skipped gym again',
+            attributes: { frequency: 'recurring' },
+          },
+          {
+            extraction_class: 'relationship',
+            extraction_text: 'Feeling tired at work',
+            attributes: { related_to: 'Skipped gym again' },
+          },
+        ],
+      },
+    ],
+    defaultModelId: 'gemini-2.5-flash',
+    builtIn: true,
+    productId: DEFAULT_PRODUCT_ID,
+  },
+  {
+    id: 'reflection-enrichment',
+    name: 'Reflection Enrichment',
+    description:
+      'Extract emotional states, accomplishments, concerns, and goal progress from journal-style text.',
+    prompt:
+      'Analyze the following reflection or journal entry and extract: emotional states, ' +
+      'accomplishments, concerns, and goal progress indicators.',
+    classes: ['emotional_state', 'accomplishment', 'concern', 'goal_progress'],
+    examples: [
+      {
+        text: "Good day overall. Finally finished the proposal I've been putting off. Still worried about the budget review next week.",
+        extractions: [
+          {
+            extraction_class: 'emotional_state',
+            extraction_text: 'Good day overall',
+            attributes: { valence: 'positive' },
+          },
+          { extraction_class: 'accomplishment', extraction_text: 'finished the proposal' },
+          {
+            extraction_class: 'concern',
+            extraction_text: 'worried about the budget review next week',
+          },
+        ],
+      },
+    ],
+    defaultModelId: 'gemini-2.5-flash',
+    builtIn: true,
+    productId: DEFAULT_PRODUCT_ID,
+  },
+  {
+    id: 'bug-report-extraction',
+    name: 'Bug Report Extraction',
+    description: 'Extract structured fields from bug report submissions.',
+    prompt:
+      'Extract steps to reproduce, expected behavior, actual behavior, affected component, ' +
+      'and severity from the following bug report.',
+    classes: [
+      'steps_to_reproduce',
+      'expected_behavior',
+      'actual_behavior',
+      'affected_component',
+      'severity',
+    ],
+    examples: [
+      {
+        text: 'When I click the save button on the settings page, nothing happens. It should save my preferences. This is a critical issue affecting all users.',
+        extractions: [
+          {
+            extraction_class: 'steps_to_reproduce',
+            extraction_text: 'click the save button on the settings page',
+          },
+          { extraction_class: 'actual_behavior', extraction_text: 'nothing happens' },
+          {
+            extraction_class: 'expected_behavior',
+            extraction_text: 'should save my preferences',
+          },
+          { extraction_class: 'affected_component', extraction_text: 'settings page' },
+          {
+            extraction_class: 'severity',
+            extraction_text: 'critical issue affecting all users',
+            attributes: { level: 'critical' },
+          },
+        ],
+      },
+    ],
+    defaultModelId: 'gemini-2.5-flash',
+    builtIn: true,
+    productId: DEFAULT_PRODUCT_ID,
+  },
+];
+
+/**
+ * Seed all built-in tasks by upserting them one at a time. Call this during service startup.
+ * No built-in definition carries timestamps, so createdAt and updatedAt are both stamped here.
+ * Repeat calls presumably replace each document by id — verify against upsertTask.
+ * @param log Optional logger; receives one summary entry after the final upsert resolves.
+ * @returns Resolves after the last upsert; a rejected upsert propagates and ends the loop early.
+ */
+export async function seedBuiltInTasks(log?: {
+  info: (...args: unknown[]) => void;
+}): Promise<void> {
+  let seeded = 0;
+  for (const task of BUILTIN_TASKS) {
+    // NOTE(review): createdAt is re-stamped on each boot unless upsertTask keeps the stored one.
+    const now = new Date().toISOString();
+    await upsertTask({ ...task, createdAt: now, updatedAt: now });
+    seeded++;
+  }
+  log?.info({ seeded, total: BUILTIN_TASKS.length }, 'built-in extraction tasks seeded');
+}