- extraction-service: Fastify scaffold (port 4005) with extract/tasks modules - src/lib/: config, errors, cosmos, product-config, python-bridge - src/modules/extract/: types (Zod schemas), routes (POST /extract, batch, models) - src/modules/tasks/: types, repository (Cosmos CRUD), routes (CRUD endpoints) - Python sidecar: FastAPI app, LangExtract wrapper, models, task registry - @bytelyst/extraction package: types, client factory, index exports - Both pnpm build pass clean
202 lines
8.3 KiB
Python
202 lines
8.3 KiB
Python
"""
Built-in extraction task definitions.

These are seeded into Cosmos DB on service startup.
"""

from __future__ import annotations
# Registry of the built-in extraction tasks (per the module docstring, these
# are the definitions seeded on service startup).
#
# Every task record carries the same keys, in this order:
#   id             - stable slug identifying the task
#   name           - human-readable title
#   description    - one-sentence summary of what the task extracts
#   prompt         - instruction text for the extraction model
#   classes        - the extraction class labels the task uses
#   examples       - few-shot samples: a source "text" plus its "extractions"
#   defaultModelId - model identifier used by default for the task
#   builtIn        - always True for the entries defined here
#
# Every example extraction has "extraction_class" (one of the task's
# "classes") and "extraction_text" (a verbatim substring of the example
# "text"); some also carry an "attributes" dict. All attribute values are
# strings, including numeric-looking ones such as confidence "0.9".
BUILTIN_TASKS: list[dict] = [
    # --- Meeting transcripts / voice notes --------------------------------
    {
        "id": "transcript-extraction",
        "name": "Transcript Extraction",
        "description": "Extract structured entities from meeting transcripts and voice notes.",
        "prompt": (
            "Extract action items, decisions, questions, deadlines, people, and topics "
            "from the following transcript. Each extraction should be verbatim text from "
            "the source with the appropriate classification."
        ),
        "classes": ["action_item", "decision", "question", "deadline", "person", "topic"],
        "examples": [
            {
                "text": "John said we need to ship the feature by Friday. Sarah agreed to handle the testing.",
                "extractions": [
                    {
                        "extraction_class": "deadline",
                        "extraction_text": "ship the feature by Friday",
                    },
                    {
                        "extraction_class": "person",
                        "extraction_text": "John",
                    },
                    {
                        "extraction_class": "person",
                        "extraction_text": "Sarah",
                    },
                    {
                        "extraction_class": "action_item",
                        "extraction_text": "Sarah agreed to handle the testing",
                    },
                    {
                        "extraction_class": "decision",
                        "extraction_text": "we need to ship the feature by Friday",
                    },
                ],
            },
        ],
        "defaultModelId": "gemini-2.5-flash",
        "builtIn": True,
    },
    # --- User-capture triage ----------------------------------------------
    {
        "id": "triage",
        "name": "MindLyst Triage",
        "description": "Extract topics, entities, actions, emotions, and brain routing signals from user captures.",
        "prompt": (
            "Analyze the following user capture and extract: topics, named entities, "
            "action items, emotional signals, date references, and brain routing signals. "
            "Brain signals should include which brain (work, home, money, health, global) "
            "the content belongs to with a confidence score."
        ),
        "classes": ["topic", "entity", "action", "emotion", "date_reference", "brain_signal"],
        "examples": [
            {
                "text": "Remind me to call the dentist tomorrow about my appointment. I'm stressed about the cost.",
                "extractions": [
                    {
                        "extraction_class": "action",
                        "extraction_text": "call the dentist tomorrow",
                    },
                    {
                        "extraction_class": "date_reference",
                        "extraction_text": "tomorrow",
                    },
                    {
                        "extraction_class": "emotion",
                        "extraction_text": "stressed about the cost",
                        "attributes": {"valence": "negative"},
                    },
                    {
                        "extraction_class": "brain_signal",
                        "extraction_text": "dentist",
                        "attributes": {"brain": "health", "confidence": "0.9"},
                    },
                    {
                        "extraction_class": "brain_signal",
                        "extraction_text": "cost",
                        "attributes": {"brain": "money", "confidence": "0.6"},
                    },
                ],
            },
        ],
        "defaultModelId": "gemini-2.5-flash",
        "builtIn": True,
    },
    # --- Cross-item insights over accumulated memories --------------------
    {
        "id": "memory-insight",
        "name": "Memory Insight Extraction",
        "description": "Extract patterns, recurring themes, relationships, and milestones from accumulated brain memories.",
        "prompt": (
            "Analyze the following collection of memory items and extract: recurring patterns, "
            "themes, relationships between items, and milestones. Focus on cross-cutting insights "
            "that span multiple items."
        ),
        "classes": ["pattern", "recurring_theme", "relationship", "milestone"],
        "examples": [
            {
                "text": "Item 1: Skipped gym again. Item 2: Feeling tired at work. Item 3: Had coffee at 4pm.",
                "extractions": [
                    {
                        "extraction_class": "pattern",
                        "extraction_text": "Skipped gym again",
                        "attributes": {"frequency": "recurring"},
                    },
                    {
                        "extraction_class": "relationship",
                        "extraction_text": "Feeling tired at work",
                        "attributes": {"related_to": "Skipped gym again"},
                    },
                ],
            },
        ],
        "defaultModelId": "gemini-2.5-flash",
        "builtIn": True,
    },
    # --- Journal / reflection entries -------------------------------------
    {
        "id": "reflection-enrichment",
        "name": "Reflection Enrichment",
        "description": "Extract emotional states, accomplishments, concerns, and goal progress from journal-style text.",
        "prompt": (
            "Analyze the following reflection or journal entry and extract: emotional states, "
            "accomplishments, concerns, and goal progress indicators."
        ),
        "classes": ["emotional_state", "accomplishment", "concern", "goal_progress"],
        "examples": [
            {
                "text": "Good day overall. Finally finished the proposal I've been putting off. Still worried about the budget review next week.",
                "extractions": [
                    {
                        "extraction_class": "emotional_state",
                        "extraction_text": "Good day overall",
                        "attributes": {"valence": "positive"},
                    },
                    {
                        "extraction_class": "accomplishment",
                        "extraction_text": "finished the proposal",
                    },
                    {
                        "extraction_class": "concern",
                        "extraction_text": "worried about the budget review next week",
                    },
                ],
            },
        ],
        "defaultModelId": "gemini-2.5-flash",
        "builtIn": True,
    },
    # --- Bug report submissions -------------------------------------------
    {
        "id": "bug-report-extraction",
        "name": "Bug Report Extraction",
        "description": "Extract structured fields from bug report submissions.",
        "prompt": (
            "Extract steps to reproduce, expected behavior, actual behavior, affected component, "
            "and severity from the following bug report."
        ),
        "classes": [
            "steps_to_reproduce",
            "expected_behavior",
            "actual_behavior",
            "affected_component",
            "severity",
        ],
        "examples": [
            {
                "text": "When I click the save button on the settings page, nothing happens. It should save my preferences. This is a critical issue affecting all users.",
                "extractions": [
                    {
                        "extraction_class": "steps_to_reproduce",
                        "extraction_text": "click the save button on the settings page",
                    },
                    {
                        "extraction_class": "actual_behavior",
                        "extraction_text": "nothing happens",
                    },
                    {
                        "extraction_class": "expected_behavior",
                        "extraction_text": "should save my preferences",
                    },
                    {
                        "extraction_class": "affected_component",
                        "extraction_text": "settings page",
                    },
                    {
                        "extraction_class": "severity",
                        "extraction_text": "critical issue affecting all users",
                        "attributes": {"level": "critical"},
                    },
                ],
            },
        ],
        "defaultModelId": "gemini-2.5-flash",
        "builtIn": True,
    },
]