learning_ai_common_plat/services/extraction-service/python/src/task_registry.py
saravanakumardb1 c292bb5cc1 feat(extraction): scaffold extraction-service + @bytelyst/extraction package
- extraction-service: Fastify scaffold (port 4005) with extract/tasks modules
- src/lib/: config, errors, cosmos, product-config, python-bridge
- src/modules/extract/: types (Zod schemas), routes (POST /extract, batch, models)
- src/modules/tasks/: types, repository (Cosmos CRUD), routes (CRUD endpoints)
- Python sidecar: FastAPI app, LangExtract wrapper, models, task registry
- @bytelyst/extraction package: types, client factory, index exports
- Both pnpm build pass clean
2026-02-14 13:31:40 -08:00

202 lines
8.3 KiB
Python

"""
Built-in extraction task definitions.
These are seeded into Cosmos DB on service startup.
"""
from __future__ import annotations
# Model id stored under "defaultModelId" for every built-in task.
_DEFAULT_MODEL_ID = "gemini-2.5-flash"


def _extraction(
    extraction_class: str,
    extraction_text: str,
    attributes: dict | None = None,
) -> dict:
    """Return one example extraction record.

    The "attributes" key is only added when *attributes* is supplied, so
    records without attributes carry exactly two keys.
    """
    record: dict = {
        "extraction_class": extraction_class,
        "extraction_text": extraction_text,
    }
    if attributes is not None:
        record["attributes"] = attributes
    return record


def _task(
    *,
    task_id: str,
    name: str,
    description: str,
    prompt: str,
    classes: list[str],
    example_text: str,
    extractions: list[dict],
) -> dict:
    """Return a built-in task definition holding a single worked example.

    Every task produced here is flagged ``"builtIn": True`` and uses
    ``_DEFAULT_MODEL_ID`` as its ``"defaultModelId"``.
    """
    return {
        "id": task_id,
        "name": name,
        "description": description,
        "prompt": prompt,
        "classes": classes,
        "examples": [{"text": example_text, "extractions": extractions}],
        "defaultModelId": _DEFAULT_MODEL_ID,
        "builtIn": True,
    }


# Each task ships one example; every extraction_text below is copied verbatim
# from that example's text, and every extraction_class appears in the task's
# "classes" list.
BUILTIN_TASKS: list[dict] = [
    # --- Meeting transcripts / voice notes ---------------------------------
    _task(
        task_id="transcript-extraction",
        name="Transcript Extraction",
        description="Extract structured entities from meeting transcripts and voice notes.",
        prompt=(
            "Extract action items, decisions, questions, deadlines, people, and topics "
            "from the following transcript. Each extraction should be verbatim text from "
            "the source with the appropriate classification."
        ),
        classes=["action_item", "decision", "question", "deadline", "person", "topic"],
        example_text="John said we need to ship the feature by Friday. Sarah agreed to handle the testing.",
        extractions=[
            _extraction("deadline", "ship the feature by Friday"),
            _extraction("person", "John"),
            _extraction("person", "Sarah"),
            _extraction("action_item", "Sarah agreed to handle the testing"),
            _extraction("decision", "we need to ship the feature by Friday"),
        ],
    ),
    # --- User-capture triage with brain routing signals --------------------
    _task(
        task_id="triage",
        name="MindLyst Triage",
        description="Extract topics, entities, actions, emotions, and brain routing signals from user captures.",
        prompt=(
            "Analyze the following user capture and extract: topics, named entities, "
            "action items, emotional signals, date references, and brain routing signals. "
            "Brain signals should include which brain (work, home, money, health, global) "
            "the content belongs to with a confidence score."
        ),
        classes=["topic", "entity", "action", "emotion", "date_reference", "brain_signal"],
        example_text="Remind me to call the dentist tomorrow about my appointment. I'm stressed about the cost.",
        extractions=[
            _extraction("action", "call the dentist tomorrow"),
            _extraction("date_reference", "tomorrow"),
            _extraction("emotion", "stressed about the cost", {"valence": "negative"}),
            # Confidence values are kept as strings, exactly as seeded.
            _extraction("brain_signal", "dentist", {"brain": "health", "confidence": "0.9"}),
            _extraction("brain_signal", "cost", {"brain": "money", "confidence": "0.6"}),
        ],
    ),
    # --- Cross-item insights over accumulated memories ---------------------
    _task(
        task_id="memory-insight",
        name="Memory Insight Extraction",
        description="Extract patterns, recurring themes, relationships, and milestones from accumulated brain memories.",
        prompt=(
            "Analyze the following collection of memory items and extract: recurring patterns, "
            "themes, relationships between items, and milestones. Focus on cross-cutting insights "
            "that span multiple items."
        ),
        classes=["pattern", "recurring_theme", "relationship", "milestone"],
        example_text="Item 1: Skipped gym again. Item 2: Feeling tired at work. Item 3: Had coffee at 4pm.",
        extractions=[
            _extraction("pattern", "Skipped gym again", {"frequency": "recurring"}),
            _extraction("relationship", "Feeling tired at work", {"related_to": "Skipped gym again"}),
        ],
    ),
    # --- Journal / reflection entries --------------------------------------
    _task(
        task_id="reflection-enrichment",
        name="Reflection Enrichment",
        description="Extract emotional states, accomplishments, concerns, and goal progress from journal-style text.",
        prompt=(
            "Analyze the following reflection or journal entry and extract: emotional states, "
            "accomplishments, concerns, and goal progress indicators."
        ),
        classes=["emotional_state", "accomplishment", "concern", "goal_progress"],
        example_text="Good day overall. Finally finished the proposal I've been putting off. Still worried about the budget review next week.",
        extractions=[
            _extraction("emotional_state", "Good day overall", {"valence": "positive"}),
            _extraction("accomplishment", "finished the proposal"),
            _extraction("concern", "worried about the budget review next week"),
        ],
    ),
    # --- Bug report submissions --------------------------------------------
    _task(
        task_id="bug-report-extraction",
        name="Bug Report Extraction",
        description="Extract structured fields from bug report submissions.",
        prompt=(
            "Extract steps to reproduce, expected behavior, actual behavior, affected component, "
            "and severity from the following bug report."
        ),
        classes=[
            "steps_to_reproduce",
            "expected_behavior",
            "actual_behavior",
            "affected_component",
            "severity",
        ],
        example_text="When I click the save button on the settings page, nothing happens. It should save my preferences. This is a critical issue affecting all users.",
        extractions=[
            _extraction("steps_to_reproduce", "click the save button on the settings page"),
            _extraction("actual_behavior", "nothing happens"),
            _extraction("expected_behavior", "should save my preferences"),
            _extraction("affected_component", "settings page"),
            _extraction("severity", "critical issue affecting all users", {"level": "critical"}),
        ],
    ),
]