learning_ai_common_plat/services/extraction-service/evals/fixtures/golden.json
saravanakumardb1 acd4c3542b feat(extraction-service): scaffold promptfoo eval suite with 19 test cases
- Add evals/promptfoo.yaml: HTTP provider hitting extraction-service API
  covering all 5 built-in tasks (transcript, triage, memory-insight,
  reflection-enrichment, bug-report-extraction)
- Add evals/fixtures/golden.json: machine-readable golden input/output fixtures
- Add evals/run-evals.sh: shell runner with health checks, auth token
  handling, task filtering, and CI mode
- Add evals/README.md: usage docs, prerequisites, cost estimates, CI integration
2026-02-19 12:19:16 -08:00

178 lines
7.3 KiB
JSON

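{
"description": "Golden fixtures for extraction-service evals. Each task lists its expected extraction classes (expectedClasses) and a set of cases. Each case pairs an input with the minimum expectations for its output: the classes that must appear (mustContainClasses), a lower bound on the number of extractions (minExtractions), and optional checks such as mustContainText, mustContainEmotionValence, mustContainPatternFrequency and mustContainSeverityLevel. Brain-signal expectations appear either as a single object (mustContainBrainSignal) or as an array of objects (mustContainBrainSignals). Used by the promptfoo eval suite and can also be consumed by custom assertion scripts.",
"version": "1.0.0",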
"tasks": {
"transcript-extraction": {
"expectedClasses": ["action_item", "decision", "question", "deadline", "person", "topic"],
"cases": [
{
"id": "tc-001",
"input": "John said we need to ship the feature by Friday. Sarah agreed to handle the testing.",
"mustContainClasses": ["action_item", "deadline", "person"],
"mustContainText": ["friday", "sarah", "john"],
"minExtractions": 3
},
{
"id": "tc-002",
"input": "The team decided to postpone the launch to Q3. Alice will notify all stakeholders by Monday.",
"mustContainClasses": ["decision", "action_item", "person", "deadline"],
"mustContainText": ["q3", "alice"],
"minExtractions": 4
},
{
"id": "tc-003",
"input": "Bob asked: should we use Postgres or Cosmos DB for the new service? No decision was made.",
"mustContainClasses": ["question", "person"],
"mustContainText": ["bob"],
"minExtractions": 2
},
{
"id": "tc-004",
"input": "Maria: I finished the design mockups. Tom: Great, I'll review them by EOD. Maria: Can you also check the mobile screens?",
"mustContainClasses": ["action_item", "person"],
"mustContainText": ["maria", "tom"],
"minExtractions": 3
}
]
},
"triage": {
"expectedClasses": ["topic", "entity", "action", "emotion", "date_reference", "brain_signal"],
"cases": [
{
"id": "tr-001",
"input": "Remind me to call the dentist tomorrow about my appointment. I'm stressed about the cost.",
"mustContainClasses": ["action", "date_reference", "emotion"],
"mustContainBrainSignal": { "brain": "health", "minConfidence": 0.5 },
"minExtractions": 3
},
{
"id": "tr-002",
"input": "Need to finish the Q1 report for my manager by end of week. The presentation is on Thursday.",
"mustContainClasses": ["action", "date_reference"],
"mustContainBrainSignal": { "brain": "work", "minConfidence": 0.5 },
"minExtractions": 2
},
{
"id": "tr-003",
"input": "I need to pay the credit card bill before the 15th or I'll get charged interest.",
"mustContainClasses": ["action", "date_reference"],
"mustContainBrainSignal": { "brain": "money", "minConfidence": 0.5 },
"minExtractions": 2
},
{
"id": "tr-004",
"input": "Feeling really overwhelmed today. Too many things on my plate and I can't focus.",
"mustContainClasses": ["emotion"],
"mustContainEmotionValence": "negative",
"minExtractions": 1
},
{
"id": "tr-005",
"input": "Doctor said I need to exercise more. Also need to check my 401k contributions before year end.",
"mustContainClasses": ["action"],
"mustContainBrainSignals": [
{ "brain": "health", "minConfidence": 0.4 },
{ "brain": "money", "minConfidence": 0.4 }
],
"minExtractions": 2
}
]
},
"memory-insight": {
"expectedClasses": ["pattern", "recurring_theme", "relationship", "milestone"],
"cases": [
{
"id": "mi-001",
"input": "Item 1: Skipped gym again. Item 2: Feeling tired at work. Item 3: Had coffee at 4pm to stay awake. Item 4: Skipped gym for the third time this week.",
"mustContainClasses": ["pattern"],
"mustContainPatternFrequency": "recurring",
"minExtractions": 1
},
{
"id": "mi-002",
"input": "Item 1: Stayed up until 2am coding. Item 2: Missed standup the next morning. Item 3: Felt foggy all day. Item 4: Late night again, can't stop.",
"mustContainClasses": ["pattern", "relationship"],
"minExtractions": 2
},
{
"id": "mi-003",
"input": "Item 1: Started learning Spanish 3 months ago. Item 2: Had first full conversation in Spanish today. Item 3: Completed Duolingo 90-day streak.",
"mustContainClasses": ["milestone"],
"minExtractions": 2
},
{
"id": "mi-004",
"input": "Entry 1: Anxious before the presentation. Entry 2: Nervous about the client call. Entry 3: Worried about the demo tomorrow. Entry 4: Stressed about the board meeting.",
"mustContainClasses": ["recurring_theme"],
"minExtractions": 1
}
]
},
"reflection-enrichment": {
"expectedClasses": ["emotional_state", "accomplishment", "concern", "goal_progress"],
"cases": [
{
"id": "re-001",
"input": "Good day overall. Finally finished the proposal I've been putting off. Still worried about the budget review next week.",
"mustContainClasses": ["accomplishment", "concern", "emotional_state"],
"minExtractions": 3
},
{
"id": "re-002",
"input": "Had a fantastic week. Shipped the new feature, got great feedback from users, and the team celebrated together.",
"mustContainClasses": ["emotional_state", "accomplishment"],
"mustContainEmotionValence": "positive",
"minExtractions": 2
},
{
"id": "re-003",
"input": "I've been trying to read more this year. This month I finished my third book — ahead of my goal of one per month.",
"mustContainClasses": ["goal_progress", "accomplishment"],
"minExtractions": 2
},
{
"id": "re-004",
"input": "Proud of finishing the marathon training plan. But I'm really worried I won't be able to run the actual race — my knee has been acting up.",
"mustContainClasses": ["accomplishment", "concern"],
"minExtractions": 2
}
]
},
"bug-report-extraction": {
"expectedClasses": [
"steps_to_reproduce",
"expected_behavior",
"actual_behavior",
"affected_component",
"severity"
],
"cases": [
{
"id": "br-001",
"input": "When I click the save button on the settings page, nothing happens. It should save my preferences. This is a critical issue affecting all users.",
"mustContainClasses": [
"steps_to_reproduce",
"expected_behavior",
"actual_behavior",
"affected_component",
"severity"
],
"mustContainSeverityLevel": "critical",
"minExtractions": 5
},
{
"id": "br-002",
"input": "Steps: 1) Open login page, 2) Enter valid credentials, 3) Click login. Expected: redirect to dashboard. Actual: spinner shows forever. Affects the login page on mobile.",
"mustContainClasses": [
"steps_to_reproduce",
"expected_behavior",
"actual_behavior",
"affected_component"
],
"minExtractions": 4
}
]
}
}
}