learning_ai_common_plat/services/extraction-service/evals/run-evals.sh
saravanakumardb1 acd4c3542b feat(extraction-service): scaffold promptfoo eval suite with 19 test cases
- Add evals/promptfoo.yaml: HTTP provider hitting extraction-service API
  covering all 5 built-in tasks (transcript, triage, memory-insight,
  reflection-enrichment, bug-report-extraction)
- Add evals/fixtures/golden.json: machine-readable golden input/output fixtures
- Add evals/run-evals.sh: shell runner with health checks, auth token
  handling, task filtering, and CI mode
- Add evals/README.md: usage docs, prerequisites, cost estimates, CI integration
2026-02-19 12:19:16 -08:00

90 lines
3.1 KiB
Bash
Executable File

#!/usr/bin/env bash
# run-evals.sh — Run promptfoo evals against the extraction-service
#
# Usage:
# ./evals/run-evals.sh # run all evals
# ./evals/run-evals.sh --task triage # filter by task (grep on description)
# ./evals/run-evals.sh --ci # CI mode: exit non-zero on any eval failure or when EXTRACTION_EVAL_TOKEN is missing
# ./evals/run-evals.sh --output json # output results as JSON
# Strict mode: abort on unhandled command failures (errexit), on expansion of
# unset variables (nounset), and when any stage of a pipeline fails (pipefail).
set -o errexit -o nounset -o pipefail

# Absolute path of the directory that contains this script.
SCRIPT_DIR="$(
  cd "$(dirname "${BASH_SOURCE[0]}")" && pwd
)"

# Absolute path of SCRIPT_DIR's parent; promptfoo is launched from here.
SERVICE_DIR="$(
  cd "${SCRIPT_DIR}/.." && pwd
)"
# ── Defaults ────────────────────────────────────────────────────
# Environment-overridable settings: a value already present (and non-empty)
# in the environment wins; otherwise the fallback after ':=' is assigned.
: "${EXTRACTION_SERVICE_URL:=http://localhost:4005}"
: "${EVAL_PRODUCT_ID:=lysnrai}"

# Option state, overwritten by the command-line flags parsed further down.
TASK_FILTER=""       # --task value; empty means no filtering
OUTPUT_FORMAT="text" # --output value
CI_MODE=false        # becomes "true" when --ci is given
# ── Parse args ──────────────────────────────────────────────────
#######################################
# Parse command-line flags into the option globals.
# Globals:
#   CI_MODE        (written) set to "true" by --ci
#   OUTPUT_FORMAT  (written) set to the value following --output
#   TASK_FILTER    (written) set to the value following --task
# Arguments:
#   The script's positional parameters ("$@").
# Outputs:
#   Writes a diagnostic to stderr for an unknown flag or a missing value.
# Returns:
#   Does not return on bad input: exits the script with status 1.
#######################################
parse_args() {
  while [[ $# -gt 0 ]]; do
    case "$1" in
      --ci)
        CI_MODE=true
        shift
        ;;
      --output | --task)
        # Verify the value exists before reading $2: with `set -u` active a
        # trailing "--output" / "--task" would otherwise abort with a bare
        # "unbound variable" error instead of a usable message.
        if [[ $# -lt 2 ]]; then
          echo "Missing value for $1" >&2
          exit 1
        fi
        if [[ "$1" == "--output" ]]; then
          OUTPUT_FORMAT="$2"
        else
          TASK_FILTER="$2"
        fi
        shift 2
        ;;
      *)
        echo "Unknown arg: $1" >&2
        exit 1
        ;;
    esac
  done
}
parse_args "$@"
# ── Check service is reachable ───────────────────────────────────
# Probe "$EXTRACTION_SERVICE_URL/health" before starting the evals so an
# unreachable service is reported up front. Errors go to stderr.
echo "→ Checking extraction-service at $EXTRACTION_SERVICE_URL ..."

# Without this check a missing curl binary would be reported below as
# "service is not running", which points at the wrong problem.
if ! command -v curl > /dev/null 2>&1; then
  echo "✗ curl is required for the health check but was not found in PATH" >&2
  exit 1
fi

# -s silences progress output, -f turns HTTP error responses into a non-zero
# exit. The timeouts keep the check from hanging forever when the host accepts
# the connection but never answers.
if ! curl -sf --connect-timeout 5 --max-time 10 \
  "$EXTRACTION_SERVICE_URL/health" > /dev/null 2>&1; then
  echo "✗ extraction-service is not running at $EXTRACTION_SERVICE_URL" >&2
  echo " Start it with: pnpm dev (in services/extraction-service/)" >&2
  exit 1
fi
echo "✓ Service is up"
# ── Check EXTRACTION_EVAL_TOKEN ──────────────────────────────────
# Warn when the token is unset or empty. In CI mode that is fatal (exit 1);
# otherwise the script carries on after printing the hint.
if [[ -n "${EXTRACTION_EVAL_TOKEN:-}" ]]; then
  : # token present — nothing to report
else
  printf '%s\n' \
    "⚠ EXTRACTION_EVAL_TOKEN is not set — evals will fail auth" \
    " Get a token from platform-service: POST /api/auth/login" \
    " Then: export EXTRACTION_EVAL_TOKEN=<token>"
  [[ "$CI_MODE" != "true" ]] || exit 1
fi
# ── Build promptfoo args ─────────────────────────────────────────
# Assemble the promptfoo command line as an array so every value stays a
# single argument regardless of spaces or glob characters.
PROMPTFOO_ARGS=(eval)
PROMPTFOO_ARGS+=(--config "${SCRIPT_DIR}/promptfoo.yaml")
# NOTE(review): promptfoo's --output appears to expect an output file path
# (format chosen by extension) rather than a format name — confirm that values
# such as "text" / "json" behave as intended.
PROMPTFOO_ARGS+=(--output "${OUTPUT_FORMAT}")
PROMPTFOO_ARGS+=(--no-cache)

# CI mode only: drop the progress bar.
[[ "$CI_MODE" != "true" ]] || PROMPTFOO_ARGS+=(--no-progress-bar)

# Forward the --task value, if any, as a description filter.
# NOTE(review): verify the flag name against the installed promptfoo version.
[[ -z "$TASK_FILTER" ]] || PROMPTFOO_ARGS+=(--filter-description "$TASK_FILTER")
# ── Run ─────────────────────────────────────────────────────────
# Launch promptfoo from SERVICE_DIR and report the outcome.
#   - CI mode: a promptfoo failure becomes this script's exit status.
#   - Otherwise: the failure is reported and the script still exits 0.
echo "→ Running evals (task: ${TASK_FILTER:-all}) ..."
echo ""

# Make the settings visible to the promptfoo child process.
export EXTRACTION_SERVICE_URL EXTRACTION_EVAL_TOKEN EVAL_PRODUCT_ID

cd "$SERVICE_DIR"

# Capture the status through `||`: with `set -e` active, a bare failing command
# would terminate the script right here, so the summary below would never run
# and non-CI runs would abort instead of just reporting the failure.
EXIT_CODE=0
npx promptfoo "${PROMPTFOO_ARGS[@]}" || EXIT_CODE=$?

echo ""
if [[ $EXIT_CODE -eq 0 ]]; then
  echo "✓ All evals passed"
else
  echo "✗ Some evals failed (exit $EXIT_CODE)"
  if [[ "$CI_MODE" == "true" ]]; then
    exit "$EXIT_CODE"
  fi
fi