- Add evals/promptfoo.yaml: HTTP provider hitting extraction-service API covering all 5 built-in tasks (transcript, triage, memory-insight, reflection-enrichment, bug-report-extraction)
- Add evals/fixtures/golden.json: machine-readable golden input/output fixtures
- Add evals/run-evals.sh: shell runner with health checks, auth token handling, task filtering, and CI mode
- Add evals/README.md: usage docs, prerequisites, cost estimates, CI integration
90 lines
3.1 KiB
Bash
Executable File
90 lines
3.1 KiB
Bash
Executable File
#!/usr/bin/env bash
# run-evals.sh — Run promptfoo evals against the extraction-service
#
# Usage:
#   ./evals/run-evals.sh                  # run all evals
#   ./evals/run-evals.sh --task triage    # filter by task (grep on description)
#   ./evals/run-evals.sh --ci             # CI mode: exit non-zero on any failure
#   ./evals/run-evals.sh --output json    # output results as JSON
#
# Environment:
#   EXTRACTION_SERVICE_URL  base URL of the service
#                           (default: http://localhost:4005)
#   EVAL_PRODUCT_ID         product id exported to the eval run
#                           (default: lysnrai)
#   EXTRACTION_EVAL_TOKEN   auth token exported to the eval run; no default

# Strict mode: abort on unhandled errors, on unset variables, and on a failure
# in any stage of a pipeline.
set -euo pipefail

# Absolute path of the directory holding this script, and of its parent.
# The parent is used as the working directory for the eval run.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
SERVICE_DIR="$(cd "$SCRIPT_DIR/.." && pwd)"

# ── Defaults ────────────────────────────────────────────────────
# Environment values win; the fallbacks apply when a variable is unset or empty.
EXTRACTION_SERVICE_URL="${EXTRACTION_SERVICE_URL:-http://localhost:4005}"
EVAL_PRODUCT_ID="${EVAL_PRODUCT_ID:-lysnrai}"

# Option state, overridden by the command-line flags.
CI_MODE=false
OUTPUT_FORMAT="text"
TASK_FILTER=""

# ── Parse args ──────────────────────────────────────────────────

#######################################
# Parse command-line flags into the option globals.
# Globals:   CI_MODE, OUTPUT_FORMAT, TASK_FILTER (written)
# Arguments: the script's command-line arguments ("$@")
# Outputs:   an error message on stderr for an invalid invocation
# Returns:   exits 1 on an unknown flag or on a flag missing its value
#######################################
parse_args() {
  while [[ $# -gt 0 ]]; do
    case "$1" in
      --ci)
        CI_MODE=true
        shift
        ;;
      --output | --task)
        # Check arity before touching "$2": when the script runs under
        # `set -u`, an absent value would otherwise abort with a bare
        # "unbound variable" error instead of a usable message.
        if [[ $# -lt 2 ]]; then
          echo "Missing value for $1" >&2
          exit 1
        fi
        if [[ "$1" == "--output" ]]; then
          OUTPUT_FORMAT="$2"
        else
          TASK_FILTER="$2"
        fi
        shift 2
        ;;
      *)
        echo "Unknown arg: $1" >&2
        exit 1
        ;;
    esac
  done
}

parse_args "$@"

# ── Check service is reachable ───────────────────────────────────
# Probe the service's /health endpoint and stop early when it does not answer,
# rather than letting every eval fail on a connection error.
echo "→ Checking extraction-service at $EXTRACTION_SERVICE_URL ..."

# Without this check, a missing curl would be reported as "service not running".
if ! command -v curl > /dev/null 2>&1; then
  echo "✗ curl is required for the health check but was not found on PATH" >&2
  exit 1
fi

# -s: no progress output; -f: treat HTTP error responses as a failure;
# --max-time: bound the probe so a hung endpoint cannot stall the run.
if ! curl -sf --max-time 10 "$EXTRACTION_SERVICE_URL/health" > /dev/null 2>&1; then
  echo "✗ extraction-service is not running at $EXTRACTION_SERVICE_URL" >&2
  echo " Start it with: pnpm dev (in services/extraction-service/)" >&2
  exit 1
fi
echo "✓ Service is up"

# ── Check EXTRACTION_EVAL_TOKEN ──────────────────────────────────

#######################################
# Warn when no eval auth token is configured; fatal only in CI mode.
# Globals:   EXTRACTION_EVAL_TOKEN (read), CI_MODE (read)
# Arguments: none
# Outputs:   a three-line warning on stderr when the token is unset or empty
# Returns:   0 when the token is set, or when it is missing outside CI mode;
#            exits 1 when the token is missing and CI_MODE is "true"
#######################################
check_eval_token() {
  # ${VAR:-} keeps this test safe under `set -u` when the variable is unset.
  if [[ -n "${EXTRACTION_EVAL_TOKEN:-}" ]]; then
    return 0
  fi

  {
    echo "⚠ EXTRACTION_EVAL_TOKEN is not set — evals will fail auth"
    echo " Get a token from platform-service: POST /api/auth/login"
    echo " Then: export EXTRACTION_EVAL_TOKEN=<token>"
  } >&2

  # Outside CI the run continues, so the warning is advisory only.
  if [[ "${CI_MODE:-false}" == "true" ]]; then
    exit 1
  fi
}

check_eval_token

# ── Build promptfoo args ─────────────────────────────────────────

#######################################
# Assemble the promptfoo command line as an array so that values containing
# spaces (for example a multi-word task filter) survive as single arguments.
# Globals:   SCRIPT_DIR, OUTPUT_FORMAT, CI_MODE, TASK_FILTER (read)
#            PROMPTFOO_ARGS (written)
# Arguments: none
# Returns:   0
#######################################
build_promptfoo_args() {
  # NOTE(review): promptfoo documents --output as an output *file path* whose
  # extension selects the format. Passing a bare format name such as "text" or
  # "json" may not do what the usage text promises — confirm against the
  # installed promptfoo version before relying on it.
  PROMPTFOO_ARGS=(
    eval
    --config "$SCRIPT_DIR/promptfoo.yaml"
    --output "$OUTPUT_FORMAT"
    --no-cache
  )

  if [[ "$CI_MODE" == "true" ]]; then
    PROMPTFOO_ARGS+=(--no-progress-bar)
  fi

  if [[ -n "$TASK_FILTER" ]]; then
    # --filter-pattern is the promptfoo flag that selects tests by matching
    # their description; the previous --filter-description is not listed in
    # the promptfoo CLI reference.
    PROMPTFOO_ARGS+=(--filter-pattern "$TASK_FILTER")
  fi
}

build_promptfoo_args

# ── Run ─────────────────────────────────────────────────────────
echo "→ Running evals (task: ${TASK_FILTER:-all}) ..."
echo ""

# Make the settings visible to the promptfoo child process.
export EXTRACTION_SERVICE_URL
export EXTRACTION_EVAL_TOKEN
export EVAL_PRODUCT_ID

cd "$SERVICE_DIR"

# Capture the status through `||` instead of reading $? on the next line:
# under `set -e` a failing command aborts the script on the spot, so a
# trailing `EXIT_CODE=$?` would never run and the failure report below would
# be unreachable (and non-CI runs would still exit non-zero).
EXIT_CODE=0
npx promptfoo "${PROMPTFOO_ARGS[@]}" || EXIT_CODE=$?

echo ""
if [[ $EXIT_CODE -eq 0 ]]; then
  echo "✓ All evals passed"
else
  echo "✗ Some evals failed (exit $EXIT_CODE)"
  # Only CI mode propagates the failure; interactive runs report it and
  # fall through to a zero exit status.
  if [[ "$CI_MODE" == "true" ]]; then
    exit "$EXIT_CODE"
  fi
fi