#!/usr/bin/env bash
# run-evals.sh — Run promptfoo evals against the extraction-service
#
# Usage:
#   ./evals/run-evals.sh                  # run all evals
#   ./evals/run-evals.sh --task triage    # filter by task (grep on description)
#   ./evals/run-evals.sh --ci             # CI mode: exit non-zero on any failure
#   ./evals/run-evals.sh --output json    # output results as JSON
#
# Environment:
#   EXTRACTION_SERVICE_URL   base URL of the service (default: http://localhost:4005)
#   EVAL_PRODUCT_ID          exported for the eval run (default: lysnrai)
#   EXTRACTION_EVAL_TOKEN    auth token; required in --ci mode, only warned
#                            about otherwise
#
# Exit status:
#   0  evals passed, or evals failed outside --ci mode
#   1  bad arguments, service unreachable, or missing token in --ci mode
#   N  in --ci mode, whatever non-zero status promptfoo exited with

set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
SERVICE_DIR="$(cd "$SCRIPT_DIR/.." && pwd)"

# ── Defaults ────────────────────────────────────────────────────
EXTRACTION_SERVICE_URL="${EXTRACTION_SERVICE_URL:-http://localhost:4005}"
EVAL_PRODUCT_ID="${EVAL_PRODUCT_ID:-lysnrai}"
CI_MODE=false
OUTPUT_FORMAT="text"
TASK_FILTER=""

# ── Parse args ──────────────────────────────────────────────────
while [[ $# -gt 0 ]]; do
  case "$1" in
    --ci)
      CI_MODE=true
      shift
      ;;
    --output | --task)
      # Check for the value explicitly: under `set -u` a missing "$2" would
      # otherwise abort with an opaque "unbound variable" error.
      if [[ $# -lt 2 ]]; then
        echo "Missing value for $1" >&2
        exit 1
      fi
      if [[ "$1" == "--output" ]]; then
        OUTPUT_FORMAT="$2"
      else
        TASK_FILTER="$2"
      fi
      shift 2
      ;;
    *)
      echo "Unknown arg: $1" >&2
      exit 1
      ;;
  esac
done

# ── Check service is reachable ───────────────────────────────────
echo "→ Checking extraction-service at $EXTRACTION_SERVICE_URL ..."
# -s silences curl, -f turns HTTP error responses into a non-zero exit; only
# the exit status is used, so all output is discarded.
if ! curl -sf "$EXTRACTION_SERVICE_URL/health" > /dev/null 2>&1; then
  {
    echo "✗ extraction-service is not running at $EXTRACTION_SERVICE_URL"
    echo " Start it with: pnpm dev (in services/extraction-service/)"
  } >&2
  exit 1
fi
echo "✓ Service is up"

# ── Check EXTRACTION_EVAL_TOKEN ──────────────────────────────────
# A missing token is fatal only in CI mode; locally it is just a warning and
# the run proceeds.
if [[ -z "${EXTRACTION_EVAL_TOKEN:-}" ]]; then
  {
    echo "⚠ EXTRACTION_EVAL_TOKEN is not set — evals will fail auth"
    echo " Get a token from platform-service: POST /api/auth/login"
    echo " Then: export EXTRACTION_EVAL_TOKEN="
  } >&2
  if [[ "$CI_MODE" == "true" ]]; then
    exit 1
  fi
fi

# ── Build promptfoo args ─────────────────────────────────────────
# NOTE(review): the value is forwarded verbatim to promptfoo's --output; that
# flag appears to expect an output file path rather than a format name —
# confirm that values like "text" / "json" behave as intended.
PROMPTFOO_ARGS=(
  eval
  --config "$SCRIPT_DIR/promptfoo.yaml"
  --output "$OUTPUT_FORMAT"
  --no-cache
)

if [[ "$CI_MODE" == "true" ]]; then
  PROMPTFOO_ARGS+=(--no-progress-bar)
fi

if [[ -n "$TASK_FILTER" ]]; then
  PROMPTFOO_ARGS+=(--filter-description "$TASK_FILTER")
fi

# ── Run ─────────────────────────────────────────────────────────
echo "→ Running evals (task: ${TASK_FILTER:-all}) ..."
echo ""

export EXTRACTION_SERVICE_URL
export EXTRACTION_EVAL_TOKEN
export EVAL_PRODUCT_ID

cd "$SERVICE_DIR"

# Capture the status via `||` instead of reading `$?` on the next line: with
# `set -e` a failing promptfoo run would otherwise terminate the script right
# here, skipping the summary below and making non-CI runs exit non-zero.
EXIT_CODE=0
npx promptfoo "${PROMPTFOO_ARGS[@]}" || EXIT_CODE=$?

if [[ $EXIT_CODE -eq 0 ]]; then
  echo ""
  echo "✓ All evals passed"
else
  echo ""
  echo "✗ Some evals failed (exit $EXIT_CODE)"
  # Only CI mode propagates the failure; interactive runs still exit 0.
  if [[ "$CI_MODE" == "true" ]]; then
    exit $EXIT_CODE
  fi
fi