#!/usr/bin/env bash
#
# selftest.sh — quick, dependency-light verification for agent-queue.
#
# Runs:
#   1. shellcheck (if installed) at --severity=error on the runner
#   2. bash -n syntax check on the runner + this script
#   3. node --check on the dashboard (if node installed)
#   4. a live init/add/run --once cycle against a throwaway queue using a
#      no-op engine stub (no real agent CLI is ever invoked)
#
# It uses its own temp AGENT_QUEUE_ROOT so it never touches a real queue.
# Exit 0 = all good. Run it before every commit.
#
set -euo pipefail

# Directory holding this script; the runner under test lives next to it.
HERE="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
AQ="$HERE/agent-queue.sh"

# Reporting helpers. pass/info write a coloured line to stdout; fail writes to
# stderr and terminates the whole self-test with exit status 1.
pass() { printf ' \033[32m✓\033[0m %s\n' "$*"; }
info() { printf ' \033[36m•\033[0m %s\n' "$*"; }
fail() { printf ' \033[31m✗ %s\033[0m\n' "$*" >&2; exit 1; }

# Every queue, stub and scratch repo is created under $tmp and removed on exit.
tmp="$(mktemp -d "${TMPDIR:-/tmp}/aq-selftest.XXXXXX")"
cleanup() { rm -rf "$tmp"; }
trap cleanup EXIT

echo "agent-queue self-test"

# NOTE: the static checks below use `cmd || fail …` rather than `cmd && pass …`.
# Under `set -e` a failing command that is not the last element of an && list
# does NOT abort the script, so the && form would let a broken check slip by
# and the run could still end with "self-test PASS".

# 1. shellcheck (optional)
if command -v shellcheck >/dev/null 2>&1; then
  shellcheck --severity=error --shell=bash "$AQ" "${BASH_SOURCE[0]}" \
    || fail "shellcheck (errors): reported problems"
  pass "shellcheck (errors): clean"
else
  info "shellcheck not installed — skipping"
fi

# 2. syntax
bash -n "$AQ" || fail "bash -n agent-queue.sh: syntax error"
pass "bash -n agent-queue.sh"
bash -n "${BASH_SOURCE[0]}" || fail "bash -n selftest.sh: syntax error"
pass "bash -n selftest.sh"

# 3. dashboard syntax (optional)
if command -v node >/dev/null 2>&1; then
  node --check "$HERE/dashboard.mjs" || fail "node --check dashboard.mjs: syntax error"
  pass "node --check dashboard.mjs"
else
  info "node not installed — skipping dashboard check"
fi

# 4. live no-op cycle
export AGENT_QUEUE_ROOT="$tmp/queue"
# The stub stands in for the engine binary (via DEVIN_BIN): it drains stdin
# and exits 0 without doing any work.
stub="$tmp/noop-engine"
printf '#!/usr/bin/env bash\n# no-op engine stub: drain stdin, succeed\ncat >/dev/null 2>&1 || true\nexit 0\n' > "$stub"
chmod +x "$stub"
work="$tmp/work"; mkdir -p "$work"
task="$tmp/task.md"
printf '%s\n' '---' 'engine: devin' "cwd: $work" 'yolo: true' '---' '' '# self-test no-op task' > "$task"
"$AQ" init >/dev/null
DEVIN_BIN="$stub" "$AQ" add "$task" >/dev/null
DEVIN_BIN="$stub" "$AQ" run --once >/dev/null 2>&1
if ls "$AGENT_QUEUE_ROOT"/review/*.md >/dev/null 2>&1; then
  pass "no-verify cycle → task parked in review/"
else
  echo "--- queue state ---" >&2
  ls -R "$AGENT_QUEUE_ROOT" >&2 || true
  fail "no-op cycle did not complete (expected a file in review/)"
fi

# 5. verify-pass gate: rc=0 + passing verify → testing/, then manual ship → shipped/
task2="$tmp/task-verify.md"
printf '%s\n' '---' 'engine: devin' "cwd: $work" 'yolo: true' 'verify: true' '---' '' '# self-test verify-pass task' > "$task2"
DEVIN_BIN="$stub" "$AQ" add "$task2" >/dev/null
DEVIN_BIN="$stub" "$AQ" run --once >/dev/null 2>&1
if ls "$AGENT_QUEUE_ROOT"/testing/*.md >/dev/null 2>&1; then
  pass "verify-pass cycle → task promoted to testing/"
else
  echo "--- queue state ---" >&2
  ls -R "$AGENT_QUEUE_ROOT" >&2 || true
  fail "verify-pass cycle did not reach testing/ (expected a file in testing/)"
fi
# Job id = basename (without .md) of the most recently modified file in testing/.
shipjob="$(basename "$(ls -1t "$AGENT_QUEUE_ROOT"/testing/*.md | head -1)" .md)"
"$AQ" ship "$shipjob" >/dev/null 2>&1
if ls "$AGENT_QUEUE_ROOT"/shipped/*.md >/dev/null 2>&1; then
  pass "manual ship → task landed in shipped/"
else
  fail "ship did not move job to shipped/"
fi

# 6. verify-fail gate: rc=0 + failing verify → failed/
task3="$tmp/task-verifyfail.md"
printf '%s\n' '---' 'engine: devin' "cwd: $work" 'yolo: true' 'verify: false' '---' '' '# self-test verify-fail task' > "$task3"
DEVIN_BIN="$stub" "$AQ" add "$task3" >/dev/null
DEVIN_BIN="$stub" "$AQ" run --once >/dev/null 2>&1
if ls "$AGENT_QUEUE_ROOT"/failed/*verifyfail*.md >/dev/null 2>&1; then
  pass "verify-fail cycle → task routed to failed/"
else
  echo "--- queue state ---" >&2
  ls -R "$AGENT_QUEUE_ROOT" >&2 || true
  fail "verify-fail cycle did not route to failed/"
fi

# status must not error
"$AQ" status >/dev/null 2>&1 || fail "status exited non-zero"
pass "status runs clean"

# ─────────────────────────────────────────────────────────────────────
# Phase 1 — Slice 1 cases (manifest/priority/capabilities/engine-class/idempotency).
# Each uses its OWN AGENT_QUEUE_ROOT; the no-op engine stub means no real CLI runs.
# ─────────────────────────────────────────────────────────────────────

# 7. backward-compat: a legacy engine/cwd/yolo-only .md still completes → review/
export AGENT_QUEUE_ROOT="$tmp/queue-bc"
bc="$tmp/bc-legacy.md"
printf '%s\n' '---' 'engine: devin' "cwd: $work" 'yolo: true' '---' '' '# legacy task' > "$bc"
"$AQ" init >/dev/null
DEVIN_BIN="$stub" "$AQ" add "$bc" >/dev/null
DEVIN_BIN="$stub" "$AQ" run --once >/dev/null 2>&1
if ls "$AGENT_QUEUE_ROOT"/review/*.md >/dev/null 2>&1; then
  pass "backward-compat: legacy engine/cwd/yolo-only .md → review/"
else
  ls -R "$AGENT_QUEUE_ROOT" >&2 || true
  fail "legacy .md did not land in review/"
fi

# 8. priority: with --max 1, a 'critical' job queued AFTER a 'low' job runs first.
#    An order-recording devin-style stub appends each job's TASKID as it launches.
export AGENT_QUEUE_ROOT="$tmp/queue-prio"
ostub="$tmp/order-engine"
# The stub finds the value following --prompt-file and, when AQ_ORDER is set,
# appends that file's first TASKID= line to the order log.
# After taking --prompt-file's value it shifts one argument at a time: a bare
# `shift 2` shifts nothing when --prompt-file is the final argument, which
# would leave the loop spinning forever.
cat > "$ostub" <<'STUB'
#!/usr/bin/env bash
# order-recording no-op engine stub (devin-style: --prompt-file PATH)
pf=""
while [ $# -gt 0 ]; do
  case "$1" in
    --prompt-file)
      pf="${2:-}"
      shift
      if [ $# -gt 0 ]; then shift; fi
      ;;
    *) shift ;;
  esac
done
if [ -n "${pf:-}" ] && [ -n "${AQ_ORDER:-}" ]; then
  grep -m1 '^TASKID=' "$pf" >> "$AQ_ORDER" 2>/dev/null || true
fi
exit 0
STUB
chmod +x "$ostub"
# Exported so the stub sees it; truncated so only this section's launches count.
export AQ_ORDER="$tmp/prio-order.log"; : > "$AQ_ORDER"
plow="$tmp/p-low.md"
printf '%s\n' '---' 'engine: devin' "cwd: $work" 'yolo: true' 'priority: low' '---' '' 'TASKID=low' > "$plow"
pcrit="$tmp/p-crit.md"
printf '%s\n' '---' 'engine: devin' "cwd: $work" 'yolo: true' 'priority: critical' '---' '' 'TASKID=critical' > "$pcrit"
"$AQ" init >/dev/null
DEVIN_BIN="$ostub" "$AQ" add "$plow" >/dev/null
sleep 1 # ensure the critical job has a strictly newer (later) queue timestamp
DEVIN_BIN="$ostub" "$AQ" add "$pcrit" >/dev/null
DEVIN_BIN="$ostub" "$AQ" run --once --max 1 >/dev/null 2>&1
# The first line of the order log is the job that was launched first.
if [ "$(head -1 "$AQ_ORDER" 2>/dev/null || true)" = "TASKID=critical" ]; then
  pass "priority: critical (queued later) ran before low"
else
  echo "--- execution order ---" >&2; cat "$AQ_ORDER" >&2 || true
  fail "priority ordering did not pick the critical job first"
fi

# 9. capability mismatch: a job requiring an absent tool → failed/ with
#    result=capability_mismatch, and the agent is NEVER launched.
export AGENT_QUEUE_ROOT="$tmp/queue-cap"
launchflag="$tmp/cap-launched.flag"
rm -f "$launchflag"
# This stub's only job is to leave a flag file behind if it is ever executed.
launchstub="$tmp/launch-engine"
printf '#!/usr/bin/env bash\ntouch %q\nexit 0\n' "$launchflag" > "$launchstub"
chmod +x "$launchstub"
capjob="$tmp/cap.md"
cap_manifest=(
  '---'
  'engine: devin'
  "cwd: $work"
  'yolo: true'
  'capabilities: [has:definitely-not-installed]'
  '---'
  ''
  '# capability task'
)
printf '%s\n' "${cap_manifest[@]}" > "$capjob"
"$AQ" init >/dev/null
DEVIN_BIN="$launchstub" "$AQ" add "$capjob" >/dev/null
DEVIN_BIN="$launchstub" "$AQ" run --once >/dev/null 2>&1
# Guard form: dump the queue and bail out unless both conditions hold.
if ! { ls "$AGENT_QUEUE_ROOT"/failed/*.md >/dev/null 2>&1 \
  && grep -q '^result=capability_mismatch' "$AGENT_QUEUE_ROOT"/.state/*.meta 2>/dev/null; }; then
  ls -R "$AGENT_QUEUE_ROOT" >&2 || true
  fail "capability-mismatch job not routed to failed/ with result=capability_mismatch"
fi
pass "capability mismatch → failed/ (result=capability_mismatch)"
# The flag file exists only if the launch stub actually ran.
if [[ -e "$launchflag" ]]; then
  fail "agent WAS launched on capability mismatch (it must not be)"
fi
pass "capability mismatch: agent never launched"

# 10. engine-class: a job with engine-class:agentic-coder and no engine, with
#     DEVIN_BIN stubbed (available), resolves to devin, runs, and lands in review/.
export AGENT_QUEUE_ROOT="$tmp/queue-ec"
ecjob="$tmp/ec.md"
ec_manifest=(
  '---'
  'engine-class: agentic-coder'
  "cwd: $work"
  'yolo: true'
  '---'
  ''
  '# engine-class task'
)
printf '%s\n' "${ec_manifest[@]}" > "$ecjob"
"$AQ" init >/dev/null
DEVIN_BIN="$stub" "$AQ" add "$ecjob" >/dev/null
DEVIN_BIN="$stub" "$AQ" run --once >/dev/null 2>&1
if ! ls "$AGENT_QUEUE_ROOT"/review/*.md >/dev/null 2>&1; then
  ls -R "$AGENT_QUEUE_ROOT" >&2 || true
  fail "engine-class job did not resolve+run to review/"
fi
pass "engine-class: agentic-coder (no engine) resolved to devin → review/"

# 11. idempotency-key dedupe on add.
export AGENT_QUEUE_ROOT="$tmp/queue-idem"
"$AQ" init >/dev/null
ia="$tmp/idem-a.md"
idem_a=('---' 'engine: devin' "cwd: $work" 'yolo: true' 'idempotency-key: dedup-1' '---' '' 'alpha body')
printf '%s\n' "${idem_a[@]}" > "$ia"
DEVIN_BIN="$stub" "$AQ" add "$ia" >/dev/null
DEVIN_BIN="$stub" "$AQ" add "$ia" >/dev/null # identical key+body → no-op
# cnt = number of *.md files directly inside inbox/.
cnt=$(find "$AGENT_QUEUE_ROOT/inbox" -maxdepth 1 -type f -name '*.md' 2>/dev/null | wc -l | tr -d ' ')
if [[ "$cnt" == "1" ]]; then
  pass "idempotency: same key+body added twice → exactly 1 inbox file"
else
  fail "idempotency: expected 1 inbox file after duplicate add, got $cnt"
fi
# same key, different body, prior STILL in inbox → supersede (still exactly 1)
ib="$tmp/idem-b.md"
idem_b=('---' 'engine: devin' "cwd: $work" 'yolo: true' 'idempotency-key: dedup-1' '---' '' 'beta body (changed)')
printf '%s\n' "${idem_b[@]}" > "$ib"
DEVIN_BIN="$stub" "$AQ" add "$ib" >/dev/null
cnt=$(find "$AGENT_QUEUE_ROOT/inbox" -maxdepth 1 -type f -name '*.md' 2>/dev/null | wc -l | tr -d ' ')
if [[ "$cnt" == "1" ]]; then
  pass "idempotency: same key + changed body, prior in inbox → superseded (1 file)"
else
  fail "idempotency: expected 1 inbox file after supersede, got $cnt"
fi
# drain (prior leaves inbox → review), then same key + different body → REJECT
DEVIN_BIN="$stub" "$AQ" run --once >/dev/null 2>&1
ic="$tmp/idem-c.md"
idem_c=('---' 'engine: devin' "cwd: $work" 'yolo: true' 'idempotency-key: dedup-1' '---' '' 'gamma body (changed again)')
printf '%s\n' "${idem_c[@]}" > "$ic"
# Here a SUCCESSFUL add is the failure case.
if DEVIN_BIN="$stub" "$AQ" add "$ic" >/dev/null 2>&1; then
  fail "idempotency: same key + different body (prior past inbox) should be rejected"
fi
pass "idempotency: same key + different body, prior past inbox → rejected"
cnt=$(find "$AGENT_QUEUE_ROOT/inbox" -maxdepth 1 -type f -name '*.md' 2>/dev/null | wc -l | tr -d ' ')
if [[ "$cnt" == "0" ]]; then
  pass "idempotency: a rejected add enqueues nothing"
else
  fail "idempotency: rejected add should not enqueue (inbox=$cnt)"
fi

# ─────────────────────────────────────────────────────────────────────
# Phase 1 — Slice 3 cases (resilience & insights, single host).
# Use temp git repos + stubs; never touches a real queue.
# ─────────────────────────────────────────────────────────────────────

# metaval FILE KEY — print the value of the LAST "KEY=value" line in FILE
# (everything after the first '='). Prints nothing when the key or the file
# is absent; grep's stderr is suppressed.
metaval() {
  local meta_file=$1 key=$2
  grep "^${key}=" "$meta_file" 2>/dev/null | tail -1 | cut -d= -f2-
}

# mkrepo DIR — create DIR as a git repository with a throwaway identity and a
# single "seed" commit containing seed.txt.
mkrepo() {
  local repo_dir=$1
  mkdir -p "$repo_dir"
  git -C "$repo_dir" init -q
  git -C "$repo_dir" config user.email t@t
  git -C "$repo_dir" config user.name selftest
  printf '%s\n' seed > "$repo_dir/seed.txt"
  git -C "$repo_dir" add -A
  git -C "$repo_dir" commit -q -m seed
}

# 12. orphan recovery: a building/ job whose worker pid is dead → `recover`
#     moves it to inbox/ with attempts incremented; a second recover is a no-op.
export AGENT_QUEUE_ROOT="$tmp/queue-orphan"
"$AQ" init >/dev/null
orphan_md="$AGENT_QUEUE_ROOT/building/orphanjob.md"
orphan_meta="$AGENT_QUEUE_ROOT/.state/orphanjob.meta"
printf '%s\n' '---' 'engine: devin' "cwd: $work" 'yolo: true' '---' '' '# orphan task' \
  > "$orphan_md"
# pid 1 is alive but pidstart is bogus → the PID-reuse guard marks it dead.
printf '%s\n' 'job=orphanjob' 'engine=devin' "cwd=$work" 'started=1' 'attempts=1' 'pid=1' 'pidstart=NOPE' \
  > "$orphan_meta"
"$AQ" recover >/dev/null 2>&1
if [[ -f "$AGENT_QUEUE_ROOT/inbox/orphanjob.md" && ! -f "$orphan_md" ]]; then
  pass "orphan recovery: dead-worker building/ job recovered to inbox/"
else
  ls -R "$AGENT_QUEUE_ROOT" >&2 || true
  fail "orphan not recovered to inbox/"
fi
if [[ "$(metaval "$orphan_meta" attempts)" == "2" ]]; then
  pass "orphan recovery: attempts incremented (1 -> 2)"
else
  fail "orphan recovery: attempts not incremented (got $(metaval "$orphan_meta" attempts))"
fi
"$AQ" recover >/dev/null 2>&1 # idempotent: nothing left in building/
inbn=$(find "$AGENT_QUEUE_ROOT/inbox" -maxdepth 1 -name 'orphanjob.md' | wc -l | tr -d ' ')
if [[ "$inbn" == "1" ]] && [[ "$(metaval "$orphan_meta" attempts)" == "2" ]]; then
  pass "orphan recovery: idempotent (twice recovers once)"
else
  fail "orphan recovery not idempotent (inbox=$inbn attempts=$(metaval "$orphan_meta" attempts))"
fi

# 13. WIP checkpoint (git) + numstat: a git-repo cwd whose agent writes a 3-line
#     file → branch aq/wip/JOB has a commit with the change, main is untouched,
#     and lines_added is recorded.
export AGENT_QUEUE_ROOT="$tmp/queue-wip"
repo="$tmp/repo-wip"
mkrepo "$repo"
# Remember the repo's initial branch name so it can be checked as untouched later.
mainbr=$(git -C "$repo" symbolic-ref --short HEAD)
# Engine stub that writes the three lines a, b, c to created_by_agent.txt in
# its working directory.
wipstub="$tmp/wip-engine"
printf '#!/usr/bin/env bash\nprintf '"'"'a\\nb\\nc\\n'"'"' > created_by_agent.txt\nexit 0\n' > "$wipstub"
chmod +x "$wipstub"
"$AQ" init >/dev/null
printf '%s\n' '---' 'engine: devin' "cwd: $repo" 'yolo: true' '---' '' '# wip task' \
  > "$AGENT_QUEUE_ROOT/inbox/wipjob.md"
DEVIN_BIN="$wipstub" "$AQ" run --once >/dev/null 2>&1
# capture the log first (avoid `git log | grep -q` — under pipefail the early
# grep -q exit SIGPIPEs git log and falsely fails the pipeline).
wiplog=$(git -C "$repo" log --oneline aq/wip/wipjob 2>/dev/null || true)
# Three things must hold: the branch ref exists, its log mentions the WIP
# commit subject, and the agent's file is readable from that branch.
if ! { git -C "$repo" show-ref --verify --quiet refs/heads/aq/wip/wipjob \
  && [[ "$wiplog" == *"aq wip: wipjob"* ]] \
  && git -C "$repo" show aq/wip/wipjob:created_by_agent.txt >/dev/null 2>&1; }; then
  git -C "$repo" branch -a >&2 || true
  fail "wip checkpoint branch/commit missing"
fi
pass "wip checkpoint: aq/wip/wipjob has a commit with the agent's change"
# The agent's file must NOT be present on the repo's original branch.
if git -C "$repo" cat-file -e "$mainbr":created_by_agent.txt 2>/dev/null; then
  fail "wip checkpoint: main branch was modified (must be untouched)"
fi
pass "wip checkpoint: main branch ($mainbr) untouched"
wip_meta="$AGENT_QUEUE_ROOT/.state/wipjob.meta"
if [[ "$(metaval "$wip_meta" lines_added)" == "3" ]]; then
  pass "insights numstat: lines_added recorded (=3)"
else
  fail "insights numstat: lines_added wrong (got $(metaval "$wip_meta" lines_added))"
fi

# 13b. non-git cwd → WIP skipped cleanly (no error), job still completes.
export AGENT_QUEUE_ROOT="$tmp/queue-nogit"
"$AQ" init >/dev/null
nogit_manifest=('---' 'engine: devin' "cwd: $work" 'yolo: true' '---' '' '# nogit task')
printf '%s\n' "${nogit_manifest[@]}" > "$AGENT_QUEUE_ROOT/inbox/nogitjob.md"
DEVIN_BIN="$stub" "$AQ" run --once >/dev/null 2>&1
# Success = something reached review/ AND the job log says the cwd is not a repo.
if ! { ls "$AGENT_QUEUE_ROOT"/review/*.md >/dev/null 2>&1 \
  && grep -q 'not a git repo' "$AGENT_QUEUE_ROOT/logs/nogitjob.log" 2>/dev/null; }; then
  fail "non-git cwd run did not complete cleanly"
fi
pass "wip checkpoint: non-git cwd skipped cleanly → review/"

# 14. WIP resume: an orphan whose aq/wip/JOB branch already has a prior commit →
#     the relaunch checks out that branch (agent sees HEAD on aq/wip/JOB).
export AGENT_QUEUE_ROOT="$tmp/queue-resume"
repo2="$tmp/repo-resume"
mkrepo "$repo2"
mainbr2=$(git -C "$repo2" symbolic-ref --short HEAD)
# Pre-create the WIP branch with one commit, then go back to the original branch.
git -C "$repo2" checkout -q -b aq/wip/resumejob
printf '%s\n' prior > "$repo2/prior.txt"
git -C "$repo2" add -A
git -C "$repo2" commit -q -m "aq wip: resumejob (prior)"
git -C "$repo2" checkout -q "$mainbr2"
resumeout="$tmp/resume-head.txt"
rm -f "$resumeout"
# Engine stub that records which branch HEAD points at when it is run.
resumestub="$tmp/resume-engine"
printf '#!/usr/bin/env bash\ngit rev-parse --abbrev-ref HEAD > %q 2>/dev/null\nexit 0\n' "$resumeout" > "$resumestub"
chmod +x "$resumestub"
"$AQ" init >/dev/null
# Plant the job directly in building/ with the same bogus-pidstart meta as the
# orphan-recovery case (pid=1, pidstart=NOPE).
resume_manifest=('---' 'engine: devin' "cwd: $repo2" 'yolo: true' '---' '' '# resume task')
printf '%s\n' "${resume_manifest[@]}" > "$AGENT_QUEUE_ROOT/building/resumejob.md"
resume_meta=('job=resumejob' 'engine=devin' "cwd=$repo2" 'started=1' 'attempts=1' 'pid=1' 'pidstart=NOPE')
printf '%s\n' "${resume_meta[@]}" > "$AGENT_QUEUE_ROOT/.state/resumejob.meta"
DEVIN_BIN="$resumestub" "$AQ" run --once >/dev/null 2>&1
if [[ "$(cat "$resumeout" 2>/dev/null)" != "aq/wip/resumejob" ]]; then
  echo "resume HEAD was: $(cat "$resumeout" 2>/dev/null)" >&2
  fail "wip resume did not check out the existing WIP branch"
fi
pass "wip resume: recovered job ran with HEAD on aq/wip/resumejob"

# 15. retry on verify_failed: max=1 → requeued once (attempts=2) then failed/
#     result=retries_exhausted; a backoff (next_eligible) is recorded.
export AGENT_QUEUE_ROOT="$tmp/queue-retry"
# Exported for the runner; unset again once the retry sections are done.
export AGENT_QUEUE_POLL=1
"$AQ" init >/dev/null
retry_manifest=(
  '---'
  'engine: devin'
  "cwd: $work"
  'yolo: true'
  'verify: false'
  'retry: { max: 1, backoff: 1s, on: [verify_failed] }'
  '---'
  ''
  '# retry task'
)
printf '%s\n' "${retry_manifest[@]}" > "$AGENT_QUEUE_ROOT/inbox/retryjob.md"
DEVIN_BIN="$stub" "$AQ" run --once >/dev/null 2>&1
retry_meta="$AGENT_QUEUE_ROOT/.state/retryjob.meta"
# Expect: the job file in failed/, result=retries_exhausted and attempts=2.
if ls "$AGENT_QUEUE_ROOT"/failed/retryjob.md >/dev/null 2>&1 \
  && [[ "$(metaval "$retry_meta" result)" == "retries_exhausted" ]] \
  && [[ "$(metaval "$retry_meta" attempts)" == "2" ]]; then
  pass "retry(verify_failed): requeued once (attempts=2) then retries_exhausted"
else
  fail "retry(verify_failed) wrong (result=$(metaval "$retry_meta" result) attempts=$(metaval "$retry_meta" attempts))"
fi
# The job log must contain a 'RETRY scheduled' line.
if grep -q 'RETRY scheduled' "$AGENT_QUEUE_ROOT/logs/retryjob.log" 2>/dev/null; then
  pass "retry: backoff RETRY scheduled (next_eligible honored)"
else
  fail "retry: no RETRY scheduled line in log"
fi

# 16. retry on crash: rc!=0 with on=[crash] retries; without crash it does not.
# Engine stub that always exits with status 3.
crashstub="$tmp/crash-engine"
printf '#!/usr/bin/env bash\nexit 3\n' > "$crashstub"
chmod +x "$crashstub"

# (a) crash listed in retry.on → one retry, then retries_exhausted.
export AGENT_QUEUE_ROOT="$tmp/queue-crash"
"$AQ" init >/dev/null
crash_manifest=(
  '---'
  'engine: devin'
  "cwd: $work"
  'yolo: true'
  'retry: { max: 1, backoff: 1s, on: [crash] }'
  '---'
  ''
  '# crash-retry task'
)
printf '%s\n' "${crash_manifest[@]}" > "$AGENT_QUEUE_ROOT/inbox/crashjob.md"
DEVIN_BIN="$crashstub" "$AQ" run --once >/dev/null 2>&1
crash_meta="$AGENT_QUEUE_ROOT/.state/crashjob.meta"
if [[ "$(metaval "$crash_meta" result)" == "retries_exhausted" ]] \
  && [[ "$(metaval "$crash_meta" attempts)" == "2" ]]; then
  pass "retry(crash): rc!=0 with on=[crash] retried then retries_exhausted (attempts=2)"
else
  fail "retry(crash) wrong (result=$(metaval "$crash_meta" result) attempts=$(metaval "$crash_meta" attempts))"
fi

# (b) crash NOT listed in retry.on → no retry, result=failed after one attempt.
export AGENT_QUEUE_ROOT="$tmp/queue-nocrash"
"$AQ" init >/dev/null
nocrash_manifest=(
  '---'
  'engine: devin'
  "cwd: $work"
  'yolo: true'
  'retry: { max: 1, backoff: 1s, on: [verify_failed] }'
  '---'
  ''
  '# crash-no-retry task'
)
printf '%s\n' "${nocrash_manifest[@]}" > "$AGENT_QUEUE_ROOT/inbox/nocrashjob.md"
DEVIN_BIN="$crashstub" "$AQ" run --once >/dev/null 2>&1
nocrash_meta="$AGENT_QUEUE_ROOT/.state/nocrashjob.meta"
if [[ "$(metaval "$nocrash_meta" result)" == "failed" ]] \
  && [[ "$(metaval "$nocrash_meta" attempts)" == "1" ]]; then
  pass "retry(crash): crash not in on -> straight to failed/ (no retry)"
else
  fail "retry(crash) should not retry when crash not in on (result=$(metaval "$nocrash_meta" result) attempts=$(metaval "$nocrash_meta" attempts))"
fi
unset AGENT_QUEUE_POLL

# 17. insights parse: a stub log with a usage line → parse_usage records tokens/
#     cost into meta; `insights JOB` prints them; a no-usage log doesn't crash.
export AGENT_QUEUE_ROOT="$tmp/queue-usage"
# Engine stub that prints a single AQ_USAGE line and succeeds.
usagestub="$tmp/usage-engine"
printf '#!/usr/bin/env bash\necho "AQ_USAGE model=claude-test tokens_in=100 tokens_out=50 cost_usd=0.0021 turns=3 tool_calls=5"\nexit 0\n' > "$usagestub"
chmod +x "$usagestub"
"$AQ" init >/dev/null
usage_manifest=('---' 'engine: claude' "cwd: $work" 'yolo: true' '---' '' '# usage task')
printf '%s\n' "${usage_manifest[@]}" > "$AGENT_QUEUE_ROOT/inbox/usagejob.md"
CLAUDE_BIN="$usagestub" "$AQ" run --once >/dev/null 2>&1
usage_meta="$AGENT_QUEUE_ROOT/.state/usagejob.meta"
# The numbers printed by the stub must show up as keys in the job's meta file.
if [[ "$(metaval "$usage_meta" tokens_in)" == "100" ]] \
  && [[ "$(metaval "$usage_meta" cost_usd)" == "0.0021" ]]; then
  pass "insights parse_usage: tokens/cost extracted into meta"
else
  fail "parse_usage did not record tokens/cost (tokens_in=$(metaval "$usage_meta" tokens_in))"
fi
# Per-job report: captured first, then matched in-shell (no pipe into grep).
ins=$("$AQ" insights usagejob 2>/dev/null || true)
if [[ "$ins" != *tokens_in* || "$ins" != *0.0021* ]]; then
  fail "insights did not print metrics"
fi
pass "insights : prints per-job metrics"
# Second job on the same queue, run with the plain no-op stub (prints no usage line).
nousage_manifest=('---' 'engine: claude' "cwd: $work" 'yolo: true' '---' '' '# no-usage task')
printf '%s\n' "${nousage_manifest[@]}" > "$AGENT_QUEUE_ROOT/inbox/nousagejob.md"
CLAUDE_BIN="$stub" "$AQ" run --once >/dev/null 2>&1
if "$AQ" insights nousagejob >/dev/null 2>&1 \
  && [[ -z "$(metaval "$AGENT_QUEUE_ROOT/.state/nousagejob.meta" tokens_in)" ]]; then
  pass "insights: no-usage log omits token fields without crashing"
else
  fail "insights crashed or fabricated tokens for a no-usage log"
fi

# 18. insights aggregate: two finished jobs → per-engine rollup with totals + rate.
out=$("$AQ" insights 2>/dev/null || true)
if [[ "$out" != *"ROLLUP BY ENGINE"* ]] || ! grep -qE 'claude .* 100 .* 50' <<<"$out"; then
  printf '%s\n' "$out" >&2
  fail "insights aggregate rollup missing/incorrect"
fi
pass "insights aggregate: per-engine rollup with token totals"

# ─────────────────────────────────────────────────────────────────────
# Phase 1 — Slice 2 cases (profiles + deps/DAG, single host).
# Uses a temp profile catalog (AGENT_QUEUE_PROFILES) + temp git repos.
# ─────────────────────────────────────────────────────────────────────
# Five throwaway profiles, one per behaviour exercised below.
profdir="$tmp/profiles"; mkdir -p "$profdir"
printf '%s\n' '---' 'name: vfail' 'persona: |' ' PERSONA-VFAIL' 'default-verify: false' '---' > "$profdir/vfail.md"
printf '%s\n' '---' 'name: vpass' 'default-verify: true' '---' > "$profdir/vpass.md"
printf '%s\n' '---' 'name: capreq' 'capabilities: [has:definitely-not-installed]' '---' > "$profdir/capreq.md"
printf '%s\n' '---' 'name: personap' 'persona: |' ' PERSONA-MARKER-XYZ' ' second persona line' 'default-verify: true' '---' > "$profdir/personap.md"
printf '%s\n' '---' 'name: scoped' 'allowed-scope: [backend/**]' '---' > "$profdir/scoped.md"
export AGENT_QUEUE_PROFILES="$profdir"
# Copy of the runner with its `main "$@"` line deleted, so it can be sourced
# for function-level checks without running anything.
funcs="$tmp/aq-funcs.sh"; sed '/^main "\$@"/d' "$AQ" > "$funcs"

# 19. profile inherits default-verify: vfail (verify=false) → failed/verify_failed;
#     vpass (verify=true) → testing/.
export AGENT_QUEUE_ROOT="$tmp/queue-pverify"
"$AQ" init >/dev/null
printf '%s\n' '---' 'engine: devin' "cwd: $work" 'yolo: true' 'profile: vfail' '---' '' '# pv-fail' \
  > "$AGENT_QUEUE_ROOT/inbox/pvfail.md"
DEVIN_BIN="$stub" "$AQ" run --once >/dev/null 2>&1
if ls "$AGENT_QUEUE_ROOT"/failed/pvfail.md >/dev/null 2>&1 \
  && [ "$(metaval "$AGENT_QUEUE_ROOT/.state/pvfail.meta" result)" = "verify_failed" ]; then
  pass "profile inherit: default-verify=false → failed/ (verify_failed)"
else
  fail "profile verify=false did not route to failed (result=$(metaval "$AGENT_QUEUE_ROOT/.state/pvfail.meta" result))"
fi
printf '%s\n' '---' 'engine: devin' "cwd: $work" 'yolo: true' 'profile: vpass' '---' '' '# pv-pass' \
  > "$AGENT_QUEUE_ROOT/inbox/pvpass.md"
DEVIN_BIN="$stub" "$AQ" run --once >/dev/null 2>&1
if ls "$AGENT_QUEUE_ROOT"/testing/pvpass.md >/dev/null 2>&1; then
  pass "profile inherit: default-verify=true → testing/"
else
  fail "profile verify=true did not reach testing/"
fi

# 19b. job-level verify overrides the profile (precedence job > profile).
printf '%s\n' '---' 'engine: devin' "cwd: $work" 'yolo: true' 'profile: vfail' 'verify: true' '---' '' '# override' \
  > "$AGENT_QUEUE_ROOT/inbox/pvoverride.md"
DEVIN_BIN="$stub" "$AQ" run --once >/dev/null 2>&1
if ls "$AGENT_QUEUE_ROOT"/testing/pvoverride.md >/dev/null 2>&1; then
  pass "profile precedence: job verify overrides profile default-verify"
else
  fail "job-level verify did not override profile"
fi

# 20. persona injection (golden): the body fed to the engine begins with the
#     profile persona. A stub copies its --prompt-file to a sentinel.
export AGENT_QUEUE_ROOT="$tmp/queue-persona"
sentinel="$tmp/persona-body.txt"; rm -f "$sentinel"
copystub="$tmp/copy-engine"
# NOTE(review): the stub body, its chmod and this queue's `init` were missing
# from the file as received (only `cat > "$copystub" </dev/null` remained,
# which writes an empty, non-executable stub into an uninitialised queue).
# They are reconstructed here from the description above and from the
# --prompt-file parsing used by the order-recording stub in section 8 —
# confirm against the original before relying on the details.
{
  printf '%s\n' \
    '#!/usr/bin/env bash' \
    '# prompt-copy engine stub: saves the --prompt-file argument to a sentinel' \
    'pf=""' \
    'while [ $# -gt 0 ]; do' \
    '  case "$1" in' \
    '    --prompt-file) pf="${2:-}"; shift; if [ $# -gt 0 ]; then shift; fi ;;' \
    '    *) shift ;;' \
    '  esac' \
    'done'
  # %q bakes the sentinel path into the stub, shell-quoted.
  printf 'if [ -n "$pf" ]; then cp -- "$pf" %q; fi\n' "$sentinel"
  printf '%s\n' 'exit 0'
} > "$copystub"
chmod +x "$copystub"
"$AQ" init >/dev/null
printf '%s\n' '---' 'engine: devin' "cwd: $work" 'yolo: true' 'profile: personap' '---' '' 'TASK-BODY-LINE' \
  > "$AGENT_QUEUE_ROOT/inbox/personajob.md"
DEVIN_BIN="$copystub" "$AQ" run --once >/dev/null 2>&1
# First line of the captured prompt must be the persona marker, and the task
# body line must still be present somewhere in it.
if [ "$(head -1 "$sentinel" 2>/dev/null)" = "PERSONA-MARKER-XYZ" ] \
  && grep -q 'TASK-BODY-LINE' "$sentinel" 2>/dev/null; then
  pass "persona injection: engine body begins with profile persona, task preserved"
else
  echo "body head: $(head -3 "$sentinel" 2>/dev/null)" >&2
  fail "persona was not prepended to the engine body"
fi

# 21. profile capability inheritance: a job omitting capabilities inherits the
#     profile's → unmet → failed/ capability_mismatch, agent never launched.
export AGENT_QUEUE_ROOT="$tmp/queue-pcaps"
launchflag="$tmp/pcaps-launched"; rm -f "$launchflag"
# Stub that leaves a flag file behind if it is ever executed.
launchstub3="$tmp/cap-launch3"
printf '#!/usr/bin/env bash\ntouch %q\nexit 0\n' "$launchflag" > "$launchstub3"; chmod +x "$launchstub3"
"$AQ" init >/dev/null
printf '%s\n' '---' 'engine: devin' "cwd: $work" 'yolo: true' 'profile: capreq' '---' '' '# pcaps' \
  > "$AGENT_QUEUE_ROOT/inbox/pcapsjob.md"
DEVIN_BIN="$launchstub3" "$AQ" run --once >/dev/null 2>&1
if ls "$AGENT_QUEUE_ROOT"/failed/pcapsjob.md >/dev/null 2>&1 \
  && [ "$(metaval "$AGENT_QUEUE_ROOT/.state/pcapsjob.meta" result)" = "capability_mismatch" ] \
  && [ ! -e "$launchflag" ]; then
  pass "profile caps inheritance: unmet inherited capability → capability_mismatch (no launch)"
else
  fail "profile caps inheritance failed (result=$(metaval "$AGENT_QUEUE_ROOT/.state/pcapsjob.meta" result) launched=$([ -e "$launchflag" ] && echo yes || echo no))"
fi

# 22. allowed-scope warn-only: an out-of-scope change logs a WARNING and the job
#     still succeeds; plus a direct path_in_scope unit check.
export AGENT_QUEUE_ROOT="$tmp/queue-scope"
repos="$tmp/repo-scope"; mkrepo "$repos"
# Stub that writes frontend/out.txt, i.e. outside the profile's backend/** scope.
scopestub="$tmp/scope-engine"
printf '#!/usr/bin/env bash\nmkdir -p frontend && echo changed > frontend/out.txt\nexit 0\n' > "$scopestub"
chmod +x "$scopestub"
"$AQ" init >/dev/null
printf '%s\n' '---' 'engine: devin' "cwd: $repos" 'yolo: true' 'profile: scoped' '---' '' '# scope task' \
  > "$AGENT_QUEUE_ROOT/inbox/scopejob.md"
DEVIN_BIN="$scopestub" "$AQ" run --once >/dev/null 2>&1
if grep -q 'allowed-scope violation' "$AGENT_QUEUE_ROOT/logs/scopejob.log" 2>/dev/null \
  && ls "$AGENT_QUEUE_ROOT"/review/scopejob.md >/dev/null 2>&1; then
  pass "allowed-scope: out-of-scope change WARNS (warn-only) and job still succeeds"
else
  fail "allowed-scope warn-only did not warn / job did not succeed"
fi
# Unit check in a child shell. The functions file is handed over through the
# environment instead of being spliced into the -c string, so a temp path
# containing quotes or `$` cannot alter the code that runs.
if AQ_FUNCS="$funcs" bash -c 'set -uo pipefail; source "$AQ_FUNCS"; path_in_scope "backend/a/b.ts" "backend/**" && ! path_in_scope "frontend/x.ts" "backend/**"'; then
  pass "allowed-scope: path_in_scope matches subtree, rejects outside (unit)"
else
  fail "path_in_scope unit logic wrong"
fi

# 23. deps block→run: B deps:[keyA] stays blocked until A is shipped/, then runs.
export AGENT_QUEUE_ROOT="$tmp/queue-deps"
"$AQ" init >/dev/null
printf '%s\n' '---' 'engine: devin' "cwd: $work" 'yolo: true' 'idempotency-key: keyA' '---' '' '# A' \
  > "$AGENT_QUEUE_ROOT/inbox/jobA.md"
printf '%s\n' '---' 'engine: devin' "cwd: $work" 'yolo: true' 'idempotency-key: keyB' 'deps: [keyA]' '---' '' '# B' \
  > "$AGENT_QUEUE_ROOT/inbox/jobB.md"
DEVIN_BIN="$stub" "$AQ" run --once >/dev/null 2>&1
if ls "$AGENT_QUEUE_ROOT"/inbox/jobB.md >/dev/null 2>&1 && ls "$AGENT_QUEUE_ROOT"/review/jobA.md >/dev/null 2>&1; then
  pass "deps: B stays blocked in inbox while A is unshipped"
else
  fail "deps: B should be blocked while A unshipped (A=$(ls "$AGENT_QUEUE_ROOT"/review 2>/dev/null) B-in-inbox=$(ls "$AGENT_QUEUE_ROOT"/inbox 2>/dev/null))"
fi
# status surfaces the blocked job.
# Capture the output first instead of piping into `grep -q`: under pipefail an
# early grep -q exit can SIGPIPE the producer and falsely fail the pipeline.
# A non-zero exit from status itself still counts as a failure.
if status_out=$("$AQ" status 2>/dev/null) \
  && [[ "$status_out" == *'blocked (waiting on: keyA)'* ]]; then
  pass "deps: status surfaces 'blocked (waiting on: keyA)'"
else
  fail "deps: status did not surface blocked job"
fi
# ship A (review -> testing -> shipped), then B becomes runnable
"$AQ" promote jobA >/dev/null 2>&1 # review -> testing
"$AQ" promote jobA >/dev/null 2>&1 # testing -> shipped
DEVIN_BIN="$stub" "$AQ" run --once >/dev/null 2>&1
if ls "$AGENT_QUEUE_ROOT"/review/jobB.md >/dev/null 2>&1; then
  pass "deps: once A is shipped, B unblocks and completes"
else
  fail "deps: B did not run after A shipped"
fi

# 24. deps-mode soft: dep satisfied when the dependency is in testing/.
export AGENT_QUEUE_ROOT="$tmp/queue-depsoft"
"$AQ" init >/dev/null
# A carries verify: true; C depends on A's key with deps-mode: soft.
soft_a=('---' 'engine: devin' "cwd: $work" 'yolo: true' 'idempotency-key: keyA' 'verify: true' '---' '' '# A-soft')
printf '%s\n' "${soft_a[@]}" > "$AGENT_QUEUE_ROOT/inbox/sjobA.md"
soft_c=('---' 'engine: devin' "cwd: $work" 'yolo: true' 'idempotency-key: keyC' 'deps: [keyA]' 'deps-mode: soft' '---' '' '# C-soft')
printf '%s\n' "${soft_c[@]}" > "$AGENT_QUEUE_ROOT/inbox/sjobC.md"
DEVIN_BIN="$stub" "$AQ" run --once >/dev/null 2>&1
# After one run: A must be in testing/ and C must already be in review/.
if ! { ls "$AGENT_QUEUE_ROOT"/testing/sjobA.md >/dev/null 2>&1 \
  && ls "$AGENT_QUEUE_ROOT"/review/sjobC.md >/dev/null 2>&1; }; then
  fail "deps-mode soft did not unblock from testing/ (A=$(ls "$AGENT_QUEUE_ROOT"/testing 2>/dev/null) C=$(ls "$AGENT_QUEUE_ROOT"/review 2>/dev/null))"
fi
pass "deps-mode soft: dep satisfied while dependency is in testing/"

# 25. cycle detection: adding A deps:[keyB] while B deps:[keyA] exists is rejected.
export AGENT_QUEUE_ROOT="$tmp/queue-cycle"
"$AQ" init >/dev/null
cycB="$tmp/cyc-b.md"
cyc_b=('---' 'engine: devin' "cwd: $work" 'yolo: true' 'idempotency-key: keyB' 'deps: [keyA]' '---' '' '# cyc B')
printf '%s\n' "${cyc_b[@]}" > "$cycB"
# NOTE(review): unlike the other `add` calls in this file, this one does not
# set DEVIN_BIN — confirm `add` never needs the engine binary.
"$AQ" add "$cycB" >/dev/null 2>&1 # B added (blocked on keyA — allowed)
cycA="$tmp/cyc-a.md"
cyc_a=('---' 'engine: devin' "cwd: $work" 'yolo: true' 'idempotency-key: keyA' 'deps: [keyB]' '---' '' '# cyc A')
printf '%s\n' "${cyc_a[@]}" > "$cycA"
# A successful add here would mean the cycle went undetected.
if DEVIN_BIN="$stub" "$AQ" add "$cycA" >/dev/null 2>&1; then
  fail "cycle detection: adding A deps:[keyB] while B deps:[keyA] should be rejected"
fi
pass "cycle detection: dependency cycle on add is rejected"
unset AGENT_QUEUE_PROFILES

echo "self-test PASS"