From f46dd38adbca77027735c8ee837ef6f219d076dd Mon Sep 17 00:00:00 2001
From: saravanakumardb1
Date: Fri, 29 May 2026 18:43:30 -0700
Subject: [PATCH] test(agent-queue): resilience + insights selftest cases (P1-S3)

Adds (never weakens) temp-git-repo + stub cases: orphan recovery
(+idempotent), WIP checkpoint/numstat, non-git skip, WIP resume, retry on
verify_failed and crash (incl. no-retry when class absent), parse_usage
extraction, per-engine aggregate. Inbox-empty-safe counts; avoids the
pipefail+grep -q SIGPIPE trap.
---
 agent-queue/selftest.sh | 198 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 198 insertions(+)

diff --git a/agent-queue/selftest.sh b/agent-queue/selftest.sh
index c15b3e8..1f150e1 100755
--- a/agent-queue/selftest.sh
+++ b/agent-queue/selftest.sh
@@ -230,4 +230,202 @@ cnt=$(find "$AGENT_QUEUE_ROOT/inbox" -maxdepth 1 -type f -name '*.md' 2>/dev/nul
 [ "$cnt" = "0" ] && pass "idempotency: a rejected add enqueues nothing" \
   || fail "idempotency: rejected add should not enqueue (inbox=$cnt)"
 
+# ─────────────────────────────────────────────────────────────────────
+# Phase 1 — Slice 3 cases (resilience & insights, single host).
+# Use temp git repos + stubs; never touches a real queue.
+# ─────────────────────────────────────────────────────────────────────
+# metaval FILE KEY: print the value of the last "KEY=..." line in FILE
+# (prints nothing when FILE is unreadable or KEY is absent).
+metaval() { grep "^$2=" "$1" 2>/dev/null | tail -1 | cut -d= -f2-; }
+# mkrepo DIR: create DIR, init a git repo there and make one "seed" commit.
+# Identity and signing are configured repo-locally, so commits made in DIR
+# do not depend on the user's global identity or signing settings.
+mkrepo() {
+  local d=$1; mkdir -p "$d"; git -C "$d" init -q
+  git -C "$d" config user.email t@t; git -C "$d" config user.name selftest
+  # With a global commit.gpgsign=true, commits here would try to GPG-sign and
+  # fail (or prompt) in a non-interactive run; switch signing off for DIR.
+  git -C "$d" config commit.gpgsign false
+  echo seed > "$d/seed.txt"; git -C "$d" add -A; git -C "$d" commit -q -m seed
+}
+
+# 12. orphan recovery: a building/ job whose worker pid is dead → `recover`
+#     moves it to inbox/ with attempts incremented; a second recover is a no-op.
+export AGENT_QUEUE_ROOT="$tmp/queue-orphan"   # dedicated queue root under $tmp for this case
+"$AQ" init >/dev/null
+printf '%s\n' '---' 'engine: devin' "cwd: $work" 'yolo: true' '---' '' '# orphan task' \
+  > "$AGENT_QUEUE_ROOT/building/orphanjob.md"
+# Hand-written meta: pid 1 is alive but pidstart=NOPE is bogus → the PID-reuse guard marks the worker dead.
+printf '%s\n' 'job=orphanjob' 'engine=devin' "cwd=$work" 'started=1' 'attempts=1' 'pid=1' 'pidstart=NOPE' \
+  > "$AGENT_QUEUE_ROOT/.state/orphanjob.meta"
+"$AQ" recover >/dev/null 2>&1
+if [ -f "$AGENT_QUEUE_ROOT/inbox/orphanjob.md" ] && [ ! -f "$AGENT_QUEUE_ROOT/building/orphanjob.md" ]; then
+  pass "orphan recovery: dead-worker building/ job recovered to inbox/"
+else
+  ls -R "$AGENT_QUEUE_ROOT" >&2 || true; fail "orphan not recovered to inbox/"
+fi
+[ "$(metaval "$AGENT_QUEUE_ROOT/.state/orphanjob.meta" attempts)" = "2" ] \
+  && pass "orphan recovery: attempts incremented (1 -> 2)" \
+  || fail "orphan recovery: attempts not incremented (got $(metaval "$AGENT_QUEUE_ROOT/.state/orphanjob.meta" attempts))"
+"$AQ" recover >/dev/null 2>&1   # idempotency probe: nothing is left in building/, so this must change nothing
+inbn=$(find "$AGENT_QUEUE_ROOT/inbox" -maxdepth 1 -name 'orphanjob.md' | wc -l | tr -d ' ')   # tr strips wc's padding
+[ "$inbn" = "1" ] && [ "$(metaval "$AGENT_QUEUE_ROOT/.state/orphanjob.meta" attempts)" = "2" ] \
+  && pass "orphan recovery: idempotent (twice recovers once)" \
+  || fail "orphan recovery not idempotent (inbox=$inbn attempts=$(metaval "$AGENT_QUEUE_ROOT/.state/orphanjob.meta" attempts))"
+
+# 13. WIP checkpoint (git) + numstat: a git-repo cwd whose agent writes a 3-line
+#     file → branch aq/wip/<job> has a commit with the change, main is untouched,
+#     and lines_added is recorded.
+export AGENT_QUEUE_ROOT="$tmp/queue-wip"
+repo="$tmp/repo-wip"; mkrepo "$repo"
+mainbr=$(git -C "$repo" symbolic-ref --short HEAD)   # branch checked out after mkrepo; the "main" branch for the checks below
+wipstub="$tmp/wip-engine"
+printf '#!/usr/bin/env bash\nprintf '"'"'a\\nb\\nc\\n'"'"' > created_by_agent.txt\nexit 0\n' > "$wipstub"   # stub engine: writes a 3-line file in its cwd, exits 0
+chmod +x "$wipstub"
+"$AQ" init >/dev/null
+printf '%s\n' '---' 'engine: devin' "cwd: $repo" 'yolo: true' '---' '' '# wip task' \
+  > "$AGENT_QUEUE_ROOT/inbox/wipjob.md"
+DEVIN_BIN="$wipstub" "$AQ" run --once >/dev/null 2>&1
+# Capture the log into a variable and match it with [[ ]] (avoid `git log | grep -q` — under pipefail
+# the early grep -q exit SIGPIPEs git log and falsely fails the pipeline).
+wiplog=$(git -C "$repo" log --oneline aq/wip/wipjob 2>/dev/null || true)
+if git -C "$repo" show-ref --verify --quiet refs/heads/aq/wip/wipjob \
+  && [[ "$wiplog" == *"aq wip: wipjob"* ]] \
+  && git -C "$repo" show aq/wip/wipjob:created_by_agent.txt >/dev/null 2>&1; then
+  pass "wip checkpoint: aq/wip/wipjob has a commit with the agent's change"
+else
+  git -C "$repo" branch -a >&2 || true; fail "wip checkpoint branch/commit missing"
+fi
+if git -C "$repo" cat-file -e "$mainbr":created_by_agent.txt 2>/dev/null; then   # succeeds only if the file is in $mainbr's tree
+  fail "wip checkpoint: main branch was modified (must be untouched)"
+else
+  pass "wip checkpoint: main branch ($mainbr) untouched"
+fi
+[ "$(metaval "$AGENT_QUEUE_ROOT/.state/wipjob.meta" lines_added)" = "3" ] \
+  && pass "insights numstat: lines_added recorded (=3)" \
+  || fail "insights numstat: lines_added wrong (got $(metaval "$AGENT_QUEUE_ROOT/.state/wipjob.meta" lines_added))"
+
+# 13b. non-git cwd → WIP skipped cleanly (log says 'not a git repo'), job still completes into review/.
+export AGENT_QUEUE_ROOT="$tmp/queue-nogit"
+"$AQ" init >/dev/null
+printf '%s\n' '---' 'engine: devin' "cwd: $work" 'yolo: true' '---' '' '# nogit task' \
+  > "$AGENT_QUEUE_ROOT/inbox/nogitjob.md"
+DEVIN_BIN="$stub" "$AQ" run --once >/dev/null 2>&1
+if ls "$AGENT_QUEUE_ROOT"/review/*.md >/dev/null 2>&1 \
+  && grep -q 'not a git repo' "$AGENT_QUEUE_ROOT/logs/nogitjob.log" 2>/dev/null; then
+  pass "wip checkpoint: non-git cwd skipped cleanly → review/"
+else
+  fail "non-git cwd run did not complete cleanly"
+fi
+
+# 14. WIP resume: an orphan whose aq/wip/<job> already has a prior commit →
+#     the relaunch checks out that branch (agent sees HEAD on aq/wip/<job>).
+export AGENT_QUEUE_ROOT="$tmp/queue-resume"
+repo2="$tmp/repo-resume"; mkrepo "$repo2"
+mainbr2=$(git -C "$repo2" symbolic-ref --short HEAD)
+git -C "$repo2" checkout -q -b aq/wip/resumejob
+echo prior > "$repo2/prior.txt"; git -C "$repo2" add -A; git -C "$repo2" commit -q -m "aq wip: resumejob (prior)"
+git -C "$repo2" checkout -q "$mainbr2"   # back on the original branch: HEAD is on the WIP branch later only if the run switches to it
+resumeout="$tmp/resume-head.txt"; rm -f "$resumeout"
+resumestub="$tmp/resume-engine"
+printf '#!/usr/bin/env bash\ngit rev-parse --abbrev-ref HEAD > %q 2>/dev/null\nexit 0\n' "$resumeout" > "$resumestub"   # stub engine: records the branch HEAD is on
+chmod +x "$resumestub"
+"$AQ" init >/dev/null
+printf '%s\n' '---' 'engine: devin' "cwd: $repo2" 'yolo: true' '---' '' '# resume task' \
+  > "$AGENT_QUEUE_ROOT/building/resumejob.md"
+printf '%s\n' 'job=resumejob' 'engine=devin' "cwd=$repo2" 'started=1' 'attempts=1' 'pid=1' 'pidstart=NOPE' \
+  > "$AGENT_QUEUE_ROOT/.state/resumejob.meta"   # same dead-worker meta shape as case 12 (pid=1, pidstart=NOPE)
+DEVIN_BIN="$resumestub" "$AQ" run --once >/dev/null 2>&1   # no explicit `recover` call: run --once is expected to pick the orphan up
+if [ "$(cat "$resumeout" 2>/dev/null)" = "aq/wip/resumejob" ]; then
+  pass "wip resume: recovered job ran with HEAD on aq/wip/resumejob"
+else
+  echo "resume HEAD was: $(cat "$resumeout" 2>/dev/null)" >&2
+  fail "wip resume did not check out the existing WIP branch"
+fi
+
+# 15. retry on verify_failed: retry max=1 → requeued once (attempts=2), then failed/ with result=retries_exhausted;
+#     the log must also contain a 'RETRY scheduled' line. NOTE(review): 'verify: false' presumably runs `false` — confirm.
+export AGENT_QUEUE_ROOT="$tmp/queue-retry"
+export AGENT_QUEUE_POLL=1   # presumably a short poll interval so the 1s backoff elapses quickly (TODO confirm); unset after case 16
+"$AQ" init >/dev/null
+printf '%s\n' '---' 'engine: devin' "cwd: $work" 'yolo: true' 'verify: false' \
+  'retry: { max: 1, backoff: 1s, on: [verify_failed] }' '---' '' '# retry task' \
+  > "$AGENT_QUEUE_ROOT/inbox/retryjob.md"
+DEVIN_BIN="$stub" "$AQ" run --once >/dev/null 2>&1
+if ls "$AGENT_QUEUE_ROOT"/failed/retryjob.md >/dev/null 2>&1 \
+  && [ "$(metaval "$AGENT_QUEUE_ROOT/.state/retryjob.meta" result)" = "retries_exhausted" ] \
+  && [ "$(metaval "$AGENT_QUEUE_ROOT/.state/retryjob.meta" attempts)" = "2" ]; then
+  pass "retry(verify_failed): requeued once (attempts=2) then retries_exhausted"
+else
+  fail "retry(verify_failed) wrong (result=$(metaval "$AGENT_QUEUE_ROOT/.state/retryjob.meta" result) attempts=$(metaval "$AGENT_QUEUE_ROOT/.state/retryjob.meta" attempts))"
+fi
+grep -q 'RETRY scheduled' "$AGENT_QUEUE_ROOT/logs/retryjob.log" 2>/dev/null \
+  && pass "retry: backoff RETRY scheduled (next_eligible honored)" \
+  || fail "retry: no RETRY scheduled line in log"
+
+# 16. retry on crash: rc!=0 with on=[crash] retries; without crash it does not.
+crashstub="$tmp/crash-engine"
+printf '#!/usr/bin/env bash\nexit 3\n' > "$crashstub"; chmod +x "$crashstub"   # stub engine that always exits 3
+export AGENT_QUEUE_ROOT="$tmp/queue-crash"   # first half: retry.on includes crash
+"$AQ" init >/dev/null
+printf '%s\n' '---' 'engine: devin' "cwd: $work" 'yolo: true' \
+  'retry: { max: 1, backoff: 1s, on: [crash] }' '---' '' '# crash-retry task' \
+  > "$AGENT_QUEUE_ROOT/inbox/crashjob.md"
+DEVIN_BIN="$crashstub" "$AQ" run --once >/dev/null 2>&1
+[ "$(metaval "$AGENT_QUEUE_ROOT/.state/crashjob.meta" result)" = "retries_exhausted" ] \
+  && [ "$(metaval "$AGENT_QUEUE_ROOT/.state/crashjob.meta" attempts)" = "2" ] \
+  && pass "retry(crash): rc!=0 with on=[crash] retried then retries_exhausted (attempts=2)" \
+  || fail "retry(crash) wrong (result=$(metaval "$AGENT_QUEUE_ROOT/.state/crashjob.meta" result) attempts=$(metaval "$AGENT_QUEUE_ROOT/.state/crashjob.meta" attempts))"
+export AGENT_QUEUE_ROOT="$tmp/queue-nocrash"   # second half: same crashing stub, but retry.on lists only verify_failed
+"$AQ" init >/dev/null
+printf '%s\n' '---' 'engine: devin' "cwd: $work" 'yolo: true' \
+  'retry: { max: 1, backoff: 1s, on: [verify_failed] }' '---' '' '# crash-no-retry task' \
+  > "$AGENT_QUEUE_ROOT/inbox/nocrashjob.md"
+DEVIN_BIN="$crashstub" "$AQ" run --once >/dev/null 2>&1
+[ "$(metaval "$AGENT_QUEUE_ROOT/.state/nocrashjob.meta" result)" = "failed" ] \
+  && [ "$(metaval "$AGENT_QUEUE_ROOT/.state/nocrashjob.meta" attempts)" = "1" ] \
+  && pass "retry(crash): crash not in on -> straight to failed/ (no retry)" \
+  || fail "retry(crash) should not retry when crash not in on (result=$(metaval "$AGENT_QUEUE_ROOT/.state/nocrashjob.meta" result) attempts=$(metaval "$AGENT_QUEUE_ROOT/.state/nocrashjob.meta" attempts))"
+unset AGENT_QUEUE_POLL   # drop the AGENT_QUEUE_POLL=1 override exported for the retry cases
+
+# 17. insights parse: a stub log with a usage line → parse_usage records tokens/
+#     cost into meta; `insights <job>` prints them; a no-usage log doesn't crash.
+export AGENT_QUEUE_ROOT="$tmp/queue-usage"   # cases 17 and 18 share this queue root
+usagestub="$tmp/usage-engine"
+printf '#!/usr/bin/env bash\necho "AQ_USAGE model=claude-test tokens_in=100 tokens_out=50 cost_usd=0.0021 turns=3 tool_calls=5"\nexit 0\n' > "$usagestub"   # stub engine: prints one AQ_USAGE line, exits 0
+chmod +x "$usagestub"
+"$AQ" init >/dev/null
+printf '%s\n' '---' 'engine: claude' "cwd: $work" 'yolo: true' '---' '' '# usage task' \
+  > "$AGENT_QUEUE_ROOT/inbox/usagejob.md"
+CLAUDE_BIN="$usagestub" "$AQ" run --once >/dev/null 2>&1
+if [ "$(metaval "$AGENT_QUEUE_ROOT/.state/usagejob.meta" tokens_in)" = "100" ] \
+  && [ "$(metaval "$AGENT_QUEUE_ROOT/.state/usagejob.meta" cost_usd)" = "0.0021" ]; then
+  pass "insights parse_usage: tokens/cost extracted into meta"
+else
+  fail "parse_usage did not record tokens/cost (tokens_in=$(metaval "$AGENT_QUEUE_ROOT/.state/usagejob.meta" tokens_in))"
+fi
+ins=$("$AQ" insights usagejob 2>/dev/null || true)   # || true: a non-zero exit still reaches the explicit check below
+if [[ "$ins" == *tokens_in* && "$ins" == *0.0021* ]]; then
+  pass "insights : prints per-job metrics"
+else
+  fail "insights did not print metrics"
+fi
+printf '%s\n' '---' 'engine: claude' "cwd: $work" 'yolo: true' '---' '' '# no-usage task' \
+  > "$AGENT_QUEUE_ROOT/inbox/nousagejob.md"
+CLAUDE_BIN="$stub" "$AQ" run --once >/dev/null 2>&1   # second job in the same queue, run with the plain $stub engine
+if "$AQ" insights nousagejob >/dev/null 2>&1 \
+  && [ -z "$(metaval "$AGENT_QUEUE_ROOT/.state/nousagejob.meta" tokens_in)" ]; then
+  pass "insights: no-usage log omits token fields without crashing"
+else
+  fail "insights crashed or fabricated tokens for a no-usage log"
+fi
+
+# 18. insights aggregate: over the two jobs above, expects a 'ROLLUP BY ENGINE' section and a claude row with 100 / 50.
+out=$("$AQ" insights 2>/dev/null || true)
+if [[ "$out" == *"ROLLUP BY ENGINE"* ]] && grep -qE 'claude .* 100 .* 50' <<<"$out"; then
+  pass "insights aggregate: per-engine rollup with token totals"
+else
+  printf '%s\n' "$out" >&2; fail "insights aggregate rollup missing/incorrect"
+fi
+
 echo "self-test PASS"