Adds (never weakens) temp-git-repo + stub cases: orphan recovery (+idempotent), WIP checkpoint/numstat, non-git skip, WIP resume, retry on verify_failed and crash (incl. no-retry when class absent), parse_usage extraction, per-engine aggregate. Inbox-empty-safe counts; avoids the pipefail+grep -q SIGPIPE trap.
424 lines
21 KiB
Bash
Executable File
424 lines
21 KiB
Bash
Executable File
#!/usr/bin/env bash
|
|
#
# selftest.sh — quick, dependency-light verification for agent-queue.
#
# Runs:
#   1. shellcheck (if installed) at --severity=error on the runner + this script
#   2. bash -n syntax check on the runner + this script
#   3. node --check on the dashboard (if node installed)
#   4. a live init/add/run --once cycle against a throwaway queue using a
#      no-op engine stub (no real agent CLI is ever invoked)
#   5+ further live cases against throwaway queues: verify gates and manual
#      ship, status, backward-compat, priority, capabilities, engine-class,
#      idempotency, orphan recovery, WIP checkpoint/resume, retry, insights
#
# It uses its own temp AGENT_QUEUE_ROOT so it never touches a real queue.
# Exit 0 = all good. Run it before every commit.
#
|
|
set -euo pipefail
|
|
|
|
# Absolute directory containing this script, and the runner under test that
# lives next to it. `--` keeps a path starting with '-' from being parsed as
# an option by cd/dirname.
HERE="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd)"
AQ="$HERE/agent-queue.sh"
|
|
|
|
# pass MSG... — print a green check mark followed by the message on stdout.
# All arguments are joined into a single message.
pass() {
  local msg="$*"
  printf ' \033[32m✓\033[0m %s\n' "$msg"
}
|
|
# info MSG... — print a cyan bullet followed by the message on stdout.
# All arguments are joined into a single message.
info() {
  local msg="$*"
  printf ' \033[36m•\033[0m %s\n' "$msg"
}
|
|
# fail MSG... — print a red cross and the message on stderr, then terminate
# the current shell with exit status 1.
fail() {
  local msg="$*"
  printf ' \033[31m✗ %s\033[0m\n' "$msg" >&2
  exit 1
}
|
|
|
|
# Private scratch directory for every queue, stub and repo created below.
tmp="$(mktemp -d "${TMPDIR:-/tmp}/aq-selftest.XXXXXX")"

# cleanup — remove the scratch directory; installed as the EXIT trap so it
# runs on every exit path. `${tmp:?}` refuses to run rm with an empty path
# and `--` stops option parsing.
cleanup() { rm -rf -- "${tmp:?}"; }
trap cleanup EXIT
|
|
|
|
echo "agent-queue self-test"

# 1. shellcheck (optional)
# Explicit if/else: in `shellcheck ... && pass ...` the shell ignores `set -e`
# for the non-final command, so lint errors would be skipped silently.
if command -v shellcheck >/dev/null 2>&1; then
  if shellcheck --severity=error --shell=bash "$AQ" "${BASH_SOURCE[0]}"; then
    pass "shellcheck (errors): clean"
  else
    fail "shellcheck reported errors"
  fi
else
  info "shellcheck not installed — skipping"
fi
|
|
|
|
# 2. syntax
# `cmd || fail` rather than `cmd && pass`: with the latter a failing check is
# exempt from `set -e` and the script would carry on without reporting it.
bash -n "$AQ" || fail "bash -n agent-queue.sh: syntax check failed"
pass "bash -n agent-queue.sh"
bash -n "${BASH_SOURCE[0]}" || fail "bash -n selftest.sh: syntax check failed"
pass "bash -n selftest.sh"
|
|
|
|
# 3. dashboard syntax (optional)
# Explicit if/else so a failing `node --check` is reported instead of being
# silently skipped (a non-final command in an && list is exempt from set -e).
if command -v node >/dev/null 2>&1; then
  if node --check "$HERE/dashboard.mjs"; then
    pass "node --check dashboard.mjs"
  else
    fail "node --check dashboard.mjs failed"
  fi
else
  info "node not installed — skipping dashboard check"
fi
|
|
|
|
# 4. live no-op cycle
# The queue root, the engine stub and the work dir all live under $tmp.
export AGENT_QUEUE_ROOT="$tmp/queue"
stub="$tmp/noop-engine"
printf '#!/usr/bin/env bash\n# no-op engine stub: drain stdin, succeed\ncat >/dev/null 2>&1 || true\nexit 0\n' > "$stub"
chmod +x "$stub"

work="$tmp/work"; mkdir -p "$work"
task="$tmp/task.md"
printf '%s\n' '---' 'engine: devin' "cwd: $work" 'yolo: true' '---' '' '# self-test no-op task' > "$task"

# DEVIN_BIN points at the stub for each runner invocation.
"$AQ" init >/dev/null
DEVIN_BIN="$stub" "$AQ" add "$task" >/dev/null
DEVIN_BIN="$stub" "$AQ" run --once >/dev/null 2>&1

if ls "$AGENT_QUEUE_ROOT"/review/*.md >/dev/null 2>&1; then
  pass "no-verify cycle → task parked in review/"
else
  echo "--- queue state ---" >&2
  ls -R "$AGENT_QUEUE_ROOT" >&2 || true
  fail "no-op cycle did not complete (expected a file in review/)"
fi
|
|
|
|
# 5. verify-pass gate: rc=0 + passing verify → testing/, then manual ship → shipped/
# Reuses the queue, stub and work dir set up by the no-op cycle above.
task2="$tmp/task-verify.md"
printf '%s\n' '---' 'engine: devin' "cwd: $work" 'yolo: true' 'verify: true' '---' '' '# self-test verify-pass task' > "$task2"
DEVIN_BIN="$stub" "$AQ" add "$task2" >/dev/null
DEVIN_BIN="$stub" "$AQ" run --once >/dev/null 2>&1
if ls "$AGENT_QUEUE_ROOT"/testing/*.md >/dev/null 2>&1; then
  pass "verify-pass cycle → task promoted to testing/"
else
  echo "--- queue state ---" >&2
  ls -R "$AGENT_QUEUE_ROOT" >&2 || true
  fail "verify-pass cycle did not reach testing/ (expected a file in testing/)"
fi

# Pick the most recently modified job in testing/ with a glob and -nt rather
# than parsing `ls -t` output.
shipsrc=""
for cand in "$AGENT_QUEUE_ROOT"/testing/*.md; do
  [[ -e "$cand" ]] || continue
  if [[ -z "$shipsrc" || "$cand" -nt "$shipsrc" ]]; then
    shipsrc="$cand"
  fi
done
shipjob="$(basename "$shipsrc" .md)"
"$AQ" ship "$shipjob" >/dev/null 2>&1
if ls "$AGENT_QUEUE_ROOT"/shipped/*.md >/dev/null 2>&1; then
  pass "manual ship → task landed in shipped/"
else
  fail "ship did not move job to shipped/"
fi
|
|
|
|
# 6. verify-fail gate: rc=0 + failing verify → failed/
# `verify: false` gives the job a verify command that always fails.
task3="$tmp/task-verifyfail.md"
printf '%s\n' '---' 'engine: devin' "cwd: $work" 'yolo: true' 'verify: false' '---' '' '# self-test verify-fail task' > "$task3"
DEVIN_BIN="$stub" "$AQ" add "$task3" >/dev/null
DEVIN_BIN="$stub" "$AQ" run --once >/dev/null 2>&1
if ls "$AGENT_QUEUE_ROOT"/failed/*verifyfail*.md >/dev/null 2>&1; then
  pass "verify-fail cycle → task routed to failed/"
else
  echo "--- queue state ---" >&2
  ls -R "$AGENT_QUEUE_ROOT" >&2 || true
  fail "verify-fail cycle did not route to failed/"
fi
|
|
|
|
# status must not error
# `cmd || fail` rather than `cmd && pass`: the latter is exempt from `set -e`,
# so a failing `status` would go unreported.
"$AQ" status >/dev/null 2>&1 || fail "status exited non-zero"
pass "status runs clean"
|
|
|
|
# ─────────────────────────────────────────────────────────────────────
|
|
# Phase 1 — Slice 1 cases (manifest/priority/capabilities/engine-class/idempotency).
|
|
# Each uses its OWN AGENT_QUEUE_ROOT; the no-op engine stub means no real CLI runs.
|
|
# ─────────────────────────────────────────────────────────────────────
|
|
|
|
# 7. backward-compat: a legacy engine/cwd/yolo-only .md still completes → review/
export AGENT_QUEUE_ROOT="$tmp/queue-bc"
bc="$tmp/bc-legacy.md"
printf '%s\n' '---' 'engine: devin' "cwd: $work" 'yolo: true' '---' '' '# legacy task' > "$bc"
"$AQ" init >/dev/null
DEVIN_BIN="$stub" "$AQ" add "$bc" >/dev/null
DEVIN_BIN="$stub" "$AQ" run --once >/dev/null 2>&1
if ls "$AGENT_QUEUE_ROOT"/review/*.md >/dev/null 2>&1; then
  pass "backward-compat: legacy engine/cwd/yolo-only .md → review/"
else
  ls -R "$AGENT_QUEUE_ROOT" >&2 || true
  fail "legacy .md did not land in review/"
fi
|
|
|
|
# 8. priority: with --max 1, a 'critical' job queued AFTER a 'low' job runs first.
# An order-recording devin-style stub appends each job's TASKID as it launches.
export AGENT_QUEUE_ROOT="$tmp/queue-prio"
ostub="$tmp/order-engine"
# Quoted delimiter: the stub body is written literally, nothing expands here.
cat > "$ostub" <<'STUB'
#!/usr/bin/env bash
# order-recording no-op engine stub (devin-style: --prompt-file <pf>)
pf=""
while [ $# -gt 0 ]; do
  case "$1" in
    --prompt-file) pf="${2:-}"; shift 2;;
    *) shift;;
  esac
done
if [ -n "${pf:-}" ] && [ -n "${AQ_ORDER:-}" ]; then
  grep -m1 '^TASKID=' "$pf" >> "$AQ_ORDER" 2>/dev/null || true
fi
exit 0
STUB
chmod +x "$ostub"
# The stub reads AQ_ORDER from its environment, hence the export.
export AQ_ORDER="$tmp/prio-order.log"; : > "$AQ_ORDER"
plow="$tmp/p-low.md"
printf '%s\n' '---' 'engine: devin' "cwd: $work" 'yolo: true' 'priority: low' '---' '' 'TASKID=low' > "$plow"
pcrit="$tmp/p-crit.md"
printf '%s\n' '---' 'engine: devin' "cwd: $work" 'yolo: true' 'priority: critical' '---' '' 'TASKID=critical' > "$pcrit"
"$AQ" init >/dev/null
DEVIN_BIN="$ostub" "$AQ" add "$plow" >/dev/null
sleep 1 # ensure the critical job has a strictly newer (later) queue timestamp
DEVIN_BIN="$ostub" "$AQ" add "$pcrit" >/dev/null
DEVIN_BIN="$ostub" "$AQ" run --once --max 1 >/dev/null 2>&1
# The first line of the order log is the job that launched first.
if [ "$(head -1 "$AQ_ORDER" 2>/dev/null || true)" = "TASKID=critical" ]; then
  pass "priority: critical (queued later) ran before low"
else
  echo "--- execution order ---" >&2; cat "$AQ_ORDER" >&2 || true
  fail "priority ordering did not pick the critical job first"
fi
|
|
|
|
# 9. capability mismatch: a job requiring an absent tool → failed/ with
#    result=capability_mismatch, and the agent is NEVER launched.
export AGENT_QUEUE_ROOT="$tmp/queue-cap"
launchflag="$tmp/cap-launched.flag"; rm -f "$launchflag"
launchstub="$tmp/launch-engine"
# The stub's only effect is to touch the flag file, so the flag's existence
# proves the engine was launched. %q shell-quotes the path into the stub.
printf '#!/usr/bin/env bash\ntouch %q\nexit 0\n' "$launchflag" > "$launchstub"
chmod +x "$launchstub"
capjob="$tmp/cap.md"
printf '%s\n' '---' 'engine: devin' "cwd: $work" 'yolo: true' \
  'capabilities: [has:definitely-not-installed]' '---' '' '# capability task' > "$capjob"
"$AQ" init >/dev/null
DEVIN_BIN="$launchstub" "$AQ" add "$capjob" >/dev/null
DEVIN_BIN="$launchstub" "$AQ" run --once >/dev/null 2>&1
if ls "$AGENT_QUEUE_ROOT"/failed/*.md >/dev/null 2>&1 \
  && grep -q '^result=capability_mismatch' "$AGENT_QUEUE_ROOT"/.state/*.meta 2>/dev/null; then
  pass "capability mismatch → failed/ (result=capability_mismatch)"
else
  ls -R "$AGENT_QUEUE_ROOT" >&2 || true
  fail "capability-mismatch job not routed to failed/ with result=capability_mismatch"
fi
if [ -e "$launchflag" ]; then
  fail "agent WAS launched on capability mismatch (it must not be)"
else
  pass "capability mismatch: agent never launched"
fi
|
|
|
|
# 10. engine-class: a job with engine-class:agentic-coder and no engine, with
#     DEVIN_BIN stubbed (available), resolves to devin, runs, and lands in review/.
export AGENT_QUEUE_ROOT="$tmp/queue-ec"
ecjob="$tmp/ec.md"
# Note: the front matter deliberately has no `engine:` key.
printf '%s\n' '---' 'engine-class: agentic-coder' "cwd: $work" 'yolo: true' '---' '' '# engine-class task' > "$ecjob"
"$AQ" init >/dev/null
DEVIN_BIN="$stub" "$AQ" add "$ecjob" >/dev/null
DEVIN_BIN="$stub" "$AQ" run --once >/dev/null 2>&1
if ls "$AGENT_QUEUE_ROOT"/review/*.md >/dev/null 2>&1; then
  pass "engine-class: agentic-coder (no engine) resolved to devin → review/"
else
  ls -R "$AGENT_QUEUE_ROOT" >&2 || true
  fail "engine-class job did not resolve+run to review/"
fi
|
|
|
|
# 11. idempotency-key dedupe on add.
# Counts use find|wc so an empty inbox yields 0 instead of a glob error.
# Checks are if/else rather than `test && pass || fail`, where `fail` would
# also fire if `pass` itself returned non-zero.
export AGENT_QUEUE_ROOT="$tmp/queue-idem"
"$AQ" init >/dev/null
ia="$tmp/idem-a.md"
printf '%s\n' '---' 'engine: devin' "cwd: $work" 'yolo: true' 'idempotency-key: dedup-1' '---' '' 'alpha body' > "$ia"
DEVIN_BIN="$stub" "$AQ" add "$ia" >/dev/null
DEVIN_BIN="$stub" "$AQ" add "$ia" >/dev/null # identical key+body → no-op
cnt=$(find "$AGENT_QUEUE_ROOT/inbox" -maxdepth 1 -type f -name '*.md' 2>/dev/null | wc -l | tr -d ' ')
if [ "$cnt" = "1" ]; then
  pass "idempotency: same key+body added twice → exactly 1 inbox file"
else
  fail "idempotency: expected 1 inbox file after duplicate add, got $cnt"
fi

# same key, different body, prior STILL in inbox → supersede (still exactly 1)
ib="$tmp/idem-b.md"
printf '%s\n' '---' 'engine: devin' "cwd: $work" 'yolo: true' 'idempotency-key: dedup-1' '---' '' 'beta body (changed)' > "$ib"
DEVIN_BIN="$stub" "$AQ" add "$ib" >/dev/null
cnt=$(find "$AGENT_QUEUE_ROOT/inbox" -maxdepth 1 -type f -name '*.md' 2>/dev/null | wc -l | tr -d ' ')
if [ "$cnt" = "1" ]; then
  pass "idempotency: same key + changed body, prior in inbox → superseded (1 file)"
else
  fail "idempotency: expected 1 inbox file after supersede, got $cnt"
fi

# drain (prior leaves inbox → review), then same key + different body → REJECT
DEVIN_BIN="$stub" "$AQ" run --once >/dev/null 2>&1
ic="$tmp/idem-c.md"
printf '%s\n' '---' 'engine: devin' "cwd: $work" 'yolo: true' 'idempotency-key: dedup-1' '---' '' 'gamma body (changed again)' > "$ic"
# Here a successful `add` is the failure case.
if DEVIN_BIN="$stub" "$AQ" add "$ic" >/dev/null 2>&1; then
  fail "idempotency: same key + different body (prior past inbox) should be rejected"
else
  pass "idempotency: same key + different body, prior past inbox → rejected"
fi
cnt=$(find "$AGENT_QUEUE_ROOT/inbox" -maxdepth 1 -type f -name '*.md' 2>/dev/null | wc -l | tr -d ' ')
if [ "$cnt" = "0" ]; then
  pass "idempotency: a rejected add enqueues nothing"
else
  fail "idempotency: rejected add should not enqueue (inbox=$cnt)"
fi
|
|
|
|
# ─────────────────────────────────────────────────────────────────────
|
|
# Phase 1 — Slice 3 cases (resilience & insights, single host).
|
|
# Use temp git repos + stubs; never touches a real queue.
|
|
# ─────────────────────────────────────────────────────────────────────
|
|
#######################################
# Print the value of the LAST `key=value` line for a key in a meta file.
# The key is matched literally (no regex interpretation), and everything
# after the first '=' is the value, so values may themselves contain '='.
# Arguments: $1 - path to the .meta file
#            $2 - key name
# Outputs:   the value on stdout; nothing if the key or file is missing
# Returns:   always 0, so a missing key never trips `set -e`/pipefail
#######################################
metaval() {
  local file=$1 key=$2
  local line value="" found=0
  [[ -f "$file" && -r "$file" ]] || return 0
  # `|| [[ -n "$line" ]]` also processes a final line lacking a newline.
  while IFS= read -r line || [[ -n "$line" ]]; do
    if [[ "$line" == "$key="* ]]; then
      value=${line#"$key="}
      found=1
    fi
  done < "$file"
  if (( found )); then
    printf '%s\n' "$value"
  fi
  return 0
}
|
|
#######################################
# Create a git repository with a single "seed" commit.
# Identity and signing are configured repo-locally so the commit does not
# depend on the invoking user's global git configuration.
# Arguments: $1 - directory to create and initialise
# Outputs:   none (git runs quietly)
# Returns:   status of the final git commit; earlier failures abort the
#            script when called at top level under `set -e`
#######################################
mkrepo() {
  local d=$1
  mkdir -p "$d"
  git -C "$d" init -q
  git -C "$d" config user.email t@t
  git -C "$d" config user.name selftest
  # A global commit.gpgsign=true would otherwise make the commit need a key.
  git -C "$d" config commit.gpgsign false
  echo seed > "$d/seed.txt"
  git -C "$d" add -A
  git -C "$d" commit -q -m seed
}
|
|
|
|
# 12. orphan recovery: a building/ job whose worker pid is dead → `recover`
#     moves it to inbox/ with attempts incremented; a second recover is a no-op.
export AGENT_QUEUE_ROOT="$tmp/queue-orphan"
"$AQ" init >/dev/null
# Hand-craft the orphan: a job file in building/ plus its meta file.
printf '%s\n' '---' 'engine: devin' "cwd: $work" 'yolo: true' '---' '' '# orphan task' \
  > "$AGENT_QUEUE_ROOT/building/orphanjob.md"
# pid 1 is alive but pidstart is bogus → the PID-reuse guard marks it dead.
printf '%s\n' 'job=orphanjob' 'engine=devin' "cwd=$work" 'started=1' 'attempts=1' 'pid=1' 'pidstart=NOPE' \
  > "$AGENT_QUEUE_ROOT/.state/orphanjob.meta"
"$AQ" recover >/dev/null 2>&1
if [ -f "$AGENT_QUEUE_ROOT/inbox/orphanjob.md" ] && [ ! -f "$AGENT_QUEUE_ROOT/building/orphanjob.md" ]; then
  pass "orphan recovery: dead-worker building/ job recovered to inbox/"
else
  ls -R "$AGENT_QUEUE_ROOT" >&2 || true; fail "orphan not recovered to inbox/"
fi
# if/else rather than `test && pass || fail`, where `fail` would also run if
# `pass` itself returned non-zero.
if [ "$(metaval "$AGENT_QUEUE_ROOT/.state/orphanjob.meta" attempts)" = "2" ]; then
  pass "orphan recovery: attempts incremented (1 -> 2)"
else
  fail "orphan recovery: attempts not incremented (got $(metaval "$AGENT_QUEUE_ROOT/.state/orphanjob.meta" attempts))"
fi
"$AQ" recover >/dev/null 2>&1 # idempotent: nothing left in building/
inbn=$(find "$AGENT_QUEUE_ROOT/inbox" -maxdepth 1 -name 'orphanjob.md' | wc -l | tr -d ' ')
if [ "$inbn" = "1" ] && [ "$(metaval "$AGENT_QUEUE_ROOT/.state/orphanjob.meta" attempts)" = "2" ]; then
  pass "orphan recovery: idempotent (twice recovers once)"
else
  fail "orphan recovery not idempotent (inbox=$inbn attempts=$(metaval "$AGENT_QUEUE_ROOT/.state/orphanjob.meta" attempts))"
fi
|
|
|
|
# 13. WIP checkpoint (git) + numstat: a git-repo cwd whose agent writes a 3-line
#     file → branch aq/wip/<job> has a commit with the change, main is untouched,
#     and lines_added is recorded.
export AGENT_QUEUE_ROOT="$tmp/queue-wip"
repo="$tmp/repo-wip"; mkrepo "$repo"
# Name of the branch `git init` created; not assumed to be "main".
mainbr=$(git -C "$repo" symbolic-ref --short HEAD)
wipstub="$tmp/wip-engine"
# The stub writes a three-line file (a, b, c) into its current directory.
printf '#!/usr/bin/env bash\nprintf '"'"'a\\nb\\nc\\n'"'"' > created_by_agent.txt\nexit 0\n' > "$wipstub"
chmod +x "$wipstub"
"$AQ" init >/dev/null
# The job file is written straight into inbox/ (no `add`).
printf '%s\n' '---' 'engine: devin' "cwd: $repo" 'yolo: true' '---' '' '# wip task' \
  > "$AGENT_QUEUE_ROOT/inbox/wipjob.md"
DEVIN_BIN="$wipstub" "$AQ" run --once >/dev/null 2>&1
# capture the log first (avoid `git log | grep -q` — under pipefail the early
# grep -q exit SIGPIPEs git log and falsely fails the pipeline).
wiplog=$(git -C "$repo" log --oneline aq/wip/wipjob 2>/dev/null || true)
if git -C "$repo" show-ref --verify --quiet refs/heads/aq/wip/wipjob \
  && [[ "$wiplog" == *"aq wip: wipjob"* ]] \
  && git -C "$repo" show aq/wip/wipjob:created_by_agent.txt >/dev/null 2>&1; then
  pass "wip checkpoint: aq/wip/wipjob has a commit with the agent's change"
else
  git -C "$repo" branch -a >&2 || true; fail "wip checkpoint branch/commit missing"
fi
# The agent's file must NOT exist on the original branch.
if git -C "$repo" cat-file -e "$mainbr":created_by_agent.txt 2>/dev/null; then
  fail "wip checkpoint: main branch was modified (must be untouched)"
else
  pass "wip checkpoint: main branch ($mainbr) untouched"
fi
# if/else rather than `test && pass || fail`, where `fail` would also run if
# `pass` itself returned non-zero.
if [ "$(metaval "$AGENT_QUEUE_ROOT/.state/wipjob.meta" lines_added)" = "3" ]; then
  pass "insights numstat: lines_added recorded (=3)"
else
  fail "insights numstat: lines_added wrong (got $(metaval "$AGENT_QUEUE_ROOT/.state/wipjob.meta" lines_added))"
fi
|
|
|
|
# 13b. non-git cwd → WIP skipped cleanly (no error), job still completes.
export AGENT_QUEUE_ROOT="$tmp/queue-nogit"
"$AQ" init >/dev/null
# $work is a plain directory, not a git repository.
printf '%s\n' '---' 'engine: devin' "cwd: $work" 'yolo: true' '---' '' '# nogit task' \
  > "$AGENT_QUEUE_ROOT/inbox/nogitjob.md"
DEVIN_BIN="$stub" "$AQ" run --once >/dev/null 2>&1
# Both conditions are required: the job reached review/ AND its log records
# the "not a git repo" skip.
if ls "$AGENT_QUEUE_ROOT"/review/*.md >/dev/null 2>&1 \
  && grep -q 'not a git repo' "$AGENT_QUEUE_ROOT/logs/nogitjob.log" 2>/dev/null; then
  pass "wip checkpoint: non-git cwd skipped cleanly → review/"
else
  fail "non-git cwd run did not complete cleanly"
fi
|
|
|
|
# 14. WIP resume: an orphan whose aq/wip/<job> already has a prior commit →
#     the relaunch checks out that branch (agent sees HEAD on aq/wip/<job>).
export AGENT_QUEUE_ROOT="$tmp/queue-resume"
repo2="$tmp/repo-resume"; mkrepo "$repo2"
mainbr2=$(git -C "$repo2" symbolic-ref --short HEAD)
# Pre-create the WIP branch with one commit, then return to the original
# branch so the run has to switch branches for the check below to pass.
git -C "$repo2" checkout -q -b aq/wip/resumejob
echo prior > "$repo2/prior.txt"; git -C "$repo2" add -A; git -C "$repo2" commit -q -m "aq wip: resumejob (prior)"
git -C "$repo2" checkout -q "$mainbr2"
resumeout="$tmp/resume-head.txt"; rm -f "$resumeout"
resumestub="$tmp/resume-engine"
# The stub records the branch name of HEAD as seen from its working directory.
printf '#!/usr/bin/env bash\ngit rev-parse --abbrev-ref HEAD > %q 2>/dev/null\nexit 0\n' "$resumeout" > "$resumestub"
chmod +x "$resumestub"
"$AQ" init >/dev/null
# Same hand-crafted orphan shape as case 12: a building/ job plus a meta file
# with pid=1 and a bogus pidstart.
printf '%s\n' '---' 'engine: devin' "cwd: $repo2" 'yolo: true' '---' '' '# resume task' \
  > "$AGENT_QUEUE_ROOT/building/resumejob.md"
printf '%s\n' 'job=resumejob' 'engine=devin' "cwd=$repo2" 'started=1' 'attempts=1' 'pid=1' 'pidstart=NOPE' \
  > "$AGENT_QUEUE_ROOT/.state/resumejob.meta"
DEVIN_BIN="$resumestub" "$AQ" run --once >/dev/null 2>&1
if [ "$(cat "$resumeout" 2>/dev/null)" = "aq/wip/resumejob" ]; then
  pass "wip resume: recovered job ran with HEAD on aq/wip/resumejob"
else
  echo "resume HEAD was: $(cat "$resumeout" 2>/dev/null)" >&2
  fail "wip resume did not check out the existing WIP branch"
fi
|
|
|
|
# 15. retry on verify_failed: max=1 → requeued once (attempts=2) then failed/
#     result=retries_exhausted; a backoff (next_eligible) is recorded.
export AGENT_QUEUE_ROOT="$tmp/queue-retry"
# Stays exported until the end of case 16, which unsets it.
# NOTE(review): presumably shortens the runner's poll interval so the 1s
# backoff elapses quickly — confirm against agent-queue.sh.
export AGENT_QUEUE_POLL=1
"$AQ" init >/dev/null
printf '%s\n' '---' 'engine: devin' "cwd: $work" 'yolo: true' 'verify: false' \
  'retry: { max: 1, backoff: 1s, on: [verify_failed] }' '---' '' '# retry task' \
  > "$AGENT_QUEUE_ROOT/inbox/retryjob.md"
DEVIN_BIN="$stub" "$AQ" run --once >/dev/null 2>&1
if ls "$AGENT_QUEUE_ROOT"/failed/retryjob.md >/dev/null 2>&1 \
  && [ "$(metaval "$AGENT_QUEUE_ROOT/.state/retryjob.meta" result)" = "retries_exhausted" ] \
  && [ "$(metaval "$AGENT_QUEUE_ROOT/.state/retryjob.meta" attempts)" = "2" ]; then
  pass "retry(verify_failed): requeued once (attempts=2) then retries_exhausted"
else
  fail "retry(verify_failed) wrong (result=$(metaval "$AGENT_QUEUE_ROOT/.state/retryjob.meta" result) attempts=$(metaval "$AGENT_QUEUE_ROOT/.state/retryjob.meta" attempts))"
fi
# if/else rather than `grep && pass || fail`, where `fail` would also run if
# `pass` itself returned non-zero.
if grep -q 'RETRY scheduled' "$AGENT_QUEUE_ROOT/logs/retryjob.log" 2>/dev/null; then
  pass "retry: backoff RETRY scheduled (next_eligible honored)"
else
  fail "retry: no RETRY scheduled line in log"
fi
|
|
|
|
# 16. retry on crash: rc!=0 with on=[crash] retries; without crash it does not.
# Both halves use an engine stub that always exits 3. Checks are if/else
# rather than `test && pass || fail`, where `fail` would also run if `pass`
# itself returned non-zero.
crashstub="$tmp/crash-engine"
printf '#!/usr/bin/env bash\nexit 3\n' > "$crashstub"; chmod +x "$crashstub"

# (a) `crash` is listed in retry.on → one retry, then retries_exhausted.
export AGENT_QUEUE_ROOT="$tmp/queue-crash"
"$AQ" init >/dev/null
printf '%s\n' '---' 'engine: devin' "cwd: $work" 'yolo: true' \
  'retry: { max: 1, backoff: 1s, on: [crash] }' '---' '' '# crash-retry task' \
  > "$AGENT_QUEUE_ROOT/inbox/crashjob.md"
DEVIN_BIN="$crashstub" "$AQ" run --once >/dev/null 2>&1
if [ "$(metaval "$AGENT_QUEUE_ROOT/.state/crashjob.meta" result)" = "retries_exhausted" ] \
  && [ "$(metaval "$AGENT_QUEUE_ROOT/.state/crashjob.meta" attempts)" = "2" ]; then
  pass "retry(crash): rc!=0 with on=[crash] retried then retries_exhausted (attempts=2)"
else
  fail "retry(crash) wrong (result=$(metaval "$AGENT_QUEUE_ROOT/.state/crashjob.meta" result) attempts=$(metaval "$AGENT_QUEUE_ROOT/.state/crashjob.meta" attempts))"
fi

# (b) `crash` is NOT listed in retry.on → no retry, attempts stays at 1.
export AGENT_QUEUE_ROOT="$tmp/queue-nocrash"
"$AQ" init >/dev/null
printf '%s\n' '---' 'engine: devin' "cwd: $work" 'yolo: true' \
  'retry: { max: 1, backoff: 1s, on: [verify_failed] }' '---' '' '# crash-no-retry task' \
  > "$AGENT_QUEUE_ROOT/inbox/nocrashjob.md"
DEVIN_BIN="$crashstub" "$AQ" run --once >/dev/null 2>&1
if [ "$(metaval "$AGENT_QUEUE_ROOT/.state/nocrashjob.meta" result)" = "failed" ] \
  && [ "$(metaval "$AGENT_QUEUE_ROOT/.state/nocrashjob.meta" attempts)" = "1" ]; then
  pass "retry(crash): crash not in on -> straight to failed/ (no retry)"
else
  fail "retry(crash) should not retry when crash not in on (result=$(metaval "$AGENT_QUEUE_ROOT/.state/nocrashjob.meta" result) attempts=$(metaval "$AGENT_QUEUE_ROOT/.state/nocrashjob.meta" attempts))"
fi
# Drop the poll override exported for the retry cases.
unset AGENT_QUEUE_POLL
|
|
|
|
# 17. insights parse: a stub log with a usage line → parse_usage records tokens/
#     cost into meta; `insights <job>` prints them; a no-usage log doesn't crash.
export AGENT_QUEUE_ROOT="$tmp/queue-usage"
usagestub="$tmp/usage-engine"
# The stub prints one AQ_USAGE line and exits 0.
printf '#!/usr/bin/env bash\necho "AQ_USAGE model=claude-test tokens_in=100 tokens_out=50 cost_usd=0.0021 turns=3 tool_calls=5"\nexit 0\n' > "$usagestub"
chmod +x "$usagestub"
"$AQ" init >/dev/null
# This case uses the claude engine, so the stub is passed via CLAUDE_BIN.
printf '%s\n' '---' 'engine: claude' "cwd: $work" 'yolo: true' '---' '' '# usage task' \
  > "$AGENT_QUEUE_ROOT/inbox/usagejob.md"
CLAUDE_BIN="$usagestub" "$AQ" run --once >/dev/null 2>&1
if [ "$(metaval "$AGENT_QUEUE_ROOT/.state/usagejob.meta" tokens_in)" = "100" ] \
  && [ "$(metaval "$AGENT_QUEUE_ROOT/.state/usagejob.meta" cost_usd)" = "0.0021" ]; then
  pass "insights parse_usage: tokens/cost extracted into meta"
else
  fail "parse_usage did not record tokens/cost (tokens_in=$(metaval "$AGENT_QUEUE_ROOT/.state/usagejob.meta" tokens_in))"
fi
# `|| true`: a non-zero exit from insights is reported by the match below
# instead of aborting the script under set -e.
ins=$("$AQ" insights usagejob 2>/dev/null || true)
if [[ "$ins" == *tokens_in* && "$ins" == *0.0021* ]]; then
  pass "insights <job>: prints per-job metrics"
else
  fail "insights <job> did not print metrics"
fi
# Second job in the same queue, run with the silent no-op stub, so its log
# carries no usage line.
printf '%s\n' '---' 'engine: claude' "cwd: $work" 'yolo: true' '---' '' '# no-usage task' \
  > "$AGENT_QUEUE_ROOT/inbox/nousagejob.md"
CLAUDE_BIN="$stub" "$AQ" run --once >/dev/null 2>&1
if "$AQ" insights nousagejob >/dev/null 2>&1 \
  && [ -z "$(metaval "$AGENT_QUEUE_ROOT/.state/nousagejob.meta" tokens_in)" ]; then
  pass "insights: no-usage log omits token fields without crashing"
else
  fail "insights crashed or fabricated tokens for a no-usage log"
fi
|
|
|
|
# 18. insights aggregate: two finished jobs → per-engine rollup with totals + rate.
# Runs against the queue left by case 17 (AGENT_QUEUE_ROOT is unchanged).
out=$("$AQ" insights 2>/dev/null || true)
# A here-string feeds grep, so no pipeline is involved and the
# pipefail + grep -q SIGPIPE trap cannot occur.
if [[ "$out" == *"ROLLUP BY ENGINE"* ]] && grep -qE 'claude .* 100 .* 50' <<<"$out"; then
  pass "insights aggregate: per-engine rollup with token totals"
else
  printf '%s\n' "$out" >&2; fail "insights aggregate rollup missing/incorrect"
fi

echo "self-test PASS"
|