#!/usr/bin/env bash
#
# selftest.sh — quick, dependency-light verification for agent-queue.
#
# Runs:
#   1. shellcheck (if installed) at --severity=error on the runner
#   2. bash -n syntax check on the runner + this script
#   3. node --check on the dashboard (if node installed)
#   4. a live init/add/run --once cycle against a throwaway queue using a
#      no-op engine stub (no real agent CLI is ever invoked)
#
# It uses its own temp AGENT_QUEUE_ROOT so it never touches a real queue.
# Exit 0 = all good. Run it before every commit.
#
set -euo pipefail

# Directory holding this script; the runner under test is expected next to it.
HERE="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
AQ="$HERE/agent-queue.sh"

# Reporting helpers.
#   pass MSG… — green check mark + message on stdout
#   info MSG… — cyan bullet + message on stdout (used for skipped steps)
#   fail MSG… — red cross + message on stderr, then exit 1
pass() { printf ' \033[32m✓\033[0m %s\n' "$*"; }
info() { printf ' \033[36m•\033[0m %s\n' "$*"; }
fail() { printf ' \033[31m✗ %s\033[0m\n' "$*" >&2; exit 1; }

# Scratch area for every queue, stub and task file; removed on any exit path.
tmp="$(mktemp -d "${TMPDIR:-/tmp}/aq-selftest.XXXXXX")"
# ${tmp:?} aborts instead of expanding to nothing if tmp were ever empty.
cleanup() { rm -rf -- "${tmp:?}"; }
trap cleanup EXIT

echo "agent-queue self-test"

# NOTE: the checks below use `cmd || fail …` rather than `cmd && pass …`.
# Under `set -e`, a failing command that is not the last element of an && list
# does NOT abort the script, so the `&&` form would silently ignore a failed
# check and carry on to the final PASS line.

# 1. shellcheck (optional)
if command -v shellcheck >/dev/null 2>&1; then
  shellcheck --severity=error --shell=bash "$AQ" "${BASH_SOURCE[0]}" \
    || fail "shellcheck reported errors"
  pass "shellcheck (errors): clean"
else
  info "shellcheck not installed — skipping"
fi

# 2. syntax
bash -n "$AQ" || fail "bash -n agent-queue.sh: syntax check failed"
pass "bash -n agent-queue.sh"
bash -n "${BASH_SOURCE[0]}" || fail "bash -n selftest.sh: syntax check failed"
pass "bash -n selftest.sh"

# 3. dashboard syntax (optional)
if command -v node >/dev/null 2>&1; then
  node --check "$HERE/dashboard.mjs" || fail "node --check dashboard.mjs failed"
  pass "node --check dashboard.mjs"
else
  info "node not installed — skipping dashboard check"
fi

# 4.
# live no-op cycle: init a scratch queue, enqueue one task, drain it once,
# and expect the task file to show up under review/.
export AGENT_QUEUE_ROOT="$tmp/queue"

# Engine stand-in handed to the runner through DEVIN_BIN: it swallows stdin and
# exits 0, so no real agent CLI is started.
stub="$tmp/noop-engine"
cat > "$stub" <<'STUB'
#!/usr/bin/env bash
# no-op engine stub: drain stdin, succeed
cat >/dev/null 2>&1 || true
exit 0
STUB
chmod +x "$stub"

# Working directory the generated task files name in their `cwd:` field.
work="$tmp/work"
mkdir -p "$work"

task="$tmp/task.md"
cat > "$task" <<EOF
---
engine: devin
cwd: $work
yolo: true
---

# self-test no-op task
EOF

"$AQ" init >/dev/null
DEVIN_BIN="$stub" "$AQ" add "$task" >/dev/null
DEVIN_BIN="$stub" "$AQ" run --once >/dev/null 2>&1
if ! ls "$AGENT_QUEUE_ROOT"/review/*.md >/dev/null 2>&1; then
  # Dump the queue tree first so the failure is diagnosable.
  echo "--- queue state ---" >&2
  ls -R "$AGENT_QUEUE_ROOT" >&2 || true
  fail "no-op cycle did not complete (expected a file in review/)"
fi
pass "no-verify cycle → task parked in review/"

# 5. verify-pass gate: rc=0 + passing verify → testing/, then manual ship → shipped/
task2="$tmp/task-verify.md"
cat > "$task2" <<EOF
---
engine: devin
cwd: $work
yolo: true
verify: true
---

# self-test verify-pass task
EOF

DEVIN_BIN="$stub" "$AQ" add "$task2" >/dev/null
DEVIN_BIN="$stub" "$AQ" run --once >/dev/null 2>&1
if ! ls "$AGENT_QUEUE_ROOT"/testing/*.md >/dev/null 2>&1; then
  echo "--- queue state ---" >&2
  ls -R "$AGENT_QUEUE_ROOT" >&2 || true
  fail "verify-pass cycle did not reach testing/ (expected a file in testing/)"
fi
pass "verify-pass cycle → task promoted to testing/"

# Job id handed to `ship` = newest .md in testing/, minus directory and extension.
shipjob="$(basename "$(ls -1t "$AGENT_QUEUE_ROOT"/testing/*.md | head -1)" .md)"
"$AQ" ship "$shipjob" >/dev/null 2>&1
ls "$AGENT_QUEUE_ROOT"/shipped/*.md >/dev/null 2>&1 \
  || fail "ship did not move job to shipped/"
pass "manual ship → task landed in shipped/"

# 6.
# verify-fail gate: rc=0 + failing verify → failed/
# (`verify: false` makes the verify command itself exit non-zero.)
task3="$tmp/task-verifyfail.md"
printf '%s\n' '---' 'engine: devin' "cwd: $work" 'yolo: true' 'verify: false' '---' '' \
  '# self-test verify-fail task' > "$task3"
DEVIN_BIN="$stub" "$AQ" add "$task3" >/dev/null
DEVIN_BIN="$stub" "$AQ" run --once >/dev/null 2>&1
if ls "$AGENT_QUEUE_ROOT"/failed/*verifyfail*.md >/dev/null 2>&1; then
  pass "verify-fail cycle → task routed to failed/"
else
  echo "--- queue state ---" >&2
  ls -R "$AGENT_QUEUE_ROOT" >&2 || true
  fail "verify-fail cycle did not route to failed/"
fi

# status must not error
# Written as `|| fail` on purpose: with `cmd && pass`, a failing `status` would
# be exempt from `set -e` (non-final command of an && list) and go unnoticed.
"$AQ" status >/dev/null 2>&1 || fail "status exited non-zero"
pass "status runs clean"

# ─────────────────────────────────────────────────────────────────────
# Phase 1 — Slice 1 cases (manifest/priority/capabilities/engine-class/idempotency).
# Each uses its OWN AGENT_QUEUE_ROOT; the no-op engine stub means no real CLI runs.
# ─────────────────────────────────────────────────────────────────────

# 7. backward-compat: a legacy engine/cwd/yolo-only .md still completes → review/
export AGENT_QUEUE_ROOT="$tmp/queue-bc"
bc="$tmp/bc-legacy.md"
printf '%s\n' '---' 'engine: devin' "cwd: $work" 'yolo: true' '---' '' '# legacy task' > "$bc"
"$AQ" init >/dev/null
DEVIN_BIN="$stub" "$AQ" add "$bc" >/dev/null
DEVIN_BIN="$stub" "$AQ" run --once >/dev/null 2>&1
if ls "$AGENT_QUEUE_ROOT"/review/*.md >/dev/null 2>&1; then
  pass "backward-compat: legacy engine/cwd/yolo-only .md → review/"
else
  ls -R "$AGENT_QUEUE_ROOT" >&2 || true
  fail "legacy .md did not land in review/"
fi

# 8. priority: with --max 1, a 'critical' job queued AFTER a 'low' job runs first.
#    An order-recording devin-style stub appends each job's TASKID as it launches.
export AGENT_QUEUE_ROOT="$tmp/queue-prio"

# Order-recording engine stub. It looks for `--prompt-file <path>` in its
# arguments and, when AQ_ORDER is set, appends the first `TASKID=` line of that
# file to the log named by AQ_ORDER. It always exits 0.
#
# `shift 2 || shift`: if --prompt-file is the LAST argument, a bare `shift 2`
# fails without shifting anything and the `while [ $# -gt 0 ]` loop would spin
# forever, hanging the self-test. Falling back to a single shift ends the loop.
ostub="$tmp/order-engine"
cat > "$ostub" <<'STUB'
#!/usr/bin/env bash
# order-recording no-op engine stub (devin-style: --prompt-file )
pf=""
while [ $# -gt 0 ]; do
  case "$1" in
    --prompt-file) pf="${2:-}"; shift 2 || shift;;
    *) shift;;
  esac
done
if [ -n "${pf:-}" ] && [ -n "${AQ_ORDER:-}" ]; then
  grep -m1 '^TASKID=' "$pf" >> "$AQ_ORDER" 2>/dev/null || true
fi
exit 0
STUB
chmod +x "$ostub"

# Execution-order log shared with the stub via the environment; start it empty.
export AQ_ORDER="$tmp/prio-order.log"
: > "$AQ_ORDER"

plow="$tmp/p-low.md"
printf '%s\n' '---' 'engine: devin' "cwd: $work" 'yolo: true' 'priority: low' '---' '' \
  'TASKID=low' > "$plow"
pcrit="$tmp/p-crit.md"
printf '%s\n' '---' 'engine: devin' "cwd: $work" 'yolo: true' 'priority: critical' '---' '' \
  'TASKID=critical' > "$pcrit"

"$AQ" init >/dev/null
DEVIN_BIN="$ostub" "$AQ" add "$plow" >/dev/null
sleep 1   # ensure the critical job has a strictly newer (later) queue timestamp
DEVIN_BIN="$ostub" "$AQ" add "$pcrit" >/dev/null
DEVIN_BIN="$ostub" "$AQ" run --once --max 1 >/dev/null 2>&1

# The first line the stub logged identifies the job that was launched first.
if [ "$(head -1 "$AQ_ORDER" 2>/dev/null || true)" = "TASKID=critical" ]; then
  pass "priority: critical (queued later) ran before low"
else
  echo "--- execution order ---" >&2
  cat "$AQ_ORDER" >&2 || true
  fail "priority ordering did not pick the critical job first"
fi

# 9. capability mismatch: a job requiring an absent tool → failed/ with
#    result=capability_mismatch, and the agent is NEVER launched.
export AGENT_QUEUE_ROOT="$tmp/queue-cap"

# Tripwire: this engine stub touches $launchflag if it is ever executed, so the
# file's existence afterwards proves the agent was launched.
launchflag="$tmp/cap-launched.flag"
rm -f "$launchflag"
launchstub="$tmp/launch-engine"
printf '#!/usr/bin/env bash\ntouch %q\nexit 0\n' "$launchflag" > "$launchstub"
chmod +x "$launchstub"

# Task that demands a tool which is not expected to exist on any machine.
capjob="$tmp/cap.md"
cat > "$capjob" <<EOF
---
engine: devin
cwd: $work
yolo: true
capabilities: [has:definitely-not-installed]
---

# capability task
EOF

"$AQ" init >/dev/null
DEVIN_BIN="$launchstub" "$AQ" add "$capjob" >/dev/null
DEVIN_BIN="$launchstub" "$AQ" run --once >/dev/null 2>&1

# Both must hold: a task file under failed/ AND a .state/*.meta file that
# records result=capability_mismatch.
if ! ls "$AGENT_QUEUE_ROOT"/failed/*.md >/dev/null 2>&1 \
  || ! grep -q '^result=capability_mismatch' "$AGENT_QUEUE_ROOT"/.state/*.meta 2>/dev/null; then
  ls -R "$AGENT_QUEUE_ROOT" >&2 || true
  fail "capability-mismatch job not routed to failed/ with result=capability_mismatch"
fi
pass "capability mismatch → failed/ (result=capability_mismatch)"

[[ ! -e "$launchflag" ]] || fail "agent WAS launched on capability mismatch (it must not be)"
pass "capability mismatch: agent never launched"

# 10. engine-class: a job with engine-class:agentic-coder and no engine, with
#     DEVIN_BIN stubbed (available), resolves to devin, runs, and lands in review/.
export AGENT_QUEUE_ROOT="$tmp/queue-ec"
ecjob="$tmp/ec.md"
cat > "$ecjob" <<EOF
---
engine-class: agentic-coder
cwd: $work
yolo: true
---

# engine-class task
EOF

"$AQ" init >/dev/null
DEVIN_BIN="$stub" "$AQ" add "$ecjob" >/dev/null
DEVIN_BIN="$stub" "$AQ" run --once >/dev/null 2>&1
if ! ls "$AGENT_QUEUE_ROOT"/review/*.md >/dev/null 2>&1; then
  ls -R "$AGENT_QUEUE_ROOT" >&2 || true
  fail "engine-class job did not resolve+run to review/"
fi
pass "engine-class: agentic-coder (no engine) resolved to devin → review/"

# 11. idempotency-key dedupe on add.
export AGENT_QUEUE_ROOT="$tmp/queue-idem"
"$AQ" init >/dev/null

# Print how many *.md files sit directly in the current queue's inbox/
# (whitespace stripped, because some `wc` implementations pad the number).
inbox_count() {
  find "$AGENT_QUEUE_ROOT/inbox" -maxdepth 1 -type f -name '*.md' 2>/dev/null \
    | wc -l | tr -d ' '
}

# The checks below use explicit if/else instead of `test && pass || fail`:
# in the chained form `fail` would also fire if `pass` itself returned non-zero.

ia="$tmp/idem-a.md"
printf '%s\n' '---' 'engine: devin' "cwd: $work" 'yolo: true' 'idempotency-key: dedup-1' '---' '' \
  'alpha body' > "$ia"
DEVIN_BIN="$stub" "$AQ" add "$ia" >/dev/null
DEVIN_BIN="$stub" "$AQ" add "$ia" >/dev/null   # identical key+body → no-op
cnt="$(inbox_count)"
if [ "$cnt" = "1" ]; then
  pass "idempotency: same key+body added twice → exactly 1 inbox file"
else
  fail "idempotency: expected 1 inbox file after duplicate add, got $cnt"
fi

# same key, different body, prior STILL in inbox → supersede (still exactly 1)
ib="$tmp/idem-b.md"
printf '%s\n' '---' 'engine: devin' "cwd: $work" 'yolo: true' 'idempotency-key: dedup-1' '---' '' \
  'beta body (changed)' > "$ib"
DEVIN_BIN="$stub" "$AQ" add "$ib" >/dev/null
cnt="$(inbox_count)"
if [ "$cnt" = "1" ]; then
  pass "idempotency: same key + changed body, prior in inbox → superseded (1 file)"
else
  fail "idempotency: expected 1 inbox file after supersede, got $cnt"
fi

# drain (prior leaves inbox → review), then same key + different body → REJECT
DEVIN_BIN="$stub" "$AQ" run --once >/dev/null 2>&1
ic="$tmp/idem-c.md"
printf '%s\n' '---' 'engine: devin' "cwd: $work" 'yolo: true' 'idempotency-key: dedup-1' '---' '' \
  'gamma body (changed again)' > "$ic"
# Here a zero exit status from `add` is the failure: the add must be refused.
if DEVIN_BIN="$stub" "$AQ" add "$ic" >/dev/null 2>&1; then
  fail "idempotency: same key + different body (prior past inbox) should be rejected"
else
  pass "idempotency: same key + different body, prior past inbox → rejected"
fi
cnt="$(inbox_count)"
if [ "$cnt" = "0" ]; then
  pass "idempotency: a rejected add enqueues nothing"
else
  fail "idempotency: rejected add should not enqueue (inbox=$cnt)"
fi

echo "self-test PASS"