Close the final Phase-2 exit-criteria box: >=2 factories executing jobs in parallel through one coordinator, proving the concurrency guarantees end-to-end. This is a DEMO HARNESS over the existing runtime — agent-queue.sh and lib/fleet-client.sh are unchanged (read + called, not modified). demo/two-factory-demo.sh: starts two real `agent-queue.sh run` daemons (mac-1 + ubuntu-1, separate queues/cwds) that compete ONLY through the coordinator, then asserts: (a) no double-assign — each of 3 jobs executed by exactly one factory; (b) fencing + reclaim — kill a factory mid-job, the reaper returns its job, the survivor reclaims + completes it, and the dead worker's late/zombie report (stale leaseEpoch) is FENCED (HTTP 409, never shipped); (c) parallelism — both factories hold active jobs concurrently. Dual-mode: CI-safe stateful stub by default; live platform-service when AQ_FLEET_API/AQ_FLEET_TOKEN set. demo/coordinator-stub.sh: stateful, mkdir-lock-guarded, file-backed coordinator implementing claim/lease/fence/renew/release + reaper-reclaim via the existing AQ_FLEET_API_CMD seam — the selftest stub pattern extended with shared state so >=2 processes coordinate through one coordinator. demo/README.md: stub + real invocations, env knobs, what each guarantee proves, what-to-watch guide. selftest.sh: +3 headless stub-mode checks (existing 68 unchanged byte-for-byte -> 71 total green). docs/GIGAFACTORY_ROADMAP.md: tick the §14 two-factory-demo box; annotate Phase-2 exit criteria; bump §0 Phase 2 to 80% (remaining: scheduler-core wiring [common-plat PR #31], tracker-direct call, factory enrollment). bash 3.2 + awk/sed/grep/pgrep only; mac+linux safe; no new runtime deps. Generated with [Devin](https://cli.devin.ai/docs) Co-Authored-By: Devin <158243242+devin-ai-integration[bot]@users.noreply.github.com>
1026 lines
55 KiB
Bash
Executable File
1026 lines
55 KiB
Bash
Executable File
#!/usr/bin/env bash
#
# selftest.sh — quick, dependency-light verification for agent-queue.
#
# Runs:
#   1. shellcheck (if installed) at --severity=error on the runner
#   2. bash -n syntax check on the runner + this script
#   3. node --check on the dashboard (if node installed)
#   4. a live init/add/run --once cycle against a throwaway queue using a
#      no-op engine stub (no real agent CLI is ever invoked)
#
# It uses its own temp AGENT_QUEUE_ROOT so it never touches a real queue.
# Exit 0 = all good. Run it before every commit.
#
# Strict mode: abort on unhandled errors, unset variables, and failures in
# any stage of a pipeline.
set -euo pipefail
# Absolute directory containing this script; the runner under test is expected
# to sit next to it. `--` keeps a directory name starting with '-' from being
# parsed as an option.
HERE="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd)"
AQ="$HERE/agent-queue.sh"
# Reporting helpers. Each joins its arguments into one message ("$*").
#   pass MSG... — green check mark line on stdout
#   info MSG... — cyan bullet line on stdout
#   fail MSG... — red cross line on stderr, then exits the script with status 1
pass() { printf ' \033[32m✓\033[0m %s\n' "$*"; }
info() { printf ' \033[36m•\033[0m %s\n' "$*"; }
fail() {
  printf ' \033[31m✗ %s\033[0m\n' "$*" >&2
  exit 1
}
# Private scratch directory for everything the self-test creates (queues,
# engine stubs, git repos). The EXIT trap removes it on every exit path.
tmp="$(mktemp -d "${TMPDIR:-/tmp}/aq-selftest.XXXXXX")"
# ${tmp:?} aborts instead of running `rm -rf` on an empty path; `--` stops
# option parsing in case the path ever begins with '-'.
cleanup() { rm -rf -- "${tmp:?}"; }
trap cleanup EXIT

echo "agent-queue self-test"
# 1. shellcheck (optional)
#    An explicit if/else is required: with a bare `shellcheck ... && pass`,
#    `set -e` does not fire for the failing non-final command of an && list,
#    so findings would be skipped silently instead of failing the self-test.
if command -v shellcheck >/dev/null 2>&1; then
  if shellcheck --severity=error --shell=bash "$AQ" "${BASH_SOURCE[0]}"; then
    pass "shellcheck (errors): clean"
  else
    fail "shellcheck reported errors"
  fi
else
  info "shellcheck not installed — skipping"
fi
# 2. syntax
#    if/else rather than `bash -n ... && pass`: under `set -e` a failing
#    left-hand side of && does not abort the script, so a syntax error would
#    otherwise go unreported.
if bash -n "$AQ"; then
  pass "bash -n agent-queue.sh"
else
  fail "bash -n agent-queue.sh: syntax error"
fi
if bash -n "${BASH_SOURCE[0]}"; then
  pass "bash -n selftest.sh"
else
  fail "bash -n selftest.sh: syntax error"
fi
# 3. dashboard syntax (optional)
#    A failing `node --check` must fail the self-test; `node ... && pass`
#    alone would let it pass silently under `set -e`.
if command -v node >/dev/null 2>&1; then
  if node --check "$HERE/dashboard.mjs"; then
    pass "node --check dashboard.mjs"
  else
    fail "node --check dashboard.mjs: syntax error"
  fi
else
  info "node not installed — skipping dashboard check"
fi
# 4. live no-op cycle
#    Builds the shared fixtures reused by later cases: $stub (an engine that
#    drains stdin and exits 0) and $work (a plain, non-git working directory).
export AGENT_QUEUE_ROOT="$tmp/queue"
stub="$tmp/noop-engine"
printf '#!/usr/bin/env bash\n# no-op engine stub: drain stdin, succeed\ncat >/dev/null 2>&1 || true\nexit 0\n' > "$stub"
chmod +x "$stub"

work="$tmp/work"; mkdir -p "$work"
task="$tmp/task.md"
printf '%s\n' '---' 'engine: devin' "cwd: $work" 'yolo: true' '---' '' '# self-test no-op task' > "$task"

"$AQ" init >/dev/null
DEVIN_BIN="$stub" "$AQ" add "$task" >/dev/null
DEVIN_BIN="$stub" "$AQ" run --once >/dev/null 2>&1

if ls "$AGENT_QUEUE_ROOT"/review/*.md >/dev/null 2>&1; then
  pass "no-verify cycle → task parked in review/"
else
  # Dump the queue tree so the failure shows where the job ended up.
  echo "--- queue state ---" >&2
  ls -R "$AGENT_QUEUE_ROOT" >&2 || true
  fail "no-op cycle did not complete (expected a file in review/)"
fi
# 5. verify-pass gate: rc=0 + passing verify → testing/, then manual ship → shipped/
task2="$tmp/task-verify.md"
printf '%s\n' '---' 'engine: devin' "cwd: $work" 'yolo: true' 'verify: true' '---' '' '# self-test verify-pass task' > "$task2"
DEVIN_BIN="$stub" "$AQ" add "$task2" >/dev/null
DEVIN_BIN="$stub" "$AQ" run --once >/dev/null 2>&1
if ls "$AGENT_QUEUE_ROOT"/testing/*.md >/dev/null 2>&1; then
  pass "verify-pass cycle → task promoted to testing/"
else
  echo "--- queue state ---" >&2
  ls -R "$AGENT_QUEUE_ROOT" >&2 || true
  fail "verify-pass cycle did not reach testing/ (expected a file in testing/)"
fi
# Job id = basename (without .md) of the most recently modified file in testing/.
shipjob="$(basename "$(ls -1t "$AGENT_QUEUE_ROOT"/testing/*.md | head -1)" .md)"
"$AQ" ship "$shipjob" >/dev/null 2>&1
if ls "$AGENT_QUEUE_ROOT"/shipped/*.md >/dev/null 2>&1; then
  pass "manual ship → task landed in shipped/"
else
  fail "ship did not move job to shipped/"
fi
# 6. verify-fail gate: rc=0 + failing verify → failed/
#    The source file is named task-verifyfail.md; the glob below looks for a
#    failed/ entry carrying that name fragment.
task3="$tmp/task-verifyfail.md"
printf '%s\n' '---' 'engine: devin' "cwd: $work" 'yolo: true' 'verify: false' '---' '' '# self-test verify-fail task' > "$task3"
DEVIN_BIN="$stub" "$AQ" add "$task3" >/dev/null
DEVIN_BIN="$stub" "$AQ" run --once >/dev/null 2>&1
if ls "$AGENT_QUEUE_ROOT"/failed/*verifyfail*.md >/dev/null 2>&1; then
  pass "verify-fail cycle → task routed to failed/"
else
  echo "--- queue state ---" >&2
  ls -R "$AGENT_QUEUE_ROOT" >&2 || true
  fail "verify-fail cycle did not route to failed/"
fi
# status must not error
# (if/else, because `"$AQ" status && pass` would ignore a non-zero status
# under `set -e` and simply print nothing.)
if "$AQ" status >/dev/null 2>&1; then
  pass "status runs clean"
else
  fail "status exited non-zero"
fi
# ─────────────────────────────────────────────────────────────────────
# Phase 1 — Slice 1 cases (manifest/priority/capabilities/engine-class/idempotency).
# Each uses its OWN AGENT_QUEUE_ROOT; the no-op engine stub means no real CLI runs.
# ─────────────────────────────────────────────────────────────────────
# 7. backward-compat: a legacy engine/cwd/yolo-only .md still completes → review/
export AGENT_QUEUE_ROOT="$tmp/queue-bc"
bc="$tmp/bc-legacy.md"
printf '%s\n' '---' 'engine: devin' "cwd: $work" 'yolo: true' '---' '' '# legacy task' > "$bc"
"$AQ" init >/dev/null
DEVIN_BIN="$stub" "$AQ" add "$bc" >/dev/null
DEVIN_BIN="$stub" "$AQ" run --once >/dev/null 2>&1
if ls "$AGENT_QUEUE_ROOT"/review/*.md >/dev/null 2>&1; then
  pass "backward-compat: legacy engine/cwd/yolo-only .md → review/"
else
  ls -R "$AGENT_QUEUE_ROOT" >&2 || true
  fail "legacy .md did not land in review/"
fi
# 8. priority: with --max 1, a 'critical' job queued AFTER a 'low' job runs first.
#    An order-recording devin-style stub appends each job's TASKID as it launches.
export AGENT_QUEUE_ROOT="$tmp/queue-prio"
ostub="$tmp/order-engine"
# Quoted delimiter: the stub body is written literally, nothing expands here.
cat > "$ostub" <<'STUB'
#!/usr/bin/env bash
# order-recording no-op engine stub (devin-style: --prompt-file <pf>)
pf=""
while [ $# -gt 0 ]; do
  case "$1" in
    --prompt-file) pf="${2:-}"; shift 2;;
    *) shift;;
  esac
done
if [ -n "${pf:-}" ] && [ -n "${AQ_ORDER:-}" ]; then
  grep -m1 '^TASKID=' "$pf" >> "$AQ_ORDER" 2>/dev/null || true
fi
exit 0
STUB
chmod +x "$ostub"
# AQ_ORDER is exported so the stub can find the (initially empty) order log.
export AQ_ORDER="$tmp/prio-order.log"; : > "$AQ_ORDER"
plow="$tmp/p-low.md"
printf '%s\n' '---' 'engine: devin' "cwd: $work" 'yolo: true' 'priority: low' '---' '' 'TASKID=low' > "$plow"
pcrit="$tmp/p-crit.md"
printf '%s\n' '---' 'engine: devin' "cwd: $work" 'yolo: true' 'priority: critical' '---' '' 'TASKID=critical' > "$pcrit"
"$AQ" init >/dev/null
DEVIN_BIN="$ostub" "$AQ" add "$plow" >/dev/null
sleep 1 # ensure the critical job has a strictly newer (later) queue timestamp
DEVIN_BIN="$ostub" "$AQ" add "$pcrit" >/dev/null
DEVIN_BIN="$ostub" "$AQ" run --once --max 1 >/dev/null 2>&1
# The first line of the order log is the job that launched first.
if [ "$(head -1 "$AQ_ORDER" 2>/dev/null || true)" = "TASKID=critical" ]; then
  pass "priority: critical (queued later) ran before low"
else
  echo "--- execution order ---" >&2; cat "$AQ_ORDER" >&2 || true
  fail "priority ordering did not pick the critical job first"
fi
# 9. capability mismatch: a job requiring an absent tool → failed/ with
#    result=capability_mismatch, and the agent is NEVER launched.
export AGENT_QUEUE_ROOT="$tmp/queue-cap"
launchflag="$tmp/cap-launched.flag"; rm -f "$launchflag"
launchstub="$tmp/launch-engine"
# The stub touches $launchflag if it ever runs; %q shell-quotes the path that
# gets embedded in the generated script.
printf '#!/usr/bin/env bash\ntouch %q\nexit 0\n' "$launchflag" > "$launchstub"
chmod +x "$launchstub"
capjob="$tmp/cap.md"
printf '%s\n' '---' 'engine: devin' "cwd: $work" 'yolo: true' \
  'capabilities: [has:definitely-not-installed]' '---' '' '# capability task' > "$capjob"
"$AQ" init >/dev/null
DEVIN_BIN="$launchstub" "$AQ" add "$capjob" >/dev/null
DEVIN_BIN="$launchstub" "$AQ" run --once >/dev/null 2>&1
if ls "$AGENT_QUEUE_ROOT"/failed/*.md >/dev/null 2>&1 \
  && grep -q '^result=capability_mismatch' "$AGENT_QUEUE_ROOT"/.state/*.meta 2>/dev/null; then
  pass "capability mismatch → failed/ (result=capability_mismatch)"
else
  ls -R "$AGENT_QUEUE_ROOT" >&2 || true
  fail "capability-mismatch job not routed to failed/ with result=capability_mismatch"
fi
if [ -e "$launchflag" ]; then
  fail "agent WAS launched on capability mismatch (it must not be)"
else
  pass "capability mismatch: agent never launched"
fi
# 10. engine-class: a job with engine-class:agentic-coder and no engine, with
#     DEVIN_BIN stubbed (available), resolves to devin, runs, and lands in review/.
export AGENT_QUEUE_ROOT="$tmp/queue-ec"
ecjob="$tmp/ec.md"
printf '%s\n' '---' 'engine-class: agentic-coder' "cwd: $work" 'yolo: true' '---' '' '# engine-class task' > "$ecjob"
"$AQ" init >/dev/null
DEVIN_BIN="$stub" "$AQ" add "$ecjob" >/dev/null
DEVIN_BIN="$stub" "$AQ" run --once >/dev/null 2>&1
if ls "$AGENT_QUEUE_ROOT"/review/*.md >/dev/null 2>&1; then
  pass "engine-class: agentic-coder (no engine) resolved to devin → review/"
else
  ls -R "$AGENT_QUEUE_ROOT" >&2 || true
  fail "engine-class job did not resolve+run to review/"
fi
# 11. idempotency-key dedupe on add.
#     $cnt is always the number of *.md files directly inside inbox/.
export AGENT_QUEUE_ROOT="$tmp/queue-idem"
"$AQ" init >/dev/null
ia="$tmp/idem-a.md"
printf '%s\n' '---' 'engine: devin' "cwd: $work" 'yolo: true' 'idempotency-key: dedup-1' '---' '' 'alpha body' > "$ia"
DEVIN_BIN="$stub" "$AQ" add "$ia" >/dev/null
DEVIN_BIN="$stub" "$AQ" add "$ia" >/dev/null # identical key+body → no-op
cnt=$(find "$AGENT_QUEUE_ROOT/inbox" -maxdepth 1 -type f -name '*.md' 2>/dev/null | wc -l | tr -d ' ')
if [ "$cnt" = "1" ]; then
  pass "idempotency: same key+body added twice → exactly 1 inbox file"
else
  fail "idempotency: expected 1 inbox file after duplicate add, got $cnt"
fi
# same key, different body, prior STILL in inbox → supersede (still exactly 1)
ib="$tmp/idem-b.md"
printf '%s\n' '---' 'engine: devin' "cwd: $work" 'yolo: true' 'idempotency-key: dedup-1' '---' '' 'beta body (changed)' > "$ib"
DEVIN_BIN="$stub" "$AQ" add "$ib" >/dev/null
cnt=$(find "$AGENT_QUEUE_ROOT/inbox" -maxdepth 1 -type f -name '*.md' 2>/dev/null | wc -l | tr -d ' ')
if [ "$cnt" = "1" ]; then
  pass "idempotency: same key + changed body, prior in inbox → superseded (1 file)"
else
  fail "idempotency: expected 1 inbox file after supersede, got $cnt"
fi
# drain (prior leaves inbox → review), then same key + different body → REJECT
DEVIN_BIN="$stub" "$AQ" run --once >/dev/null 2>&1
ic="$tmp/idem-c.md"
printf '%s\n' '---' 'engine: devin' "cwd: $work" 'yolo: true' 'idempotency-key: dedup-1' '---' '' 'gamma body (changed again)' > "$ic"
if DEVIN_BIN="$stub" "$AQ" add "$ic" >/dev/null 2>&1; then
  fail "idempotency: same key + different body (prior past inbox) should be rejected"
else
  pass "idempotency: same key + different body, prior past inbox → rejected"
fi
cnt=$(find "$AGENT_QUEUE_ROOT/inbox" -maxdepth 1 -type f -name '*.md' 2>/dev/null | wc -l | tr -d ' ')
if [ "$cnt" = "0" ]; then
  pass "idempotency: a rejected add enqueues nothing"
else
  fail "idempotency: rejected add should not enqueue (inbox=$cnt)"
fi
# ─────────────────────────────────────────────────────────────────────
# Phase 1 — Slice 3 cases (resilience & insights, single host).
# Use temp git repos + stubs; never touches a real queue.
# ─────────────────────────────────────────────────────────────────────
# metaval FILE KEY — print the value of the LAST "KEY=value" line in FILE.
# Arguments: $1 - path to a key=value meta file
#            $2 - key to look up (matched literally, not as a regex, so a key
#                 containing '.' or '*' cannot match a different key)
# Outputs:   the value (everything after the first '=') on stdout; nothing if
#            the key or the file is missing
# Returns:   0 when the key was found, non-zero otherwise
metaval() {
  local meta_file=$1 key=$2
  awk -v k="$key" '
    index($0, k "=") == 1 { val = substr($0, length(k) + 2); found = 1 }
    END { if (found) print val; exit !found }
  ' "$meta_file" 2>/dev/null
}
# mkrepo DIR — create DIR as a fresh git repository containing one "seed"
# commit (seed.txt), using a repo-local identity so no global git identity is
# needed.
# Arguments: $1 - directory to create and initialise
# Returns:   non-zero as soon as any step fails (steps are chained with &&)
mkrepo() {
  local d=$1
  mkdir -p "$d" \
    && git -C "$d" init -q \
    && git -C "$d" config user.email t@t \
    && git -C "$d" config user.name selftest \
    && git -C "$d" config commit.gpgsign false \
    && echo seed > "$d/seed.txt" \
    && git -C "$d" add -A \
    && git -C "$d" commit -q -m seed
  # commit.gpgsign is pinned off above so a developer's global
  # commit.gpgsign=true cannot make the seed commit depend on a signing key.
}
# 12. orphan recovery: a building/ job whose worker pid is dead → `recover`
#     moves it to inbox/ with attempts incremented; a second recover is a no-op.
export AGENT_QUEUE_ROOT="$tmp/queue-orphan"
"$AQ" init >/dev/null
printf '%s\n' '---' 'engine: devin' "cwd: $work" 'yolo: true' '---' '' '# orphan task' \
  > "$AGENT_QUEUE_ROOT/building/orphanjob.md"
# pid 1 is alive but pidstart is bogus → the PID-reuse guard marks it dead.
printf '%s\n' 'job=orphanjob' 'engine=devin' "cwd=$work" 'started=1' 'attempts=1' 'pid=1' 'pidstart=NOPE' \
  > "$AGENT_QUEUE_ROOT/.state/orphanjob.meta"
"$AQ" recover >/dev/null 2>&1
if [ -f "$AGENT_QUEUE_ROOT/inbox/orphanjob.md" ] && [ ! -f "$AGENT_QUEUE_ROOT/building/orphanjob.md" ]; then
  pass "orphan recovery: dead-worker building/ job recovered to inbox/"
else
  ls -R "$AGENT_QUEUE_ROOT" >&2 || true; fail "orphan not recovered to inbox/"
fi
if [ "$(metaval "$AGENT_QUEUE_ROOT/.state/orphanjob.meta" attempts)" = "2" ]; then
  pass "orphan recovery: attempts incremented (1 -> 2)"
else
  fail "orphan recovery: attempts not incremented (got $(metaval "$AGENT_QUEUE_ROOT/.state/orphanjob.meta" attempts))"
fi
"$AQ" recover >/dev/null 2>&1 # idempotent: nothing left in building/
inbn=$(find "$AGENT_QUEUE_ROOT/inbox" -maxdepth 1 -name 'orphanjob.md' | wc -l | tr -d ' ')
if [ "$inbn" = "1" ] && [ "$(metaval "$AGENT_QUEUE_ROOT/.state/orphanjob.meta" attempts)" = "2" ]; then
  pass "orphan recovery: idempotent (twice recovers once)"
else
  fail "orphan recovery not idempotent (inbox=$inbn attempts=$(metaval "$AGENT_QUEUE_ROOT/.state/orphanjob.meta" attempts))"
fi
# 13. WIP checkpoint (git) + numstat: a git-repo cwd whose agent writes a 3-line
#     file → branch aq/wip/<job> has a commit with the change, main is untouched,
#     and lines_added is recorded.
export AGENT_QUEUE_ROOT="$tmp/queue-wip"
repo="$tmp/repo-wip"; mkrepo "$repo"
# Name of the branch mkrepo left checked out (whatever git's default is).
mainbr=$(git -C "$repo" symbolic-ref --short HEAD)
wipstub="$tmp/wip-engine"
# Stub engine: writes the three lines a, b, c to created_by_agent.txt in its cwd.
printf '#!/usr/bin/env bash\nprintf '"'"'a\\nb\\nc\\n'"'"' > created_by_agent.txt\nexit 0\n' > "$wipstub"
chmod +x "$wipstub"
"$AQ" init >/dev/null
printf '%s\n' '---' 'engine: devin' "cwd: $repo" 'yolo: true' '---' '' '# wip task' \
  > "$AGENT_QUEUE_ROOT/inbox/wipjob.md"
DEVIN_BIN="$wipstub" "$AQ" run --once >/dev/null 2>&1
# capture the log first (avoid `git log | grep -q` — under pipefail the early
# grep -q exit SIGPIPEs git log and falsely fails the pipeline).
wiplog=$(git -C "$repo" log --oneline aq/wip/wipjob 2>/dev/null || true)
if git -C "$repo" show-ref --verify --quiet refs/heads/aq/wip/wipjob \
  && [[ "$wiplog" == *"aq wip: wipjob"* ]] \
  && git -C "$repo" show aq/wip/wipjob:created_by_agent.txt >/dev/null 2>&1; then
  pass "wip checkpoint: aq/wip/wipjob has a commit with the agent's change"
else
  git -C "$repo" branch -a >&2 || true; fail "wip checkpoint branch/commit missing"
fi
if git -C "$repo" cat-file -e "$mainbr":created_by_agent.txt 2>/dev/null; then
  fail "wip checkpoint: main branch was modified (must be untouched)"
else
  pass "wip checkpoint: main branch ($mainbr) untouched"
fi
if [ "$(metaval "$AGENT_QUEUE_ROOT/.state/wipjob.meta" lines_added)" = "3" ]; then
  pass "insights numstat: lines_added recorded (=3)"
else
  fail "insights numstat: lines_added wrong (got $(metaval "$AGENT_QUEUE_ROOT/.state/wipjob.meta" lines_added))"
fi
# 13b. non-git cwd → WIP skipped cleanly (no error), job still completes.
#      $work is a plain directory, so the job log is expected to mention
#      'not a git repo'.
export AGENT_QUEUE_ROOT="$tmp/queue-nogit"
"$AQ" init >/dev/null
printf '%s\n' '---' 'engine: devin' "cwd: $work" 'yolo: true' '---' '' '# nogit task' \
  > "$AGENT_QUEUE_ROOT/inbox/nogitjob.md"
DEVIN_BIN="$stub" "$AQ" run --once >/dev/null 2>&1
if ls "$AGENT_QUEUE_ROOT"/review/*.md >/dev/null 2>&1 \
  && grep -q 'not a git repo' "$AGENT_QUEUE_ROOT/logs/nogitjob.log" 2>/dev/null; then
  pass "wip checkpoint: non-git cwd skipped cleanly → review/"
else
  fail "non-git cwd run did not complete cleanly"
fi
# 14. WIP resume: an orphan whose aq/wip/<job> already has a prior commit →
#     the relaunch checks out that branch (agent sees HEAD on aq/wip/<job>).
export AGENT_QUEUE_ROOT="$tmp/queue-resume"
repo2="$tmp/repo-resume"; mkrepo "$repo2"
mainbr2=$(git -C "$repo2" symbolic-ref --short HEAD)
# Pre-create the WIP branch with one commit, then return to the main branch.
git -C "$repo2" checkout -q -b aq/wip/resumejob
echo prior > "$repo2/prior.txt"; git -C "$repo2" add -A; git -C "$repo2" commit -q -m "aq wip: resumejob (prior)"
git -C "$repo2" checkout -q "$mainbr2"
resumeout="$tmp/resume-head.txt"; rm -f "$resumeout"
resumestub="$tmp/resume-engine"
# Stub engine: records the branch name it sees as HEAD into $resumeout.
printf '#!/usr/bin/env bash\ngit rev-parse --abbrev-ref HEAD > %q 2>/dev/null\nexit 0\n' "$resumeout" > "$resumestub"
chmod +x "$resumestub"
"$AQ" init >/dev/null
printf '%s\n' '---' 'engine: devin' "cwd: $repo2" 'yolo: true' '---' '' '# resume task' \
  > "$AGENT_QUEUE_ROOT/building/resumejob.md"
# Same dead-worker meta as the orphan case: live pid 1 with a bogus pidstart.
printf '%s\n' 'job=resumejob' 'engine=devin' "cwd=$repo2" 'started=1' 'attempts=1' 'pid=1' 'pidstart=NOPE' \
  > "$AGENT_QUEUE_ROOT/.state/resumejob.meta"
DEVIN_BIN="$resumestub" "$AQ" run --once >/dev/null 2>&1
if [ "$(cat "$resumeout" 2>/dev/null)" = "aq/wip/resumejob" ]; then
  pass "wip resume: recovered job ran with HEAD on aq/wip/resumejob"
else
  echo "resume HEAD was: $(cat "$resumeout" 2>/dev/null)" >&2
  fail "wip resume did not check out the existing WIP branch"
fi
# 15. retry on verify_failed: max=1 → requeued once (attempts=2) then failed/
#     result=retries_exhausted; a backoff (next_eligible) is recorded.
export AGENT_QUEUE_ROOT="$tmp/queue-retry"
# AGENT_QUEUE_POLL stays exported through the crash-retry case that follows,
# which unsets it when done.
export AGENT_QUEUE_POLL=1
"$AQ" init >/dev/null
printf '%s\n' '---' 'engine: devin' "cwd: $work" 'yolo: true' 'verify: false' \
  'retry: { max: 1, backoff: 1s, on: [verify_failed] }' '---' '' '# retry task' \
  > "$AGENT_QUEUE_ROOT/inbox/retryjob.md"
DEVIN_BIN="$stub" "$AQ" run --once >/dev/null 2>&1
if ls "$AGENT_QUEUE_ROOT"/failed/retryjob.md >/dev/null 2>&1 \
  && [ "$(metaval "$AGENT_QUEUE_ROOT/.state/retryjob.meta" result)" = "retries_exhausted" ] \
  && [ "$(metaval "$AGENT_QUEUE_ROOT/.state/retryjob.meta" attempts)" = "2" ]; then
  pass "retry(verify_failed): requeued once (attempts=2) then retries_exhausted"
else
  fail "retry(verify_failed) wrong (result=$(metaval "$AGENT_QUEUE_ROOT/.state/retryjob.meta" result) attempts=$(metaval "$AGENT_QUEUE_ROOT/.state/retryjob.meta" attempts))"
fi
if grep -q 'RETRY scheduled' "$AGENT_QUEUE_ROOT/logs/retryjob.log" 2>/dev/null; then
  pass "retry: backoff RETRY scheduled (next_eligible honored)"
else
  fail "retry: no RETRY scheduled line in log"
fi
# 16. retry on crash: rc!=0 with on=[crash] retries; without crash it does not.
crashstub="$tmp/crash-engine"
# Stub engine that always exits 3.
printf '#!/usr/bin/env bash\nexit 3\n' > "$crashstub"; chmod +x "$crashstub"
export AGENT_QUEUE_ROOT="$tmp/queue-crash"
"$AQ" init >/dev/null
printf '%s\n' '---' 'engine: devin' "cwd: $work" 'yolo: true' \
  'retry: { max: 1, backoff: 1s, on: [crash] }' '---' '' '# crash-retry task' \
  > "$AGENT_QUEUE_ROOT/inbox/crashjob.md"
DEVIN_BIN="$crashstub" "$AQ" run --once >/dev/null 2>&1
if [ "$(metaval "$AGENT_QUEUE_ROOT/.state/crashjob.meta" result)" = "retries_exhausted" ] \
  && [ "$(metaval "$AGENT_QUEUE_ROOT/.state/crashjob.meta" attempts)" = "2" ]; then
  pass "retry(crash): rc!=0 with on=[crash] retried then retries_exhausted (attempts=2)"
else
  fail "retry(crash) wrong (result=$(metaval "$AGENT_QUEUE_ROOT/.state/crashjob.meta" result) attempts=$(metaval "$AGENT_QUEUE_ROOT/.state/crashjob.meta" attempts))"
fi
# Same crashing engine, but the retry policy lists only verify_failed.
export AGENT_QUEUE_ROOT="$tmp/queue-nocrash"
"$AQ" init >/dev/null
printf '%s\n' '---' 'engine: devin' "cwd: $work" 'yolo: true' \
  'retry: { max: 1, backoff: 1s, on: [verify_failed] }' '---' '' '# crash-no-retry task' \
  > "$AGENT_QUEUE_ROOT/inbox/nocrashjob.md"
DEVIN_BIN="$crashstub" "$AQ" run --once >/dev/null 2>&1
if [ "$(metaval "$AGENT_QUEUE_ROOT/.state/nocrashjob.meta" result)" = "failed" ] \
  && [ "$(metaval "$AGENT_QUEUE_ROOT/.state/nocrashjob.meta" attempts)" = "1" ]; then
  pass "retry(crash): crash not in on -> straight to failed/ (no retry)"
else
  fail "retry(crash) should not retry when crash not in on (result=$(metaval "$AGENT_QUEUE_ROOT/.state/nocrashjob.meta" result) attempts=$(metaval "$AGENT_QUEUE_ROOT/.state/nocrashjob.meta" attempts))"
fi
unset AGENT_QUEUE_POLL
# 17. insights parse: a stub log with a usage line → parse_usage records tokens/
#     cost into meta; `insights <job>` prints them; a no-usage log doesn't crash.
export AGENT_QUEUE_ROOT="$tmp/queue-usage"
usagestub="$tmp/usage-engine"
# Stub engine: prints a single AQ_USAGE line and succeeds.
printf '#!/usr/bin/env bash\necho "AQ_USAGE model=claude-test tokens_in=100 tokens_out=50 cost_usd=0.0021 turns=3 tool_calls=5"\nexit 0\n' > "$usagestub"
chmod +x "$usagestub"
"$AQ" init >/dev/null
printf '%s\n' '---' 'engine: claude' "cwd: $work" 'yolo: true' '---' '' '# usage task' \
  > "$AGENT_QUEUE_ROOT/inbox/usagejob.md"
CLAUDE_BIN="$usagestub" "$AQ" run --once >/dev/null 2>&1
if [ "$(metaval "$AGENT_QUEUE_ROOT/.state/usagejob.meta" tokens_in)" = "100" ] \
  && [ "$(metaval "$AGENT_QUEUE_ROOT/.state/usagejob.meta" cost_usd)" = "0.0021" ]; then
  pass "insights parse_usage: tokens/cost extracted into meta"
else
  fail "parse_usage did not record tokens/cost (tokens_in=$(metaval "$AGENT_QUEUE_ROOT/.state/usagejob.meta" tokens_in))"
fi
ins=$("$AQ" insights usagejob 2>/dev/null || true)
if [[ "$ins" == *tokens_in* && "$ins" == *0.0021* ]]; then
  pass "insights <job>: prints per-job metrics"
else
  fail "insights <job> did not print metrics"
fi
# Second job in the same queue, run with the silent no-op engine.
printf '%s\n' '---' 'engine: claude' "cwd: $work" 'yolo: true' '---' '' '# no-usage task' \
  > "$AGENT_QUEUE_ROOT/inbox/nousagejob.md"
CLAUDE_BIN="$stub" "$AQ" run --once >/dev/null 2>&1
if "$AQ" insights nousagejob >/dev/null 2>&1 \
  && [ -z "$(metaval "$AGENT_QUEUE_ROOT/.state/nousagejob.meta" tokens_in)" ]; then
  pass "insights: no-usage log omits token fields without crashing"
else
  fail "insights crashed or fabricated tokens for a no-usage log"
fi
# 18. insights aggregate: two finished jobs → per-engine rollup with totals + rate.
#     Runs against the queue populated by the insights-parse case (no new
#     AGENT_QUEUE_ROOT is exported here). The output is captured first and
#     matched via a here-string, so no pipeline is involved.
out=$("$AQ" insights 2>/dev/null || true)
if [[ "$out" == *"ROLLUP BY ENGINE"* ]] && grep -qE 'claude .* 100 .* 50' <<<"$out"; then
  pass "insights aggregate: per-engine rollup with token totals"
else
  printf '%s\n' "$out" >&2; fail "insights aggregate rollup missing/incorrect"
fi
# ─────────────────────────────────────────────────────────────────────
# Phase 1 — Slice 2 cases (profiles + deps/DAG, single host).
# Uses a temp profile catalog (AGENT_QUEUE_PROFILES) + temp git repos.
# ─────────────────────────────────────────────────────────────────────
# Temporary profile catalog shared by the profile cases below: one front-matter
# file per profile (vfail, vpass, capreq, personap, scoped).
profdir="$tmp/profiles"; mkdir -p "$profdir"
printf '%s\n' '---' 'name: vfail' 'persona: |' ' PERSONA-VFAIL' 'default-verify: false' '---' > "$profdir/vfail.md"
printf '%s\n' '---' 'name: vpass' 'default-verify: true' '---' > "$profdir/vpass.md"
printf '%s\n' '---' 'name: capreq' 'capabilities: [has:definitely-not-installed]' '---' > "$profdir/capreq.md"
printf '%s\n' '---' 'name: personap' 'persona: |' ' PERSONA-MARKER-XYZ' ' second persona line' 'default-verify: true' '---' > "$profdir/personap.md"
printf '%s\n' '---' 'name: scoped' 'allowed-scope: [backend/**]' '---' > "$profdir/scoped.md"
export AGENT_QUEUE_PROFILES="$profdir"
# Copy of the runner with its `main "$@"` line deleted, so a test can source it
# and call individual functions.
funcs="$tmp/aq-funcs.sh"; sed '/^main "\$@"/d' "$AQ" > "$funcs"
# 19. profile inherits default-verify: vfail (verify=false) → failed/verify_failed;
#     vpass (verify=true) → testing/.
export AGENT_QUEUE_ROOT="$tmp/queue-pverify"
"$AQ" init >/dev/null
printf '%s\n' '---' 'engine: devin' "cwd: $work" 'yolo: true' 'profile: vfail' '---' '' '# pv-fail' \
  > "$AGENT_QUEUE_ROOT/inbox/pvfail.md"
DEVIN_BIN="$stub" "$AQ" run --once >/dev/null 2>&1
if ls "$AGENT_QUEUE_ROOT"/failed/pvfail.md >/dev/null 2>&1 \
  && [ "$(metaval "$AGENT_QUEUE_ROOT/.state/pvfail.meta" result)" = "verify_failed" ]; then
  pass "profile inherit: default-verify=false → failed/ (verify_failed)"
else
  fail "profile verify=false did not route to failed (result=$(metaval "$AGENT_QUEUE_ROOT/.state/pvfail.meta" result))"
fi
printf '%s\n' '---' 'engine: devin' "cwd: $work" 'yolo: true' 'profile: vpass' '---' '' '# pv-pass' \
  > "$AGENT_QUEUE_ROOT/inbox/pvpass.md"
DEVIN_BIN="$stub" "$AQ" run --once >/dev/null 2>&1
if ls "$AGENT_QUEUE_ROOT"/testing/pvpass.md >/dev/null 2>&1; then
  pass "profile inherit: default-verify=true → testing/"
else
  fail "profile verify=true did not reach testing/"
fi
# 19b. job-level verify overrides the profile (precedence job > profile).
#      Reuses the AGENT_QUEUE_ROOT exported by the preceding profile case.
printf '%s\n' '---' 'engine: devin' "cwd: $work" 'yolo: true' 'profile: vfail' 'verify: true' '---' '' '# override' \
  > "$AGENT_QUEUE_ROOT/inbox/pvoverride.md"
DEVIN_BIN="$stub" "$AQ" run --once >/dev/null 2>&1
if ls "$AGENT_QUEUE_ROOT"/testing/pvoverride.md >/dev/null 2>&1; then
  pass "profile precedence: job verify overrides profile default-verify"
else
  fail "job-level verify did not override profile"
fi
# 20. persona injection (golden): the body fed to the engine begins with the
#     profile persona. A stub copies its --prompt-file to a sentinel.
export AGENT_QUEUE_ROOT="$tmp/queue-persona"
sentinel="$tmp/persona-body.txt"; rm -f "$sentinel"
copystub="$tmp/copy-engine"
# Unquoted delimiter on purpose: $sentinel expands now (baked into the stub),
# while every \$ stays a literal $ for the stub to evaluate at run time.
cat > "$copystub" <<STUBEOF
#!/usr/bin/env bash
pf=""
while [ \$# -gt 0 ]; do case "\$1" in --prompt-file) pf="\$2"; shift 2;; *) shift;; esac; done
[ -n "\$pf" ] && cp "\$pf" "$sentinel"
exit 0
STUBEOF
chmod +x "$copystub"
"$AQ" init >/dev/null
printf '%s\n' '---' 'engine: devin' "cwd: $work" 'yolo: true' 'profile: personap' '---' '' 'TASK-BODY-LINE' \
  > "$AGENT_QUEUE_ROOT/inbox/personajob.md"
DEVIN_BIN="$copystub" "$AQ" run --once >/dev/null 2>&1
if [ "$(head -1 "$sentinel" 2>/dev/null)" = "PERSONA-MARKER-XYZ" ] \
  && grep -q 'TASK-BODY-LINE' "$sentinel" 2>/dev/null; then
  pass "persona injection: engine body begins with profile persona, task preserved"
else
  echo "body head: $(head -3 "$sentinel" 2>/dev/null)" >&2
  fail "persona was not prepended to the engine body"
fi
# 21. profile capability inheritance: a job omitting capabilities inherits the
#     profile's → unmet → failed/ capability_mismatch, agent never launched.
export AGENT_QUEUE_ROOT="$tmp/queue-pcaps"
# Note: this reassigns $launchflag, which the capability-mismatch case also uses.
launchflag="$tmp/pcaps-launched"; rm -f "$launchflag"
launchstub3="$tmp/cap-launch3"
printf '#!/usr/bin/env bash\ntouch %q\nexit 0\n' "$launchflag" > "$launchstub3"; chmod +x "$launchstub3"
"$AQ" init >/dev/null
printf '%s\n' '---' 'engine: devin' "cwd: $work" 'yolo: true' 'profile: capreq' '---' '' '# pcaps' \
  > "$AGENT_QUEUE_ROOT/inbox/pcapsjob.md"
DEVIN_BIN="$launchstub3" "$AQ" run --once >/dev/null 2>&1
if ls "$AGENT_QUEUE_ROOT"/failed/pcapsjob.md >/dev/null 2>&1 \
  && [ "$(metaval "$AGENT_QUEUE_ROOT/.state/pcapsjob.meta" result)" = "capability_mismatch" ] \
  && [ ! -e "$launchflag" ]; then
  pass "profile caps inheritance: unmet inherited capability → capability_mismatch (no launch)"
else
  fail "profile caps inheritance failed (result=$(metaval "$AGENT_QUEUE_ROOT/.state/pcapsjob.meta" result) launched=$([ -e "$launchflag" ] && echo yes || echo no))"
fi
# 22. allowed-scope warn-only: an out-of-scope change logs a WARNING and the job
#     still succeeds; plus a direct path_in_scope unit check.
export AGENT_QUEUE_ROOT="$tmp/queue-scope"
repos="$tmp/repo-scope"; mkrepo "$repos"
scopestub="$tmp/scope-engine"
# Stub engine: writes frontend/out.txt, which lies outside the 'scoped'
# profile's allowed-scope of backend/**.
printf '#!/usr/bin/env bash\nmkdir -p frontend && echo changed > frontend/out.txt\nexit 0\n' > "$scopestub"
chmod +x "$scopestub"
"$AQ" init >/dev/null
printf '%s\n' '---' 'engine: devin' "cwd: $repos" 'yolo: true' 'profile: scoped' '---' '' '# scope task' \
  > "$AGENT_QUEUE_ROOT/inbox/scopejob.md"
DEVIN_BIN="$scopestub" "$AQ" run --once >/dev/null 2>&1
if grep -q 'allowed-scope violation' "$AGENT_QUEUE_ROOT/logs/scopejob.log" 2>/dev/null \
  && ls "$AGENT_QUEUE_ROOT"/review/scopejob.md >/dev/null 2>&1; then
  pass "allowed-scope: out-of-scope change WARNS (warn-only) and job still succeeds"
else
  fail "allowed-scope warn-only did not warn / job did not succeed"
fi
# The path to the sourced function file travels through the environment rather
# than being spliced into the `bash -c` program text, so a temp path containing
# quotes or `$` cannot alter the command being run.
if SELFTEST_FUNCS="$funcs" bash -c 'set -uo pipefail; source "$SELFTEST_FUNCS"; path_in_scope "backend/a/b.ts" "backend/**" && ! path_in_scope "frontend/x.ts" "backend/**"'; then
  pass "allowed-scope: path_in_scope matches subtree, rejects outside (unit)"
else
  fail "path_in_scope unit logic wrong"
fi
# 23. deps block→run: B deps:[keyA] stays blocked until A is shipped/, then runs.
export AGENT_QUEUE_ROOT="$tmp/queue-deps"
"$AQ" init >/dev/null
printf '%s\n' '---' 'engine: devin' "cwd: $work" 'yolo: true' 'idempotency-key: keyA' '---' '' '# A' \
  > "$AGENT_QUEUE_ROOT/inbox/jobA.md"
printf '%s\n' '---' 'engine: devin' "cwd: $work" 'yolo: true' 'idempotency-key: keyB' 'deps: [keyA]' '---' '' '# B' \
  > "$AGENT_QUEUE_ROOT/inbox/jobB.md"
DEVIN_BIN="$stub" "$AQ" run --once >/dev/null 2>&1
if ls "$AGENT_QUEUE_ROOT"/inbox/jobB.md >/dev/null 2>&1 && ls "$AGENT_QUEUE_ROOT"/review/jobA.md >/dev/null 2>&1; then
  pass "deps: B stays blocked in inbox while A is unshipped"
else
  fail "deps: B should be blocked while A unshipped (A=$(ls "$AGENT_QUEUE_ROOT"/review 2>/dev/null) B-in-inbox=$(ls "$AGENT_QUEUE_ROOT"/inbox 2>/dev/null))"
fi
# status surfaces the blocked job
# Capture the output first instead of piping into `grep -q`: under pipefail an
# early grep -q exit can SIGPIPE the producer and falsely fail the pipeline.
statusout=$("$AQ" status 2>/dev/null || true)
if [[ "$statusout" == *'blocked (waiting on: keyA)'* ]]; then
  pass "deps: status surfaces 'blocked (waiting on: keyA)'"
else
  fail "deps: status did not surface blocked job"
fi
# ship A (review -> testing -> shipped), then B becomes runnable
"$AQ" promote jobA >/dev/null 2>&1 # review -> testing
"$AQ" promote jobA >/dev/null 2>&1 # testing -> shipped
DEVIN_BIN="$stub" "$AQ" run --once >/dev/null 2>&1
if ls "$AGENT_QUEUE_ROOT"/review/jobB.md >/dev/null 2>&1; then
  pass "deps: once A is shipped, B unblocks and completes"
else
  fail "deps: B did not run after A shipped"
fi
# 24. deps-mode soft: dep satisfied when the dependency is in testing/.
#     A has verify: true, so it is expected in testing/ (not shipped/); C depends
#     on A with deps-mode: soft and is expected to run in the same --once pass.
export AGENT_QUEUE_ROOT="$tmp/queue-depsoft"
"$AQ" init >/dev/null
printf '%s\n' '---' 'engine: devin' "cwd: $work" 'yolo: true' 'idempotency-key: keyA' 'verify: true' '---' '' '# A-soft' \
  > "$AGENT_QUEUE_ROOT/inbox/sjobA.md"
printf '%s\n' '---' 'engine: devin' "cwd: $work" 'yolo: true' 'idempotency-key: keyC' 'deps: [keyA]' 'deps-mode: soft' '---' '' '# C-soft' \
  > "$AGENT_QUEUE_ROOT/inbox/sjobC.md"
DEVIN_BIN="$stub" "$AQ" run --once >/dev/null 2>&1
if ls "$AGENT_QUEUE_ROOT"/testing/sjobA.md >/dev/null 2>&1 && ls "$AGENT_QUEUE_ROOT"/review/sjobC.md >/dev/null 2>&1; then
  pass "deps-mode soft: dep satisfied while dependency is in testing/"
else
  fail "deps-mode soft did not unblock from testing/ (A=$(ls "$AGENT_QUEUE_ROOT"/testing 2>/dev/null) C=$(ls "$AGENT_QUEUE_ROOT"/review 2>/dev/null))"
fi
# 25. cycle detection: with B (deps: [keyA]) already queued, adding A with
#     deps: [keyB] would close a cycle, so `add` must exit non-zero.
export AGENT_QUEUE_ROOT="$tmp/queue-cycle"
"$AQ" init >/dev/null

cycB="$tmp/cyc-b.md"
cat > "$cycB" <<EOF
---
engine: devin
cwd: $work
yolo: true
idempotency-key: keyB
deps: [keyA]
---

# cyc B
EOF
"$AQ" add "$cycB" >/dev/null 2>&1   # B added (blocked on keyA — allowed)

cycA="$tmp/cyc-a.md"
cat > "$cycA" <<EOF
---
engine: devin
cwd: $work
yolo: true
idempotency-key: keyA
deps: [keyB]
---

# cyc A
EOF
if ! DEVIN_BIN="$stub" "$AQ" add "$cycA" >/dev/null 2>&1; then
  pass "cycle detection: dependency cycle on add is rejected"
else
  fail "cycle detection: adding A deps:[keyB] while B deps:[keyA] should be rejected"
fi
unset AGENT_QUEUE_PROFILES
|
|
|
|
# ─────────────────────────────────────────────────────────────────────
|
|
# Phase 1 — Slice 4 cases (tracker adapter §10). No live service: a stub
|
|
# replaces tracker_api via AQ_TRACKER_API_CMD, records calls, returns canned JSON.
|
|
# ─────────────────────────────────────────────────────────────────────
|
|
# Write the tracker API stub to disk, make it executable, and point the
# tracker adapter at it. The stub appends each call to $AQ_STUB_CALLS (when
# set) and prints a body line followed by an HTTP status-code line.
trkstub="$tmp/trk-stub.sh"
cat <<'STUBEOF' > "$trkstub"
#!/usr/bin/env bash
# tracker API stub: records "<method> <path> :: <body>" and returns canned output
# (body line + HTTP code line), keyed off the method.
[ -n "${AQ_STUB_CALLS:-}" ] && printf '%s %s :: %s\n' "$1" "$2" "$3" >> "$AQ_STUB_CALLS"
case "$1" in
  GET) printf '%s\n%s\n' "${AQ_STUB_ITEM:-}" "${AQ_STUB_GET_CODE:-200}" ;;
  *) printf '%s\n%s\n' '{}' "${AQ_STUB_CODE:-200}" ;;
esac
STUBEOF
chmod +x "$trkstub"
export AQ_TRACKER_API_CMD="$trkstub" AQ_TRACKER_CWD="$work"
|
|
|
|
# 26. from-tracker creates an inbox job whose frontmatter carries the tracker
#     item id and the derived idempotency key, and whose body holds the item
#     description.
export AGENT_QUEUE_ROOT="$tmp/queue-ft1"
"$AQ" init >/dev/null
export AQ_STUB_CALLS="$tmp/ft1-calls.log"
: > "$AQ_STUB_CALLS"
export AQ_STUB_ITEM='{"id":"T-1","productId":"p","type":"task","status":"open","priority":"medium","title":"Title One","description":"BODY-DESC-ALPHA","labels":[]}'
"$AQ" from-tracker T-1 >/dev/null 2>&1
ftf=$(find "$AGENT_QUEUE_ROOT/inbox" -maxdepth 1 -name '*.md' 2>/dev/null | head -1)
if [ -z "$ftf" ]; then
  # Nothing was enqueued at all, so there is no file to dump.
  fail "from-tracker did not create the expected inbox job"
elif grep -q '^tracker-item: T-1$' "$ftf" \
  && grep -q '^idempotency-key: tracker-T-1$' "$ftf" \
  && grep -q 'BODY-DESC-ALPHA' "$ftf"; then
  pass "from-tracker: materializes inbox job (tracker-item + idempotency-key + body)"
else
  cat "$ftf" >&2
  fail "from-tracker did not create the expected inbox job"
fi
|
|
|
|
# 27. from-tracker maps labels -> manifest frontmatter:
#     engine-class:<x> -> engine-class, priority:<x> -> priority,
#     cap:<x> -> capabilities list.
export AGENT_QUEUE_ROOT="$tmp/queue-ft2"
"$AQ" init >/dev/null
export AQ_STUB_ITEM='{"id":"T-2","productId":"p","type":"task","status":"open","priority":"low","title":"Two","description":"desc two","labels":["engine-class:agentic-coder","priority:high","cap:os:mac"]}'
"$AQ" from-tracker T-2 >/dev/null 2>&1
ftf2=$(find "$AGENT_QUEUE_ROOT/inbox" -maxdepth 1 -name '*.md' 2>/dev/null | head -1)
if [ -z "$ftf2" ]; then
  # No job was materialized: report that directly instead of running grep/cat
  # against an empty filename (same guard as check 26).
  fail "from-tracker label mapping incorrect"
elif grep -q '^engine-class: agentic-coder$' "$ftf2" \
  && grep -q '^priority: high$' "$ftf2" \
  && grep -q '^capabilities: \[os:mac\]$' "$ftf2"; then
  pass "from-tracker: label mapping (engine-class/priority/cap) -> frontmatter"
else
  cat "$ftf2" >&2
  fail "from-tracker label mapping incorrect"
fi
|
|
|
|
# 28. from-tracker is idempotent on the derived key: importing T-2 a second time
#     into the same queue must leave exactly one job in inbox/.
"$AQ" from-tracker T-2 >/dev/null 2>&1
n=$(find "$AGENT_QUEUE_ROOT/inbox" -maxdepth 1 -name '*.md' 2>/dev/null | wc -l | tr -d ' ')
# if/else rather than `&& pass || fail`, so fail cannot fire after a pass.
if [ "$n" = "1" ]; then
  pass "from-tracker: idempotent (T-2 twice -> one job)"
else
  fail "from-tracker not idempotent (inbox=$n)"
fi
|
|
|
|
# 29. to-tracker echoes a shipped outcome as PATCH status=done plus a comment
#     POST, and the prompt body (seeded with a sentinel) must never show up in
#     any recorded call.
export AGENT_QUEUE_ROOT="$tmp/queue-tt"
"$AQ" init >/dev/null
export AQ_STUB_CALLS="$tmp/tt-calls.log"
: > "$AQ_STUB_CALLS"
cat > "$AGENT_QUEUE_ROOT/.state/jt.meta" <<'EOF'
job=jt
tracker_item=T-9
result=shipped
attempts=1
duration_s=5
tokens_in=10
tokens_out=3
cost_usd=0.001
EOF
printf '%s\n' 'SECRET-PROMPT-SENTINEL' > "$AGENT_QUEUE_ROOT/.state/jt.body.md"
"$AQ" to-tracker jt >/dev/null 2>&1
if grep -q 'PATCH /api/items/T-9/status :: {"status":"done"}' "$AQ_STUB_CALLS" \
  && grep -q 'POST /api/items/T-9/comments' "$AQ_STUB_CALLS" \
  && ! grep -q 'SECRET-PROMPT-SENTINEL' "$AQ_STUB_CALLS"; then
  pass "to-tracker: shipped -> PATCH status=done + metrics comment; no prompt body sent"
else
  cat "$AQ_STUB_CALLS" >&2
  fail "to-tracker echo incorrect / leaked body"
fi
|
|
|
|
# 30. to-tracker is idempotent: with the call log truncated, a second echo of
#     the same unchanged outcome must record no calls at all.
: > "$AQ_STUB_CALLS"
"$AQ" to-tracker jt >/dev/null 2>&1
# if/else rather than `&& pass || {...}`, so the failure branch runs only when
# the log is actually non-empty.
if [ ! -s "$AQ_STUB_CALLS" ]; then
  pass "to-tracker: idempotent (unchanged outcome -> no PATCH/comment)"
else
  cat "$AQ_STUB_CALLS" >&2
  fail "to-tracker not idempotent (made calls on unchanged outcome)"
fi
|
|
|
|
# 31. echo failure is non-fatal: with the stub answering 500, to-tracker must
#     exit 0, leave shipped/jf.md in place, and not record tracker_echoed.
export AGENT_QUEUE_ROOT="$tmp/queue-tt6"
"$AQ" init >/dev/null
cat > "$AGENT_QUEUE_ROOT/.state/jf.meta" <<'EOF'
job=jf
tracker_item=T-8
result=shipped
EOF
cat > "$AGENT_QUEUE_ROOT/shipped/jf.md" <<'EOF'
---
tracker-item: T-8
---

# x
EOF
AQ_STUB_CODE=500 "$AQ" to-tracker jf >/dev/null 2>&1
rc=$?
echoed_flag=$(metaval "$AGENT_QUEUE_ROOT/.state/jf.meta" tracker_echoed)
if [ "$rc" = "0" ] && [ -f "$AGENT_QUEUE_ROOT/shipped/jf.md" ] && [ -z "$echoed_flag" ]; then
  pass "to-tracker: HTTP 500 is non-fatal (exit 0, job unchanged, not marked echoed)"
else
  fail "to-tracker 500 was not handled non-fatally (rc=$rc)"
fi
|
|
|
|
# 32. auto-echo (AQ_TRACKER_AUTO=1): running a job that carries tracker-item
#     must produce a status PATCH for that item, and the job must still reach
#     review/.
export AGENT_QUEUE_ROOT="$tmp/queue-auto"
"$AQ" init >/dev/null
export AQ_STUB_CALLS="$tmp/auto-calls.log"
: > "$AQ_STUB_CALLS"
cat > "$AGENT_QUEUE_ROOT/inbox/autojob.md" <<EOF
---
engine: devin
cwd: $work
yolo: true
tracker-item: T-7
---

# auto task
EOF
AQ_TRACKER_AUTO=1 DEVIN_BIN="$stub" "$AQ" run --once >/dev/null 2>&1
if grep -q 'PATCH /api/items/T-7/status' "$AQ_STUB_CALLS" 2>/dev/null \
  && [ -e "$AGENT_QUEUE_ROOT/review/autojob.md" ]; then
  pass "auto-echo: AQ_TRACKER_AUTO=1 echoes a transition during run (job still reaches review/)"
else
  cat "$AQ_STUB_CALLS" 2>/dev/null >&2
  fail "auto-echo did not fire / job did not complete"
fi
# Tracker-slice environment is done; drop it so later sections start clean.
unset AQ_TRACKER_API_CMD AQ_TRACKER_CWD AQ_STUB_CALLS AQ_STUB_ITEM AQ_STUB_CODE AQ_STUB_GET_CODE
|
|
|
|
# ─────────────────────────────────────────────────────────────────────
|
|
# Phase 2 — Slice 3 cases (fleet coordinator integration). A stub replaces
|
|
# fleet_api via AQ_FLEET_API_CMD (no live coordinator), records calls + returns
|
|
# canned JSON. The flag-off cases prove the offline path is unchanged.
|
|
# ─────────────────────────────────────────────────────────────────────
|
|
# Write the fleet coordinator stub to disk and make it executable. It appends
# each call to $AQ_FSTUB_CALLS (when set). For POST /fleet/claim it answers
# claimed:true unless the file named by $AQ_FSTUB_CLAIM_FLAG already exists
# (it creates that file on the first claim); PATCH /fleet/jobs/* returns the
# status code in $AQ_FSTUB_PATCH_CODE (default 200).
fstub="$tmp/fleet-stub.sh"
cat <<'STUBEOF' > "$fstub"
#!/usr/bin/env bash
# fleet API stub: record "<method> <path> :: <body>"; canned responses by route.
[ -n "${AQ_FSTUB_CALLS:-}" ] && printf '%s %s :: %s\n' "$1" "$2" "$3" >> "$AQ_FSTUB_CALLS"
case "$1 $2" in
  "POST /fleet/factories/heartbeat") printf '%s\n200\n' '{"ok":true}' ;;
  "POST /fleet/claim")
    if [ -n "${AQ_FSTUB_CLAIM_FLAG:-}" ] && [ -f "$AQ_FSTUB_CLAIM_FLAG" ]; then
      printf '%s\n200\n' '{"claimed":false}'
    else
      [ -n "${AQ_FSTUB_CLAIM_FLAG:-}" ] && : > "$AQ_FSTUB_CLAIM_FLAG"
      printf '{"claimed":true,"job":{"id":"%s","bodyMd":"%s","leaseEpoch":1},"lease":{"leaseEpoch":1}}\n200\n' \
        "${AQ_FSTUB_JOB_ID:-fjob_1}" "${AQ_FSTUB_BODY:-do the work}"
    fi ;;
  PATCH\ /fleet/jobs/*) printf '%s\n%s\n' '{}' "${AQ_FSTUB_PATCH_CODE:-200}" ;;
  *) printf '%s\n200\n' '{}' ;;
esac
STUBEOF
chmod +x "$fstub"
|
|
|
|
# 33. flag OFF (default): the recording stub is configured but AQ_FLEET is not
#     set for the run, so the call log must stay empty and the local job must
#     still reach review/ via the offline path.
export AGENT_QUEUE_ROOT="$tmp/queue-floff"
"$AQ" init >/dev/null
export AQ_FLEET_API_CMD="$fstub"
export AQ_FSTUB_CALLS="$tmp/floff-calls.log"
: > "$AQ_FSTUB_CALLS"
cat > "$AGENT_QUEUE_ROOT/inbox/localjob.md" <<EOF
---
engine: devin
cwd: $work
yolo: true
---

# local task
EOF
DEVIN_BIN="$stub" "$AQ" run --once >/dev/null 2>&1
if [ ! -s "$AQ_FSTUB_CALLS" ] && [ -e "$AGENT_QUEUE_ROOT/review/localjob.md" ]; then
  pass "fleet flag OFF: zero coordinator calls; offline job completes to review/"
else
  cat "$AQ_FSTUB_CALLS" >&2
  fail "flag-off made fleet calls or offline job did not complete"
fi
unset AQ_FLEET_API_CMD AQ_FSTUB_CALLS
|
|
|
|
# 34. AQ_FLEET=1: the run must send a heartbeat carrying capabilities, claim a
#     job from the coordinator, execute it to review/, and persist
#     fleet_job_id + fleet_lease_epoch in the job's meta file.
export AGENT_QUEUE_ROOT="$tmp/queue-fl1"
export AQ_FLEET_CWD="$work"
"$AQ" init >/dev/null
export AQ_FLEET_API_CMD="$fstub"
export AQ_FSTUB_CALLS="$tmp/fl1-calls.log"
export AQ_FSTUB_CLAIM_FLAG="$tmp/fl1-claimed"
export AQ_FSTUB_JOB_ID="fjob_1"
export AQ_FSTUB_BODY="FLEET-BODY-SENTINEL do work"
: > "$AQ_FSTUB_CALLS"
rm -f "$AQ_FSTUB_CLAIM_FLAG"   # no flag file ⇒ the stub hands out one claim
AQ_FLEET=1 AGENT_QUEUE_POLL=1 DEVIN_BIN="$stub" "$AQ" run --once >/dev/null 2>&1
fmeta=$(find "$AGENT_QUEUE_ROOT/.state" -name '*fleet-fjob_1.meta' | head -1)
if grep -q 'POST /fleet/factories/heartbeat :: .*capabilities' "$AQ_FSTUB_CALLS" \
  && grep -q 'POST /fleet/claim' "$AQ_FSTUB_CALLS" \
  && ls "$AGENT_QUEUE_ROOT"/review/*fleet-fjob_1.md >/dev/null 2>&1 \
  && [ "$(metaval "$fmeta" fleet_job_id)" = "fjob_1" ] \
  && [ "$(metaval "$fmeta" fleet_lease_epoch)" = "1" ]; then
  pass "fleet: register(heartbeat)+claim -> coordinator job materialized + executed to review/"
else
  cat "$AQ_FSTUB_CALLS" >&2
  fail "fleet claim/execute did not work as expected"
fi
|
|
|
|
# 35. report + checkpoint: PATCH /fleet/jobs/:id must carry stage + leaseEpoch,
#     include a wipBranch checkpoint on the building stage (cwd is a git repo
#     created by mkrepo), and report the review stage.
export AGENT_QUEUE_ROOT="$tmp/queue-fl2"
repo=$tmp/repo-fl2
mkrepo "$repo"
export AQ_FLEET_CWD="$repo"
"$AQ" init >/dev/null
export AQ_FSTUB_CALLS="$tmp/fl2-calls.log"
export AQ_FSTUB_CLAIM_FLAG="$tmp/fl2-claimed"
export AQ_FSTUB_JOB_ID="fjob_2"
export AQ_FSTUB_BODY="work two"
: > "$AQ_FSTUB_CALLS"
rm -f "$AQ_FSTUB_CLAIM_FLAG"
AQ_FLEET=1 AGENT_QUEUE_POLL=1 DEVIN_BIN="$stub" "$AQ" run --once >/dev/null 2>&1
if grep -q 'PATCH /fleet/jobs/fjob_2 :: .*"stage":"building".*"leaseEpoch":1' "$AQ_FSTUB_CALLS" \
  && grep -q 'PATCH /fleet/jobs/fjob_2 :: .*"stage":"building".*"wipBranch"' "$AQ_FSTUB_CALLS" \
  && grep -q 'PATCH /fleet/jobs/fjob_2 :: .*"stage":"review"' "$AQ_FSTUB_CALLS"; then
  pass "fleet: PATCH stage transitions carry leaseEpoch + checkpoint(wipBranch) on building"
else
  cat "$AQ_FSTUB_CALLS" >&2
  fail "fleet report/checkpoint payload incorrect"
fi
|
|
|
|
# 36. FENCING: the stub answers every PATCH with 409. The job must end up in
#     failed/ (nothing in review/, testing/ or shipped/), with
#     result=fenced_quarantine and fleet_fenced=1 in its meta.
export AGENT_QUEUE_ROOT="$tmp/queue-fl3"
"$AQ" init >/dev/null
export AQ_FSTUB_CALLS="$tmp/fl3-calls.log"
export AQ_FSTUB_CLAIM_FLAG="$tmp/fl3-claimed"
export AQ_FSTUB_JOB_ID="fjob_3"
export AQ_FSTUB_BODY="work three"
export AQ_FSTUB_PATCH_CODE=409
: > "$AQ_FSTUB_CALLS"
rm -f "$AQ_FSTUB_CLAIM_FLAG"
AQ_FLEET=1 AGENT_QUEUE_POLL=1 DEVIN_BIN="$stub" "$AQ" run --once >/dev/null 2>&1
fmeta3=$(find "$AGENT_QUEUE_ROOT/.state" -name '*fleet-fjob_3.meta' | head -1)
# Count every manifest that reached a post-build stage; must be zero.
rcount=$(find "$AGENT_QUEUE_ROOT/review" "$AGENT_QUEUE_ROOT/testing" "$AGENT_QUEUE_ROOT/shipped" -maxdepth 1 -name '*.md' 2>/dev/null | wc -l | tr -d ' ')
if [ "$rcount" = "0" ] \
  && ls "$AGENT_QUEUE_ROOT"/failed/*fleet-fjob_3.md >/dev/null 2>&1 \
  && [ "$(metaval "$fmeta3" result)" = "fenced_quarantine" ] \
  && [ "$(metaval "$fmeta3" fleet_fenced)" = "1" ]; then
  pass "fleet FENCING: stale-epoch PATCH -> self-abort + quarantine (never shipped)"
else
  cat "$AQ_FSTUB_CALLS" >&2
  fail "fleet fencing did not quarantine correctly (review/testing/shipped=$rcount)"
fi
unset AQ_FSTUB_PATCH_CODE
|
|
|
|
# 37. lease renew (unit): fleet_lease_renew must POST to
#     /fleet/jobs/<id>/lease/renew with the leaseEpoch stored in the job meta.
#     The function is called in a child bash that sources a copy of the script
#     with its `main "$@"` line removed, plus lib/fleet-client.sh.
funcs="$tmp/aq-funcs-fl.sh"
sed '/^main "\$@"/d' "$AQ" > "$funcs"
renew_calls="$tmp/renew-calls.log"
: > "$renew_calls"
if bash -c '
  set -uo pipefail
  export AGENT_QUEUE_ROOT="'"$tmp"'/queue-renew" AQ_FLEET=1
  export AQ_FLEET_API_CMD="'"$fstub"'" AQ_FSTUB_CALLS="'"$renew_calls"'"
  source "'"$funcs"'"                      # agent-queue helpers (main stripped; SCRIPT_DIR=/tmp here)
  source "'"$HERE"'/lib/fleet-client.sh"   # source the lib explicitly (relative source is skipped)
  ensure_dirs
  printf "%s\n" "job=jr" "fleet_job_id=fjob_r" "fleet_lease_epoch=7" > "$STATE/jr.meta"
  fleet_lease_renew jr
'; then
  # if/else rather than `&& pass || {...}`, so the failure branch runs only
  # when the expected renew call is really absent from the log.
  if grep -q 'POST /fleet/jobs/fjob_r/lease/renew :: .*"leaseEpoch":7' "$renew_calls"; then
    pass "fleet: lease renew issues POST .../lease/renew with current leaseEpoch"
  else
    cat "$renew_calls" >&2
    fail "fleet lease renew payload missing/incorrect"
  fi
else
  fail "fleet_lease_renew invocation errored"
fi
|
|
|
|
# 38. offline-degrade: the stub answers every PATCH with 500. Unlike the 409
#     case the job must NOT be quarantined: it reaches review/ with
#     fleet_degraded=1 and a result other than fenced_quarantine.
export AGENT_QUEUE_ROOT="$tmp/queue-fl4"
"$AQ" init >/dev/null
export AQ_FSTUB_CALLS="$tmp/fl4-calls.log"
export AQ_FSTUB_CLAIM_FLAG="$tmp/fl4-claimed"
export AQ_FSTUB_JOB_ID="fjob_4"
export AQ_FSTUB_BODY="work four"
export AQ_FSTUB_PATCH_CODE=500
: > "$AQ_FSTUB_CALLS"
rm -f "$AQ_FSTUB_CLAIM_FLAG"
AQ_FLEET=1 AGENT_QUEUE_POLL=1 DEVIN_BIN="$stub" "$AQ" run --once >/dev/null 2>&1
fmeta4=$(find "$AGENT_QUEUE_ROOT/.state" -name '*fleet-fjob_4.meta' | head -1)
if ls "$AGENT_QUEUE_ROOT"/review/*fleet-fjob_4.md >/dev/null 2>&1 \
  && [ "$(metaval "$fmeta4" fleet_degraded)" = "1" ] \
  && [ "$(metaval "$fmeta4" result)" != "fenced_quarantine" ]; then
  pass "fleet offline-degrade: coordinator 5xx -> job completes locally (degraded), not quarantined"
else
  cat "$AQ_FSTUB_CALLS" >&2
  fail "fleet offline-degrade behaved incorrectly"
fi
unset AQ_FSTUB_PATCH_CODE
|
|
|
|
# 39. no-leak: neither the claimed bodyMd sentinel nor the token sentinel may
#     appear in the calls recorded during check 34 ($tmp/fl1-calls.log).
#     The log must exist and be non-empty first: a negated grep over a missing
#     or empty file would otherwise "prove" no leak without inspecting any call.
noleak_log="$tmp/fl1-calls.log"
if [ -s "$noleak_log" ] \
  && ! grep -q 'FLEET-BODY-SENTINEL' "$noleak_log" \
  && ! grep -q 'SENTINEL-TOKEN' "$noleak_log"; then
  pass "fleet no-leak: bodyMd/token never appear in coordinator report payloads"
else
  fail "fleet leaked bodyMd or token into a report payload"
fi
unset AQ_FLEET_API_CMD AQ_FLEET_CWD AQ_FSTUB_CALLS AQ_FSTUB_CLAIM_FLAG AQ_FSTUB_JOB_ID AQ_FSTUB_BODY
|
|
|
|
# ─────────────────────────────────────────────────────────────────────
|
|
# Phase 2 — Slice 4 cases (feature flags + shadow / dual-run). Reuses the
|
|
# fleet stub. SHADOW mode = AQ_FLEET=1 + AQ_FLEET_ROUTE=0 + AQ_FLEET_SHADOW=1:
|
|
# the LOCAL inbox is authoritative; the coordinator is queried in parallel and
|
|
# compared, NEVER acted on; verdicts land in the shadow log. (Check 33 already
|
|
# covers flags-off ⇒ zero coordinator calls.)
|
|
# ─────────────────────────────────────────────────────────────────────
|
|
# mk_local_job <queue_root> <name> <idempotency-key>
# Writes a minimal local job manifest to <queue_root>/inbox/<name>.md:
# frontmatter with engine devin, cwd taken from the global $work, yolo true
# and the given idempotency key, followed by a "# local <name>" body line.
mk_local_job() { # <queue_root> <name> <idempotency-key>
  local queue_root=$1 job_name=$2 idem_key=$3
  printf '%s\n' \
    '---' \
    'engine: devin' \
    "cwd: $work" \
    'yolo: true' \
    "idempotency-key: $idem_key" \
    '---' \
    '' \
    "# local $job_name" \
    > "$queue_root/inbox/$job_name.md"
}
|
|
# verdict_has <shadowlog> <localKey> <coordKey> <verdict> — exact structured-line match
|
|
verdict_has() {
  # Exit 0 iff some tab-separated line of the shadow log ($1) has field 2 equal
  # to $2 (local key), field 3 equal to $3 (coord key) and field 4 equal to $4
  # (verdict); exit 1 when no line matches.
  awk -F'\t' -v want_local="$2" -v want_coord="$3" -v want_verdict="$4" '
    $2 == want_local && $3 == want_coord && $4 == want_verdict { found = 1 }
    END { exit found ? 0 : 1 }
  ' "$1"
}
|
|
|
|
# 40. SHADOW AGREE: the stub's would-be job id equals the local idempotency key,
#     so the shadow log must hold an AGREE line. The local job still reaches
#     review/, failed/ stays empty, no *fleet-* job appears in review/, and the
#     recorded calls show a dryRun probe with a "-shadow" suffixed id.
export AGENT_QUEUE_ROOT="$tmp/queue-sh-agree"
"$AQ" init >/dev/null
slog="$tmp/sh-agree.log"
scalls="$tmp/sh-agree-calls.log"
: > "$scalls"
rm -f "$slog"
mk_local_job "$AGENT_QUEUE_ROOT" "locA" "fjob_same"
AQ_FLEET=1 \
  AQ_FLEET_ROUTE=0 \
  AQ_FLEET_SHADOW=1 \
  AQ_FLEET_API_CMD="$fstub" \
  AQ_FSTUB_CALLS="$scalls" \
  AQ_FSTUB_JOB_ID="fjob_same" \
  AQ_FLEET_SHADOW_LOG="$slog" \
  DEVIN_BIN="$stub" \
  "$AQ" run --once >/dev/null 2>&1
if verdict_has "$slog" fjob_same fjob_same AGREE \
  && ls "$AGENT_QUEUE_ROOT"/review/locA.md >/dev/null 2>&1 \
  && [ -z "$(ls "$AGENT_QUEUE_ROOT"/failed 2>/dev/null)" ] \
  && ! ls "$AGENT_QUEUE_ROOT"/review/*fleet-* >/dev/null 2>&1 \
  && grep -q '"dryRun":true' "$scalls" \
  && grep -q -- '-shadow"' "$scalls"; then
  pass "fleet shadow AGREE: local==coord ⇒ AGREE; real job ships offline; no coord job materialized; read-only probe"
else
  cat "$slog" "$scalls" >&2
  fail "shadow AGREE behaved incorrectly"
fi
|
|
|
|
# 41. SHADOW DIVERGE: the stub offers fjob_remote while the local key is
#     fjob_local, so a DIVERGE line is expected. The local job still reaches
#     review/ and failed/ stays empty.
export AGENT_QUEUE_ROOT="$tmp/queue-sh-div"
"$AQ" init >/dev/null
slog="$tmp/sh-div.log"
: > "$tmp/sh-div-calls.log"
rm -f "$slog"
mk_local_job "$AGENT_QUEUE_ROOT" "locB" "fjob_local"
AQ_FLEET=1 \
  AQ_FLEET_ROUTE=0 \
  AQ_FLEET_SHADOW=1 \
  AQ_FLEET_API_CMD="$fstub" \
  AQ_FSTUB_CALLS="$tmp/sh-div-calls.log" \
  AQ_FSTUB_JOB_ID="fjob_remote" \
  AQ_FLEET_SHADOW_LOG="$slog" \
  DEVIN_BIN="$stub" \
  "$AQ" run --once >/dev/null 2>&1
if verdict_has "$slog" fjob_local fjob_remote DIVERGE \
  && ls "$AGENT_QUEUE_ROOT"/review/locB.md >/dev/null 2>&1 \
  && [ -z "$(ls "$AGENT_QUEUE_ROOT"/failed 2>/dev/null)" ]; then
  pass "fleet shadow DIVERGE: local!=coord ⇒ DIVERGE logged; real job still completes; nothing quarantined"
else
  cat "$slog" >&2
  fail "shadow DIVERGE behaved incorrectly"
fi
|
|
|
|
# 42. SHADOW COORD_EMPTY: the claim flag file is created up front, so the stub
#     answers claimed:false; the shadow log must hold a COORD_EMPTY line with
#     "<none>" as the coord key, and the local job still reaches review/.
export AGENT_QUEUE_ROOT="$tmp/queue-sh-ce"
"$AQ" init >/dev/null
slog="$tmp/sh-ce.log"
rm -f "$slog"
ceflag="$tmp/sh-ce-flag"
: > "$ceflag"   # flag present ⇒ stub claimed:false
mk_local_job "$AGENT_QUEUE_ROOT" "locC" "fjob_ce"
AQ_FLEET=1 \
  AQ_FLEET_ROUTE=0 \
  AQ_FLEET_SHADOW=1 \
  AQ_FLEET_API_CMD="$fstub" \
  AQ_FSTUB_CALLS="$tmp/sh-ce-calls.log" \
  AQ_FSTUB_CLAIM_FLAG="$ceflag" \
  AQ_FLEET_SHADOW_LOG="$slog" \
  DEVIN_BIN="$stub" \
  "$AQ" run --once >/dev/null 2>&1
if verdict_has "$slog" fjob_ce "<none>" COORD_EMPTY \
  && ls "$AGENT_QUEUE_ROOT"/review/locC.md >/dev/null 2>&1; then
  pass "fleet shadow COORD_EMPTY: coordinator empty ⇒ COORD_EMPTY logged; real job still completes"
else
  cat "$slog" >&2
  fail "shadow COORD_EMPTY behaved incorrectly"
fi
|
|
|
|
# 43. SHADOW NON-FATAL: a dedicated stub answers POST /fleet/claim with 500.
#     The run must still exit 0 with the local job in review/, a SHADOW_ERROR
#     entry in the shadow log, and nothing in failed/.
fstub5xx="$tmp/fleet-stub-5xx.sh"
cat > "$fstub5xx" <<'STUBEOF'
#!/usr/bin/env bash
[ -n "${AQ_FSTUB_CALLS:-}" ] && printf '%s %s :: %s\n' "$1" "$2" "$3" >> "$AQ_FSTUB_CALLS"
case "$1 $2" in
  "POST /fleet/factories/heartbeat") printf '%s\n200\n' '{"ok":true}' ;;
  "POST /fleet/claim") printf '%s\n500\n' '{}' ;;
  *) printf '%s\n200\n' '{}' ;;
esac
STUBEOF
chmod +x "$fstub5xx"
export AGENT_QUEUE_ROOT="$tmp/queue-sh-5xx"; "$AQ" init >/dev/null
slog="$tmp/sh-5xx.log"; rm -f "$slog"
mk_local_job "$AGENT_QUEUE_ROOT" "locD" "fjob_5xx"
AQ_FLEET=1 AQ_FLEET_ROUTE=0 AQ_FLEET_SHADOW=1 \
  AQ_FLEET_API_CMD="$fstub5xx" AQ_FSTUB_CALLS="$tmp/sh-5xx-calls.log" \
  AQ_FLEET_SHADOW_LOG="$slog" DEVIN_BIN="$stub" "$AQ" run --once >/dev/null 2>&1; rc=$?
if [ "$rc" -eq 0 ] && ls "$AGENT_QUEUE_ROOT"/review/locD.md >/dev/null 2>&1 \
  && grep -q 'SHADOW_ERROR' "$slog" 2>/dev/null \
  && [ -z "$(ls "$AGENT_QUEUE_ROOT"/failed 2>/dev/null)" ]; then
  pass "fleet shadow NON-FATAL: coordinator 5xx ⇒ real job completes (exit 0), SHADOW_ERROR noted, not quarantined"
else
  # Diagnostics go to stderr, like every other failure dump in this file, so
  # they stay together with the shadow log and off stdout.
  echo "rc=$rc" >&2
  cat "$slog" >&2
  fail "shadow non-fatal behaved incorrectly"
fi
|
|
|
|
# 44. ROUTE precedence: with AQ_FLEET_ROUTE=1 and AQ_FLEET_SHADOW=1 both set,
#     stderr must mention "ROUTE wins", the coordinator job must be
#     materialized and run to review/, and the shadow log must stay
#     empty or absent.
export AGENT_QUEUE_ROOT="$tmp/queue-sh-prec"
export AQ_FLEET_CWD="$work"
"$AQ" init >/dev/null
slog="$tmp/sh-prec.log"
rm -f "$slog"
AQ_FLEET=1 \
  AQ_FLEET_ROUTE=1 \
  AQ_FLEET_SHADOW=1 \
  AQ_FLEET_API_CMD="$fstub" \
  AQ_FSTUB_CALLS="$tmp/sh-prec-calls.log" \
  AQ_FSTUB_CLAIM_FLAG="$tmp/sh-prec-claimed" \
  AQ_FSTUB_JOB_ID="fjob_p" \
  AQ_FLEET_SHADOW_LOG="$slog" \
  DEVIN_BIN="$stub" \
  "$AQ" run --once >/dev/null 2>"$tmp/sh-prec.err"
if grep -qi 'ROUTE wins' "$tmp/sh-prec.err" \
  && ls "$AGENT_QUEUE_ROOT"/review/*fleet-fjob_p.md >/dev/null 2>&1 \
  && [ ! -s "$slog" ]; then
  pass "fleet ROUTE precedence: ROUTE=1 + SHADOW=1 ⇒ ROUTE path + warning, no shadow compare"
else
  cat "$tmp/sh-prec.err" >&2
  fail "ROUTE>SHADOW precedence behaved incorrectly"
fi
unset AQ_FLEET_CWD
|
|
|
|
# 45. ROUTE=0 + AQ_FLEET=1 (shadow off): the local inbox is authoritative, so
#     the call log must contain no POST /fleet/claim and the local job must
#     still reach review/.
export AGENT_QUEUE_ROOT="$tmp/queue-sh-route0"
"$AQ" init >/dev/null
scalls="$tmp/sh-route0-calls.log"
: > "$scalls"
mk_local_job "$AGENT_QUEUE_ROOT" "locE" "fjob_e"
AQ_FLEET=1 \
  AQ_FLEET_ROUTE=0 \
  AQ_FLEET_SHADOW=0 \
  AQ_FLEET_API_CMD="$fstub" \
  AQ_FSTUB_CALLS="$scalls" \
  AQ_FSTUB_JOB_ID="fjob_e" \
  DEVIN_BIN="$stub" \
  "$AQ" run --once >/dev/null 2>&1
if ! grep -q 'POST /fleet/claim' "$scalls" \
  && ls "$AGENT_QUEUE_ROOT"/review/locE.md >/dev/null 2>&1; then
  pass "fleet ROUTE=0: local inbox authoritative — coordinator not used to source work; local job completes"
else
  cat "$scalls" >&2
  fail "ROUTE=0 local-authoritative behaved incorrectly"
fi
|
|
|
|
# 46. fleet-shadow-report: feed it a seeded four-line shadow log (2 AGREE,
#     1 DIVERGE, 1 COORD_EMPTY) and expect those counts plus TOTAL 4 and
#     AGREEMENT 50% in its output.
rlog="$tmp/sh-report.log"
{
  printf '%s\t%s\t%s\t%s\n' 100 fjob_a fjob_a AGREE
  printf '%s\t%s\t%s\t%s\n' 101 fjob_b fjob_b AGREE
  printf '%s\t%s\t%s\t%s\n' 102 fjob_c fjob_x DIVERGE
  printf '%s\t%s\t%s\t%s\n' 103 fjob_d '<none>' COORD_EMPTY
} > "$rlog"
rout=$(AQ_FLEET_SHADOW_LOG="$rlog" "$AQ" fleet-shadow-report 2>&1)
# Every expected summary row must be present in the captured output.
report_ok=1
for want in 'AGREE +2' 'DIVERGE +1' 'COORD_EMPTY +1' 'TOTAL +4' 'AGREEMENT +50%'; do
  printf '%s\n' "$rout" | grep -qE "$want" || report_ok=0
done
if [ "$report_ok" = "1" ]; then
  pass "fleet-shadow-report: per-verdict counts (AGREE 2 / DIVERGE 1 / COORD_EMPTY 1), TOTAL 4, AGREEMENT 50%"
else
  printf '%s\n' "$rout" >&2
  fail "fleet-shadow-report summary incorrect"
fi
|
|
|
|
# 47. fleet_shadow_report (unit): calling it for job js / coord job fjob_r /
#     stage building must record a PATCH /fleet/jobs/fjob_r whose body carries
#     "shadow":true. Runs in a child bash that sources a copy of the script
#     with its `main "$@"` line removed, plus lib/fleet-client.sh.
funcs2="$tmp/aq-funcs-sh.sh"
sed '/^main "\$@"/d' "$AQ" > "$funcs2"
rep_calls="$tmp/sh-rep-calls.log"
: > "$rep_calls"
if bash -c '
  set -uo pipefail
  export AGENT_QUEUE_ROOT="'"$tmp"'/queue-sh-rep" AQ_FLEET=1 AQ_FLEET_ROUTE=0 AQ_FLEET_SHADOW=1
  export AQ_FLEET_API_CMD="'"$fstub"'" AQ_FSTUB_CALLS="'"$rep_calls"'"
  source "'"$funcs2"'"
  source "'"$HERE"'/lib/fleet-client.sh"
  ensure_dirs
  fleet_shadow_report js fjob_r building
'; then
  # if/else rather than `&& pass || {...}`, so the failure branch runs only
  # when the shadow PATCH is really missing from the log.
  if grep -q 'PATCH /fleet/jobs/fjob_r :: .*"shadow":true' "$rep_calls"; then
    pass "fleet_shadow_report: PATCH carries shadow:true (report exercised, response ignored)"
  else
    cat "$rep_calls" >&2
    fail "shadow report payload missing shadow flag"
  fi
else
  fail "fleet_shadow_report invocation errored"
fi
unset AQ_FLEET_API_CMD AQ_FLEET_SHADOW_LOG AGENT_QUEUE_ROOT
|
|
|
|
# ─────────────────────────────────────────────────────────────────────
|
|
# Phase 2 — two-factory parallel demo (EXIT CRITERIA, §14). Runs the demo
|
|
# HEADLESS in STUB mode (its own stateful coordinator stub + two real factory
|
|
# daemons) and asserts the three exit guarantees. Self-contained: the demo owns
|
|
# its temp dirs/daemons and cleans them up; no live service.
|
|
# ─────────────────────────────────────────────────────────────────────
|
|
# Run the two-factory demo once, in stub mode (live-coordinator variables are
# blanked), capturing stdout+stderr to $demo_out and the exit status to
# $demo_rc for checks 49-51.
demo_sh="$HERE/demo/two-factory-demo.sh"
demo_out="$tmp/two-factory.out"
demo_rc=0
DEMO_MODE=stub AQ_FLEET_API= AQ_FLEET_TOKEN= DEMO_JOB_SLEEP=2 DEMO_TIMEOUT=45 \
  bash "$demo_sh" >"$demo_out" 2>&1 || demo_rc=$?
|
|
|
|
# 49. the stub-mode demo is green end-to-end: exit status 0 and an overall
#     "[demo] PASS" line in its output.
if [ "$demo_rc" -ne 0 ] || ! grep -q '\[demo\] PASS' "$demo_out"; then
  cat "$demo_out" >&2
  fail "two-factory demo did not pass (rc=$demo_rc)"
else
  pass "two-factory demo: stub-mode run is green (exit 0, all guarantees PASS)"
fi
|
|
|
|
# 50. no double-assign + parallelism: the demo output must contain both the
#     guarantee (a) line (all 3 jobs executed to terminal) and the
#     guarantee (c) line (both factories claimed concurrently).
demo_has_a=0
demo_has_c=0
grep -q '(a) no double-assign: all 3 jobs executed to terminal' "$demo_out" && demo_has_a=1
grep -q '(c) parallelism: both factories claimed concurrently' "$demo_out" && demo_has_c=1
if [ "$demo_has_a" = "1" ] && [ "$demo_has_c" = "1" ]; then
  pass "two-factory demo: 3 jobs terminal across 2 factories, no double-assignment, ran in parallel"
else
  cat "$demo_out" >&2
  fail "two-factory demo: no-double-assign / parallelism assertion missing"
fi
|
|
|
|
# 51. kill -> reaper reclaim -> survivor completes -> zombie report fenced:
#     the demo output must contain the RECLAIM event line, the
#     "was FENCED (HTTP 409)" line and the guarantee (b) fencing line.
if ! grep -q '(b) reclaim: .* RECLAIM event' "$demo_out" \
  || ! grep -q 'was FENCED (HTTP 409)' "$demo_out" \
  || ! grep -q '(b) fencing: zombie report rejected (409)' "$demo_out"; then
  cat "$demo_out" >&2
  fail "two-factory demo: kill/reclaim/fenced-zombie path did not fire"
else
  pass "two-factory demo: kill -> reclaim -> completed by survivor -> zombie report FENCED (409)"
fi

# Final banner printed as the script's last statement.
printf '%s\n' "self-test PASS"
|