From 4600a41e5d4870305a1ee57517ff899badd695e7 Mon Sep 17 00:00:00 2001
From: saravanakumardb1
Date: Fri, 29 May 2026 17:44:27 -0700
Subject: [PATCH] test(agent-queue): self-test cases for
 manifest/priority/capabilities/engine-class/idempotency (P1-S1)

Adds (never weakens existing) cases, each in its own temp AGENT_QUEUE_ROOT
using the no-op engine stub:
- backward-compat: legacy engine/cwd/yolo-only .md still lands in review/.
- priority: with --max 1, a critical job queued after a low job runs first
  (order-recording stub).
- capability mismatch: has:definitely-not-installed -> failed/
  result=capability_mismatch, asserting the agent was never launched.
- engine-class: agentic-coder + no engine, DEVIN_BIN stubbed -> review/.
- idempotency: same key+body twice -> 1 inbox file; same key+changed body
  in inbox -> superseded; same key+different body after drain -> rejected.

Inbox counts use find (not a globbing ls) so set -e/pipefail tolerate an
empty inbox.
---
 agent-queue/selftest.sh | 127 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 127 insertions(+)

diff --git a/agent-queue/selftest.sh b/agent-queue/selftest.sh
index fa31217..c15b3e8 100755
--- a/agent-queue/selftest.sh
+++ b/agent-queue/selftest.sh
@@ -103,4 +103,131 @@ fi
 # status must not error
 "$AQ" status >/dev/null 2>&1 && pass "status runs clean"
 
+# ─────────────────────────────────────────────────────────────────────
+# Phase 1 — Slice 1 cases (manifest/priority/capabilities/engine-class/idempotency).
+# Each uses its OWN AGENT_QUEUE_ROOT; the no-op engine stub means no real CLI runs.
+# ─────────────────────────────────────────────────────────────────────
+
+# 7. backward-compat: a legacy engine/cwd/yolo-only .md still completes → review/
+export AGENT_QUEUE_ROOT="$tmp/queue-bc"
+bc="$tmp/bc-legacy.md"
+printf '%s\n' '---' 'engine: devin' "cwd: $work" 'yolo: true' '---' '' '# legacy task' > "$bc"
+"$AQ" init >/dev/null
+DEVIN_BIN="$stub" "$AQ" add "$bc" >/dev/null
+DEVIN_BIN="$stub" "$AQ" run --once >/dev/null 2>&1
+if ls "$AGENT_QUEUE_ROOT"/review/*.md >/dev/null 2>&1; then
+  pass "backward-compat: legacy engine/cwd/yolo-only .md → review/"
+else
+  ls -R "$AGENT_QUEUE_ROOT" >&2 || true
+  fail "legacy .md did not land in review/"
+fi
+
+# 8. priority: with --max 1, a 'critical' job queued AFTER a 'low' job runs first.
+# An order-recording devin-style stub appends each job's TASKID as it launches.
+export AGENT_QUEUE_ROOT="$tmp/queue-prio"
+ostub="$tmp/order-engine"
+cat > "$ostub" <<'STUB'
+#!/usr/bin/env bash
+# order-recording no-op engine stub (devin-style: --prompt-file PATH)
+pf=""
+while [ $# -gt 0 ]; do
+  case "$1" in
+    --prompt-file) pf="${2:-}"; shift 2;;
+    *) shift;;
+  esac
+done
+if [ -n "${pf:-}" ] && [ -n "${AQ_ORDER:-}" ]; then
+  grep -m1 '^TASKID=' "$pf" >> "$AQ_ORDER" 2>/dev/null || true
+fi
+exit 0
+STUB
+chmod +x "$ostub"
+export AQ_ORDER="$tmp/prio-order.log"; : > "$AQ_ORDER"
+plow="$tmp/p-low.md"
+printf '%s\n' '---' 'engine: devin' "cwd: $work" 'yolo: true' 'priority: low' '---' '' 'TASKID=low' > "$plow"
+pcrit="$tmp/p-crit.md"
+printf '%s\n' '---' 'engine: devin' "cwd: $work" 'yolo: true' 'priority: critical' '---' '' 'TASKID=critical' > "$pcrit"
+"$AQ" init >/dev/null
+DEVIN_BIN="$ostub" "$AQ" add "$plow" >/dev/null
+sleep 1  # ensure the critical job has a strictly newer (later) queue timestamp
+DEVIN_BIN="$ostub" "$AQ" add "$pcrit" >/dev/null
+DEVIN_BIN="$ostub" "$AQ" run --once --max 1 >/dev/null 2>&1
+if [ "$(head -1 "$AQ_ORDER" 2>/dev/null || true)" = "TASKID=critical" ]; then
+  pass "priority: critical (queued later) ran before low"
+else
+  echo "--- execution order ---" >&2; cat "$AQ_ORDER" >&2 || true
+  fail "priority ordering did not pick the critical job first"
+fi
+
+# 9. capability mismatch: a job requiring an absent tool → failed/ with
+# result=capability_mismatch, and the agent is NEVER launched.
+export AGENT_QUEUE_ROOT="$tmp/queue-cap"
+launchflag="$tmp/cap-launched.flag"; rm -f "$launchflag"
+launchstub="$tmp/launch-engine"
+printf '#!/usr/bin/env bash\ntouch %q\nexit 0\n' "$launchflag" > "$launchstub"
+chmod +x "$launchstub"
+capjob="$tmp/cap.md"
+printf '%s\n' '---' 'engine: devin' "cwd: $work" 'yolo: true' \
+  'capabilities: [has:definitely-not-installed]' '---' '' '# capability task' > "$capjob"
+"$AQ" init >/dev/null
+DEVIN_BIN="$launchstub" "$AQ" add "$capjob" >/dev/null
+DEVIN_BIN="$launchstub" "$AQ" run --once >/dev/null 2>&1
+if ls "$AGENT_QUEUE_ROOT"/failed/*.md >/dev/null 2>&1 \
+   && grep -q '^result=capability_mismatch' "$AGENT_QUEUE_ROOT"/.state/*.meta 2>/dev/null; then
+  pass "capability mismatch → failed/ (result=capability_mismatch)"
+else
+  ls -R "$AGENT_QUEUE_ROOT" >&2 || true
+  fail "capability-mismatch job not routed to failed/ with result=capability_mismatch"
+fi
+if [ -e "$launchflag" ]; then
+  fail "agent WAS launched on capability mismatch (it must not be)"
+else
+  pass "capability mismatch: agent never launched"
+fi
+
+# 10. engine-class: a job with engine-class:agentic-coder and no engine, with
+# DEVIN_BIN stubbed (available), resolves to devin, runs, and lands in review/.
+export AGENT_QUEUE_ROOT="$tmp/queue-ec"
+ecjob="$tmp/ec.md"
+printf '%s\n' '---' 'engine-class: agentic-coder' "cwd: $work" 'yolo: true' '---' '' '# engine-class task' > "$ecjob"
+"$AQ" init >/dev/null
+DEVIN_BIN="$stub" "$AQ" add "$ecjob" >/dev/null
+DEVIN_BIN="$stub" "$AQ" run --once >/dev/null 2>&1
+if ls "$AGENT_QUEUE_ROOT"/review/*.md >/dev/null 2>&1; then
+  pass "engine-class: agentic-coder (no engine) resolved to devin → review/"
+else
+  ls -R "$AGENT_QUEUE_ROOT" >&2 || true
+  fail "engine-class job did not resolve+run to review/"
+fi
+
+# 11. idempotency-key dedupe on add.
+export AGENT_QUEUE_ROOT="$tmp/queue-idem"
+"$AQ" init >/dev/null
+ia="$tmp/idem-a.md"
+printf '%s\n' '---' 'engine: devin' "cwd: $work" 'yolo: true' 'idempotency-key: dedup-1' '---' '' 'alpha body' > "$ia"
+DEVIN_BIN="$stub" "$AQ" add "$ia" >/dev/null
+DEVIN_BIN="$stub" "$AQ" add "$ia" >/dev/null  # identical key+body → no-op
+cnt=$(find "$AGENT_QUEUE_ROOT/inbox" -maxdepth 1 -type f -name '*.md' 2>/dev/null | wc -l | tr -d ' ')
+[ "$cnt" = "1" ] && pass "idempotency: same key+body added twice → exactly 1 inbox file" \
+  || fail "idempotency: expected 1 inbox file after duplicate add, got $cnt"
+# same key, different body, prior STILL in inbox → supersede (still exactly 1)
+ib="$tmp/idem-b.md"
+printf '%s\n' '---' 'engine: devin' "cwd: $work" 'yolo: true' 'idempotency-key: dedup-1' '---' '' 'beta body (changed)' > "$ib"
+DEVIN_BIN="$stub" "$AQ" add "$ib" >/dev/null
+cnt=$(find "$AGENT_QUEUE_ROOT/inbox" -maxdepth 1 -type f -name '*.md' 2>/dev/null | wc -l | tr -d ' ')
+[ "$cnt" = "1" ] && pass "idempotency: same key + changed body, prior in inbox → superseded (1 file)" \
+  || fail "idempotency: expected 1 inbox file after supersede, got $cnt"
+# drain (prior leaves inbox → review), then same key + different body → REJECT
+DEVIN_BIN="$stub" "$AQ" run --once >/dev/null 2>&1
+ic="$tmp/idem-c.md"
+printf '%s\n' '---' 'engine: devin' "cwd: $work" 'yolo: true' 'idempotency-key: dedup-1' '---' '' 'gamma body (changed again)' > "$ic"
+if DEVIN_BIN="$stub" "$AQ" add "$ic" >/dev/null 2>&1; then
+  fail "idempotency: same key + different body (prior past inbox) should be rejected"
+else
+  pass "idempotency: same key + different body, prior past inbox → rejected"
+fi
+cnt=$(find "$AGENT_QUEUE_ROOT/inbox" -maxdepth 1 -type f -name '*.md' 2>/dev/null | wc -l | tr -d ' ')
+[ "$cnt" = "0" ] && pass "idempotency: a rejected add enqueues nothing" \
+  || fail "idempotency: rejected add should not enqueue (inbox=$cnt)"
+
 echo "self-test PASS"