feat(agent-queue): report testing + optional autoship to the fleet (close testing->shipped)

Previously the factory reported up to `review` and "shipping is always manual",
so a coordinator job never reached a terminal stage autonomously.

- On a passing local verify, always report `testing` to the coordinator so its
  stage reflects that QA passed (was stuck at `review`).
- New AQ_FLEET_AUTOSHIP=1: the factory's verify gate IS the test phase, so advance
  the coordinator job testing -> shipped and land it in shipped/ locally. This
  closes the testing->shipped gap for an autonomous submit -> shipped pipeline.
  Default off keeps the human review gate authoritative (job rests at testing).

selftest: +2 cases (autoship reports testing+shipped + lands in shipped/; autoship
OFF reports testing but withholds shipped). Full self-test PASS.

Generated with [Devin](https://cli.devin.ai/docs)

Co-Authored-By: Devin <158243242+devin-ai-integration[bot]@users.noreply.github.com>
This commit is contained in:
saravanakumardb1 2026-05-31 04:21:41 -07:00
parent 2d40daf72f
commit df65b7a245
3 changed files with 48 additions and 0 deletions

View File

@ -847,6 +847,19 @@ run_worker() {
mv "$review_file" "$TESTING/" 2>/dev/null
_meta_end "$metaf" "testing" "$started"
echo "VERIFY PASSED — promoted to testing (QA): $(date)" >> "$logf"
# Fleet (§14): mirror local QA to the coordinator. Always report `testing`
# so the coordinator stage reflects that local verify passed. When
# AQ_FLEET_AUTOSHIP=1 the factory's verify gate IS the test phase, so also
# report `shipped`; the job is moved into shipped/ locally only if that
# report returns success. Default off leaves the job resting at `testing`
# for the human review gate / manual ship.
if fleet_enabled && _fleet_is_job "$job"; then
  fleet_report "$job" testing
  if [[ "${AQ_FLEET_AUTOSHIP:-0}" == 1 ]] && fleet_report "$job" shipped; then
    # Record `shipped` locally only when the file really landed in shipped/;
    # otherwise meta and log would claim a stage the on-disk queue lacks.
    if mv "$TESTING/$job.md" "$SHIPPED/" 2>/dev/null; then
      _meta_end "$metaf" "shipped" "$started"
      echo "FLEET AUTOSHIP — testing -> shipped: $(date)" >> "$logf"
    else
      # The coordinator already accepted `shipped`; surface the divergence
      # instead of silently pretending the local move happened.
      echo "FLEET AUTOSHIP — shipped reported but local move to shipped/ failed; job left in testing: $(date)" >> "$logf"
    fi
  fi
fi
else
echo "VERIFY FAILED (rc=$vrc): $(date)" >> "$logf"
# verify ran on the review_file; retry policy may requeue it.

View File

@ -49,6 +49,10 @@ AQ_FLEET_HB_TS=0 # last heartbeat epo
# If AQ_FLEET_ROUTE=1 AND AQ_FLEET_SHADOW=1, ROUTE WINS and shadow is disabled
# (a one-shot warning is logged) — you never shadow and route at the same time.
: "${AQ_FLEET_ROUTE:=1}"
# AQ_FLEET_AUTOSHIP: when set to 1, a passing local verify gate also advances
# the coordinator job testing -> shipped (the factory's verify IS the test
# phase). The default of 0 keeps the human review gate authoritative, so the
# job rests at testing.
: "${AQ_FLEET_AUTOSHIP:=0}"
: "${AQ_FLEET_SHADOW:=0}"
# Factory id used for the read-only shadow claim. Defaults to the real factory
# id with a "-shadow" suffix so it is never the real factory id itself.
: "${AQ_FLEET_SHADOW_FACTORY_ID:=${AQ_FACTORY_ID}-shadow}"

View File

@ -830,6 +830,37 @@ else
cat "$AQ_FSTUB_CALLS" >&2; fail "fleet report/checkpoint payload incorrect"
fi
# 35b. autoship ON: run one worker pass with AQ_FLEET_AUTOSHIP=1 and a verify
#      command that always passes, then require that the stub coordinator saw
#      both a `testing` and a `shipped` PATCH for the job and that the job
#      file landed in shipped/ locally.
export AGENT_QUEUE_ROOT="$tmp/queue-fl-autoship"
export AQ_FLEET_CWD="$work"
"$AQ" init >/dev/null
export AQ_FSTUB_CALLS="$tmp/fl-autoship-calls.log"
export AQ_FSTUB_CLAIM_FLAG="$tmp/fl-autoship-claimed"
export AQ_FSTUB_JOB_ID="fjob_ship"
export AQ_FSTUB_BODY="ship me"
# Start from an empty call log and an unclaimed stub job.
: > "$AQ_FSTUB_CALLS"
rm -f "$AQ_FSTUB_CLAIM_FLAG"
AQ_FLEET=1 AQ_FLEET_AUTOSHIP=1 AGENT_QUEUE_VERIFY=true AGENT_QUEUE_POLL=1 DEVIN_BIN="$stub" "$AQ" run --once >/dev/null 2>&1
# Any single missing piece of evidence fails the case; checks run in the same
# order as before (testing PATCH, shipped PATCH, file in shipped/).
if ! grep -q 'PATCH /fleet/jobs/fjob_ship :: .*"stage":"testing"' "$AQ_FSTUB_CALLS" \
  || ! grep -q 'PATCH /fleet/jobs/fjob_ship :: .*"stage":"shipped"' "$AQ_FSTUB_CALLS" \
  || ! ls "$AGENT_QUEUE_ROOT"/shipped/*fleet-fjob_ship.md >/dev/null 2>&1; then
  cat "$AQ_FSTUB_CALLS" >&2
  fail "autoship did not report testing+shipped"
else
  pass "fleet autoship: verify pass -> testing -> shipped reported + landed in shipped/"
fi
# 35c. autoship OFF (default): a passing verify reports testing but NOT shipped
#      (job rests at testing for the human review gate). Nothing may land in
#      shipped/ locally either.
export AGENT_QUEUE_ROOT="$tmp/queue-fl-noship"; export AQ_FLEET_CWD="$work"
"$AQ" init >/dev/null
export AQ_FSTUB_CALLS="$tmp/fl-noship-calls.log" AQ_FSTUB_CLAIM_FLAG="$tmp/fl-noship-claimed" \
  AQ_FSTUB_JOB_ID="fjob_noship" AQ_FSTUB_BODY="hold"
: > "$AQ_FSTUB_CALLS"; rm -f "$AQ_FSTUB_CLAIM_FLAG"
# Run in a subshell with AQ_FLEET_AUTOSHIP unset so the *default* is what gets
# exercised, even if the caller's environment exports AQ_FLEET_AUTOSHIP=1.
(
  unset AQ_FLEET_AUTOSHIP
  AQ_FLEET=1 AGENT_QUEUE_VERIFY=true AGENT_QUEUE_POLL=1 DEVIN_BIN="$stub" "$AQ" run --once
) >/dev/null 2>&1
if grep -q 'PATCH /fleet/jobs/fjob_noship :: .*"stage":"testing"' "$AQ_FSTUB_CALLS" \
  && ! grep -q 'PATCH /fleet/jobs/fjob_noship :: .*"stage":"shipped"' "$AQ_FSTUB_CALLS" \
  && ! ls "$AGENT_QUEUE_ROOT"/shipped/*fleet-fjob_noship.md >/dev/null 2>&1; then
  pass "fleet autoship OFF: verify pass reports testing but not shipped (human gate)"
else
  cat "$AQ_FSTUB_CALLS" >&2; fail "autoship-off should report testing and withhold shipped"
fi
# 36. FENCING: PATCH returns conflict (stale epoch) → worker self-aborts, job is
# quarantined to failed/ (NOT review/testing/shipped), fenced is recorded.
export AGENT_QUEUE_ROOT="$tmp/queue-fl3"