diff --git a/agent-queue/agent-queue.sh b/agent-queue/agent-queue.sh index a583558..f450fea 100755 --- a/agent-queue/agent-queue.sh +++ b/agent-queue/agent-queue.sh @@ -971,7 +971,6 @@ run_worker() { echo "verify_exit=$vrc" >> "$metaf" if [[ $vrc -eq 0 ]]; then mv "$review_file" "$TESTING/" 2>/dev/null - _meta_end "$metaf" "testing" "$started" echo "VERIFY PASSED — promoted to testing (QA): $(date)" >> "$logf" # PR mode (§PR): work passed verify — commit/push the job branch, open a PR, # record the URL in the meta, and push it onto the coordinator run. @@ -995,14 +994,22 @@ run_worker() { # is enabled, the factory's verify gate IS the test phase — advance # testing -> shipped (closing the testing->shipped gap autonomously). Default # off leaves the job resting at `testing` for the human review gate / ship. + local _ship_done=0 if fleet_enabled && _fleet_is_job "$job"; then fleet_report "$job" testing if [[ "${AQ_FLEET_AUTOSHIP:-0}" == 1 ]] && fleet_report "$job" shipped; then mv "$TESTING/$job.md" "$SHIPPED/" 2>/dev/null - _meta_end "$metaf" "shipped" "$started" + _meta_end "$metaf" "shipped" "$started"; _ship_done=1 echo "FLEET AUTOSHIP — testing -> shipped: $(date)" >> "$logf" fi fi + # Mark the concurrency slot done LAST — only after the PR open/merge + the + # coordinator reports above. Writing `ended=` here (not right after the + # testing/ move) keeps the worker counted as active until that work is + # complete, so `run --once` cannot drain-exit and a caller cannot observe + # the job as finished before the PR is actually opened/merged. Fixes a + # flaky race where the PR/report steps ran after the slot was freed. + [[ "$_ship_done" == 1 ]] || _meta_end "$metaf" "testing" "$started" else echo "VERIFY FAILED (rc=$vrc): $(date)" >> "$logf" # verify ran on the review_file; retry policy may requeue it.