From 57831e3e7a617bcb6cb9d1887b3b703ebfc4e064 Mon Sep 17 00:00:00 2001 From: saravanakumardb1 Date: Sun, 31 May 2026 02:27:17 -0700 Subject: [PATCH] feat(agent-queue): report run insights to the fleet + normalize API base #1 fleet_report_insights: on a successful fleet run the factory now reports the parsed cost/token/effort metrics (model, tokensIn/Out/cached, costUsd, turns, toolCalls) plus the run result onto the coordinator run via POST .../lease/release (which also frees the lease). parse_usage already extracted these into the job meta; they were never sent. Engines that do not expose usage locally (devin) still land result + endedAt. #2 normalize AQ_FLEET_API: platform-service mounts fleet under /api, so a base without it silently returned 404 on every call. Strip a trailing slash and append /api unless already present, so AQ_FLEET_API=http://host:4003 works too. selftest: +2 cases (insights reported via lease/release; API-base normalization). Full self-test PASS. Generated with [Devin](https://cli.devin.ai/docs) Co-Authored-By: Devin <158243242+devin-ai-integration[bot]@users.noreply.github.com> --- agent-queue/agent-queue.sh | 3 +++ agent-queue/lib/fleet-client.sh | 38 +++++++++++++++++++++++++++++++++ agent-queue/selftest.sh | 28 ++++++++++++++++++++++++ 3 files changed, 69 insertions(+) diff --git a/agent-queue/agent-queue.sh b/agent-queue/agent-queue.sh index d3bdc1b..ce34934 100755 --- a/agent-queue/agent-queue.sh +++ b/agent-queue/agent-queue.sh @@ -816,6 +816,9 @@ run_worker() { fleet_quarantine "$job" "$doing_file" "$metaf" "$logf" return 0 fi + # Report cost/token/effort metrics (parse_usage wrote them to the meta just + # above) + result onto the coordinator's run, and release the held lease. + fleet_report_insights "$job" review fi # Agent succeeded: land in review/, then run the auto-QA verify gate. 
The # worker is still alive here so the concurrency slot stays held through diff --git a/agent-queue/lib/fleet-client.sh b/agent-queue/lib/fleet-client.sh index 89d499f..af1df96 100644 --- a/agent-queue/lib/fleet-client.sh +++ b/agent-queue/lib/fleet-client.sh @@ -21,6 +21,11 @@ # ── Config (env-overridable) ──────────────────────────────────────── AQ_FLEET="${AQ_FLEET:-0}" # master switch (0 = offline) AQ_FLEET_API="${AQ_FLEET_API:-http://localhost:4003/api}" # base URL incl. /api +# Normalize: platform-service mounts the fleet routes under /api. Strip a trailing +# slash and append /api unless already present, so AQ_FLEET_API=http://host:4003 +# (the natural form) works too instead of silently 404ing every fleet call. +AQ_FLEET_API="${AQ_FLEET_API%/}" +[[ "$AQ_FLEET_API" == */api ]] || AQ_FLEET_API="${AQ_FLEET_API}/api" AQ_FLEET_TOKEN="${AQ_FLEET_TOKEN:-}" # bearer; never hardcode # AQ_PRODUCT_ID is shared with the Slice-4 tracker config (X-Product-Id header). AQ_FACTORY_ID="${AQ_FACTORY_ID:-$( (hostname -s 2>/dev/null || hostname 2>/dev/null || echo factory) | tr -cd 'A-Za-z0-9._-')-$$}" @@ -259,6 +264,39 @@ fleet_lease_release() { return 0 } +# fleet_report_insights <job> [result] — report the run's cost/token/effort metrics +# (parsed by parse_usage into the job meta) to the coordinator, recorded on the +# current run. Also releases the held lease (the agent has finished its work unit). +# Best-effort: never blocks the loop. Engines that don't expose usage locally +# (e.g. devin) simply omit token/cost fields; `result` + endedAt still land. 
+fleet_report_insights() { + fleet_enabled || return 0 + local job=$1 result=${2:-} metaf jid epoch + metaf="$STATE/$job.meta" + jid=$(_meta_val "$metaf" fleet_job_id); epoch=$(_meta_val "$metaf" fleet_lease_epoch) + [[ -n "$jid" ]] || return 0 + local model ti to tc cost turns tools est ins="" + model=$(_meta_val "$metaf" model) + ti=$(_meta_val "$metaf" tokens_in); to=$(_meta_val "$metaf" tokens_out) + tc=$(_meta_val "$metaf" tokens_cached); cost=$(_meta_val "$metaf" cost_usd) + turns=$(_meta_val "$metaf" turns); tools=$(_meta_val "$metaf" tool_calls) + est=$(_meta_val "$metaf" usage_estimated) + [[ -n "$model" ]] && ins+=",\"model\":\"$(_json_escape "$model")\"" + [[ "$ti" =~ ^[0-9]+$ ]] && ins+=",\"tokensIn\":$ti" + [[ "$to" =~ ^[0-9]+$ ]] && ins+=",\"tokensOut\":$to" + [[ "$tc" =~ ^[0-9]+$ ]] && ins+=",\"tokensCached\":$tc" + [[ "$cost" =~ ^[0-9]+(\.[0-9]+)?$ ]] && ins+=",\"costUsd\":$cost" + [[ "$turns" =~ ^[0-9]+$ ]] && ins+=",\"turns\":$turns" + [[ "$tools" =~ ^[0-9]+$ ]] && ins+=",\"toolCalls\":$tools" + [[ "$est" == "true" || "$est" == "1" ]] && ins+=",\"estimated\":true" + local body="{\"leaseEpoch\":${epoch:-0}" + [[ -n "$ins" ]] && body+=",\"insights\":{${ins#,}}" + [[ -n "$result" ]] && body+=",\"result\":\"$(_json_escape "$result")\"" + body+="}" + _fleet_call POST "/fleet/jobs/$jid/lease/release" "$body" + return 0 +} + # fleet_renew_active — renew leases for all in-flight (building/) fleet jobs. fleet_renew_active() { fleet_enabled || return 0 diff --git a/agent-queue/selftest.sh b/agent-queue/selftest.sh index beedf8f..11b9da8 100755 --- a/agent-queue/selftest.sh +++ b/agent-queue/selftest.sh @@ -1027,6 +1027,34 @@ else fi unset AQ_FLEET_API_CMD AQ_FLEET_SHADOW_LOG AGENT_QUEUE_ROOT +# fleet insights (§26): a successful fleet run reports parsed cost/token metrics + +# result onto the coordinator's run via POST .../lease/release (fleet_report_insights). 
+export AGENT_QUEUE_ROOT="$tmp/queue-fl-ins"; export AQ_FLEET_CWD="$work" +"$AQ" init >/dev/null +export AQ_FLEET_API_CMD="$fstub" AQ_FSTUB_CALLS="$tmp/fl-ins-calls.log" \ + AQ_FSTUB_CLAIM_FLAG="$tmp/fl-ins-claimed" AQ_FSTUB_JOB_ID="fjob_ins" AQ_FSTUB_BODY="emit usage" +: > "$AQ_FSTUB_CALLS"; rm -f "$AQ_FSTUB_CLAIM_FLAG" +# usagestub emits an AQ_USAGE line; materialized fleet jobs run the default (devin) engine. +AQ_FLEET=1 AGENT_QUEUE_POLL=1 DEVIN_BIN="$usagestub" "$AQ" run --once >/dev/null 2>&1 +if grep -qE 'POST /fleet/jobs/fjob_ins/lease/release :: .*"tokensIn":100' "$AQ_FSTUB_CALLS" \ + && grep -qE 'POST /fleet/jobs/fjob_ins/lease/release :: .*"costUsd":0\.0021' "$AQ_FSTUB_CALLS" \ + && grep -qE 'POST /fleet/jobs/fjob_ins/lease/release :: .*"result":"review"' "$AQ_FSTUB_CALLS"; then + pass "fleet insights: run reports cost/tokens + result via lease/release" +else + cat "$AQ_FSTUB_CALLS" >&2; fail "fleet insights not reported on lease/release" +fi +unset AQ_FLEET_API_CMD AQ_FSTUB_CALLS AQ_FSTUB_CLAIM_FLAG AQ_FSTUB_JOB_ID AQ_FSTUB_BODY AGENT_QUEUE_ROOT AQ_FLEET_CWD + +# fleet API base normalization: AQ_FLEET_API without the /api mount must be +# normalized so fleet calls still resolve (regression for the silent-404 bug). +norm=$(AQ_FLEET_API="http://localhost:4003" bash -c 'source "'"$HERE"'/lib/fleet-client.sh"; printf "%s" "$AQ_FLEET_API"') +norm2=$(AQ_FLEET_API="http://localhost:4003/api/" bash -c 'source "'"$HERE"'/lib/fleet-client.sh"; printf "%s" "$AQ_FLEET_API"') +if [ "$norm" = "http://localhost:4003/api" ] && [ "$norm2" = "http://localhost:4003/api" ]; then + pass "fleet API base: normalized to include exactly one /api (with or without)" +else + fail "fleet API base normalization wrong (got '$norm' and '$norm2')" +fi + # ───────────────────────────────────────────────────────────────────── # Phase 2 — two-factory parallel demo (EXIT CRITERIA, §14). Runs the demo # HEADLESS in STUB mode (its own stateful coordinator stub + two real factory