feat(agent-queue): extract Devin token usage from the conversation export

Devin does not surface token usage or cost in its stdout or local log, so parse_usage previously emitted nothing for the devin engine (runs showed no metrics). Devin DOES expose per-step usage in its ATIF conversation export.

- build_agent_cmd: pass `--export <path>` for the devin engine (path derived from the job log path so parse_usage can find it; harmless 4th arg for other engines).
- parse_usage devin: read the export and sum per-step metadata.metrics input_tokens / output_tokens / cache_read_tokens; take model from agent.model_name. Pure grep/awk, no new dependency.

USD cost is left unset (the export carries token counts but not cost) — the dashboard shows tokens + model, cost stays blank.

These feed fleet_report_insights, so live devin fleet runs now report tokens + model to the coordinator (verified live: model "Claude Opus 4.8", tokensIn/out + cache populated on a real run).

selftest: +1 case (parse_usage devin sums per-step tokens + model from --export). Full self-test PASS.

Generated with [Devin](https://cli.devin.ai/docs)

Co-Authored-By: Devin <158243242+devin-ai-integration[bot]@users.noreply.github.com>
2026-05-31 02:55:08 -07:00 · 2026-05-31 02:55:08 -07:00 · 8085501506
commit 8085501506
parent 1a28fd541f
2 changed files with 55 additions and 3 deletions
--- a/agent-queue/agent-queue.sh
+++ b/agent-queue/agent-queue.sh
@ -602,12 +602,15 @@ scope_check() {
 # frontmatter-STRIPPED body file, so a body starting with '--' is never
 # misparsed as a CLI option.
 build_agent_cmd() {
-  local engine=$1 pf=$2 yolo=$3
+  local engine=$1 pf=$2 yolo=$3 exportpath=${4:-}
  AGENT_CMD=(); AGENT_STDIN=""
  case "$engine" in
    devin)
      AGENT_CMD=( "$DEVIN_BIN" -p --prompt-file "$pf" )
      [[ "$yolo" == "true" ]] && AGENT_CMD+=( --permission-mode dangerous )
+      # Export the conversation (ATIF) so parse_usage can read per-step token
+      # metrics — Devin does not expose usage in its stdout/local log otherwise.
+      [[ -n "$exportpath" ]] && AGENT_CMD+=( --export "$exportpath" )
      ;;
    claude)
      AGENT_CMD=( "$CLAUDE_BIN" -p )
@ -708,7 +711,10 @@ run_worker() {
      echo "profile: injected persona overlay from '$prof'" >> "$logf"
    fi
  fi
-  build_agent_cmd "$engine" "$bodyf" "$yolo"
+  # Devin conversation export path (derived from the log path so parse_usage can
+  # find it). Harmless for other engines (they ignore the 4th arg).
+  local devin_export="${logf%.log}.devin-export.json"
+  build_agent_cmd "$engine" "$bodyf" "$yolo" "$devin_export"

  # ── WIP checkpoint setup (§25.2): on a git cwd, create/checkout aq/wip/<job>
  # so partial work survives a crash; a trap guarantees a checkpoint on EVERY
@ -1084,7 +1090,24 @@ parse_usage() {
      [[ -n "$ti" ]] && echo "tokens_in=$ti"
      [[ -n "$to" ]] && echo "tokens_out=$to"
      ;;
-    devin)   : ;;  # TODO: Devin session metrics are exposed via API, not the local log.
+    devin)
+      # Devin exposes usage in its ATIF conversation export (--export), not stdout.
+      # build_agent_cmd writes it next to the log; sum per-step token metrics.
+      local exp="${log%.log}.devin-export.json"
+      if [[ -f "$exp" ]]; then
+        local dti dto dtc dmodel
+        dti=$(grep -oE '"input_tokens"[[:space:]]*:[[:space:]]*[0-9]+' "$exp" | grep -oE '[0-9]+' | awk '{s+=$1} END{print s+0}')
+        dto=$(grep -oE '"output_tokens"[[:space:]]*:[[:space:]]*[0-9]+' "$exp" | grep -oE '[0-9]+' | awk '{s+=$1} END{print s+0}')
+        dtc=$(grep -oE '"cache_read_tokens"[[:space:]]*:[[:space:]]*[0-9]+' "$exp" | grep -oE '[0-9]+' | awk '{s+=$1} END{print s+0}')
+        dmodel=$(grep -oE '"model_name"[[:space:]]*:[[:space:]]*"[^"]+"' "$exp" | head -1 | sed -E 's/.*:[[:space:]]*"([^"]+)".*/\1/')
+        [[ -n "$dmodel" ]] && echo "model=$dmodel"
+        [[ "${dti:-0}" -gt 0 ]] && echo "tokens_in=$dti"
+        [[ "${dto:-0}" -gt 0 ]] && echo "tokens_out=$dto"
+        [[ "${dtc:-0}" -gt 0 ]] && echo "tokens_cached=$dtc"
+        # NOTE: Devin's export carries token counts but not USD cost; cost_usd is
+        # left unset (the dashboard shows tokens + model; cost stays blank).
+      fi
+      ;;
    copilot) : ;;  # TODO: GitHub Copilot CLI usage format not yet documented here.
  esac
  return 0
--- a/agent-queue/selftest.sh
+++ b/agent-queue/selftest.sh
@ -459,6 +459,35 @@ else
  printf '%s\n' "$out" >&2; fail "insights aggregate rollup missing/incorrect"
 fi

+# 18b. devin usage: parse_usage sums per-step token metrics from the --export ATIF
+# conversation (Devin exposes usage there, not in stdout). A fake devin engine
+# writes the export to the --export path build_agent_cmd passes.
+devexportstub="$tmp/devin-export-engine"
+cat > "$devexportstub" <<'STUBEOF'
+#!/usr/bin/env bash
+exp=""; while [ $# -gt 0 ]; do [ "$1" = "--export" ] && exp="$2"; shift; done
+if [ -n "$exp" ]; then
+  cat > "$exp" <<'JSON'
+{"schema_version":"ATIF-v1.4","agent":{"model_name":"Claude Opus 4.8"},"steps":[
+{"metadata":{"metrics":{"input_tokens":1200,"output_tokens":300,"cache_read_tokens":900}}},
+{"metadata":{"metrics":{"input_tokens":800,"output_tokens":200,"cache_read_tokens":100}}}]}
+JSON
+fi
+echo "done"; exit 0
+STUBEOF
+chmod +x "$devexportstub"
+"$AQ" init >/dev/null
+printf '%s\n' '---' 'engine: devin' "cwd: $work" 'yolo: true' '---' '' '# devin usage task' \
+  > "$AGENT_QUEUE_ROOT/inbox/devusage.md"
+DEVIN_BIN="$devexportstub" "$AQ" run --once >/dev/null 2>&1
+dm="$AGENT_QUEUE_ROOT/.state/devusage.meta"
+if [ "$(metaval "$dm" tokens_in)" = "2000" ] && [ "$(metaval "$dm" tokens_out)" = "500" ] \
+   && [ "$(metaval "$dm" tokens_cached)" = "1000" ] && [ "$(metaval "$dm" model)" = "Claude Opus 4.8" ]; then
+  pass "parse_usage devin: sums per-step tokens + model from --export ATIF"
+else
+  fail "devin export parse wrong (in=$(metaval "$dm" tokens_in) out=$(metaval "$dm" tokens_out) cached=$(metaval "$dm" tokens_cached) model=$(metaval "$dm" model))"
+fi
+
 # ─────────────────────────────────────────────────────────────────────
 # Phase 1 — Slice 2 cases (profiles + deps/DAG, single host).
 # Uses a temp profile catalog (AGENT_QUEUE_PROFILES) + temp git repos.