feat(agent-queue): extract Devin token usage from the conversation export

Devin does not surface token/cost in its stdout or local log, so parse_usage
previously emitted nothing for the devin engine (runs showed no metrics). Devin
DOES expose per-step usage in its ATIF conversation export.

- build_agent_cmd: pass `--export <path>` for the devin engine (path derived from
  the job log path so parse_usage can find it; harmless 4th arg for other engines).
- parse_usage devin: read the export and sum per-step metadata.metrics
  input_tokens / output_tokens / cache_read_tokens; take model from agent.model_name.
  Pure grep/awk/sed/head, no new dependency. USD cost is left unset (the export carries token
  counts but not cost) — the dashboard shows tokens + model, cost stays blank.

These feed fleet_report_insights, so live devin fleet runs now report tokens +
model to the coordinator (verified live: model "Claude Opus 4.8", tokensIn/out +
cache populated on a real run).

selftest: +1 case (parse_usage devin sums per-step tokens + model from --export).
Full self-test PASS.

Generated with [Devin](https://cli.devin.ai/docs)

Co-Authored-By: Devin <158243242+devin-ai-integration[bot]@users.noreply.github.com>
This commit is contained in:
saravanakumardb1 2026-05-31 02:55:08 -07:00
parent 1a28fd541f
commit 8085501506
2 changed files with 55 additions and 3 deletions

View File

@ -602,12 +602,15 @@ scope_check() {
# frontmatter-STRIPPED body file, so a body starting with '--' is never
# misparsed as a CLI option.
build_agent_cmd() {
local engine=$1 pf=$2 yolo=$3
local engine=$1 pf=$2 yolo=$3 exportpath=${4:-}
AGENT_CMD=(); AGENT_STDIN=""
case "$engine" in
devin)
AGENT_CMD=( "$DEVIN_BIN" -p --prompt-file "$pf" )
[[ "$yolo" == "true" ]] && AGENT_CMD+=( --permission-mode dangerous )
# Export the conversation (ATIF) so parse_usage can read per-step token
# metrics — Devin does not expose usage in its stdout/local log otherwise.
[[ -n "$exportpath" ]] && AGENT_CMD+=( --export "$exportpath" )
;;
claude)
AGENT_CMD=( "$CLAUDE_BIN" -p )
@ -708,7 +711,10 @@ run_worker() {
echo "profile: injected persona overlay from '$prof'" >> "$logf"
fi
fi
build_agent_cmd "$engine" "$bodyf" "$yolo"
# Devin conversation export path (derived from the log path so parse_usage can
# find it). Harmless for other engines (they ignore the 4th arg).
local devin_export="${logf%.log}.devin-export.json"
build_agent_cmd "$engine" "$bodyf" "$yolo" "$devin_export"
# ── WIP checkpoint setup (§25.2): on a git cwd, create/checkout aq/wip/<job>
# so partial work survives a crash; a trap guarantees a checkpoint on EVERY
@ -1084,7 +1090,24 @@ parse_usage() {
[[ -n "$ti" ]] && echo "tokens_in=$ti"
[[ -n "$to" ]] && echo "tokens_out=$to"
;;
devin) : ;; # TODO: Devin session metrics are exposed via API, not the local log.
devin)
  # Token usage for Devin is read from the ATIF conversation export produced via
  # --export (nothing usable appears on stdout). The export is looked up beside
  # the job log, with ".log" swapped for ".devin-export.json".
  local exp="${log%.log}.devin-export.json"
  if [[ -f "$exp" ]]; then
    local dti dto dtc dmodel metric_key metric_sum
    # Collect every value before printing anything. For each metric key: pull
    # out each `"key": N` pair, keep only the digits, and total them (awk's
    # `s+0` yields 0 when nothing matched).
    for metric_key in input_tokens output_tokens cache_read_tokens; do
      metric_sum=$(grep -oE "\"${metric_key}\"[[:space:]]*:[[:space:]]*[0-9]+" "$exp" | grep -oE '[0-9]+' | awk '{s+=$1} END{print s+0}')
      case "$metric_key" in
        input_tokens)      dti=$metric_sum ;;
        output_tokens)     dto=$metric_sum ;;
        cache_read_tokens) dtc=$metric_sum ;;
      esac
    done
    # Model label: the first "model_name" string value found in the export.
    dmodel=$(grep -oE '"model_name"[[:space:]]*:[[:space:]]*"[^"]+"' "$exp" | head -1 | sed -E 's/.*:[[:space:]]*"([^"]+)".*/\1/')
    # Emit key=value lines; an empty model and zero-valued counters are omitted.
    if [[ -n "$dmodel" ]]; then
      echo "model=$dmodel"
    fi
    if [[ "${dti:-0}" -gt 0 ]]; then
      echo "tokens_in=$dti"
    fi
    if [[ "${dto:-0}" -gt 0 ]]; then
      echo "tokens_out=$dto"
    fi
    if [[ "${dtc:-0}" -gt 0 ]]; then
      echo "tokens_cached=$dtc"
    fi
    # NOTE: the export provides token counts but no USD figure, so cost_usd is
    # intentionally not emitted here.
  fi
  ;;
copilot) : ;; # TODO: GitHub Copilot CLI usage format not yet documented here.
esac
return 0

View File

@ -459,6 +459,35 @@ else
printf '%s\n' "$out" >&2; fail "insights aggregate rollup missing/incorrect"
fi
# 18b. devin usage: parse_usage sums per-step token metrics from the --export ATIF
# conversation (Devin exposes usage there, not in stdout). A fake devin engine
# writes the export to the --export path build_agent_cmd passes.
#
# Fake engine written below: it scans its arguments for `--export <path>`; when
# one is present it writes a fixed two-step ATIF document to <path>. It always
# prints "done" and exits 0.
devexportstub="$tmp/devin-export-engine"
cat > "$devexportstub" <<'STUBEOF'
#!/usr/bin/env bash
exp=""; while [ $# -gt 0 ]; do [ "$1" = "--export" ] && exp="$2"; shift; done
if [ -n "$exp" ]; then
cat > "$exp" <<'JSON'
{"schema_version":"ATIF-v1.4","agent":{"model_name":"Claude Opus 4.8"},"steps":[
{"metadata":{"metrics":{"input_tokens":1200,"output_tokens":300,"cache_read_tokens":900}}},
{"metadata":{"metrics":{"input_tokens":800,"output_tokens":200,"cache_read_tokens":100}}}]}
JSON
fi
echo "done"; exit 0
STUBEOF
chmod +x "$devexportstub"
# Enqueue a single devin job (frontmatter: engine devin, cwd $work, yolo true)
# and run the queue once with DEVIN_BIN overridden to the fake engine.
"$AQ" init >/dev/null
printf '%s\n' '---' 'engine: devin' "cwd: $work" 'yolo: true' '---' '' '# devin usage task' \
> "$AGENT_QUEUE_ROOT/inbox/devusage.md"
DEVIN_BIN="$devexportstub" "$AQ" run --once >/dev/null 2>&1
# Expected values are the totals over the stub's two steps:
#   tokens_in 1200+800=2000, tokens_out 300+200=500, tokens_cached 900+100=1000,
#   model = the agent.model_name string from the export.
# NOTE(review): .state/devusage.meta is presumably the per-job metadata file the
# worker writes after parse_usage runs — confirm against run_worker.
dm="$AGENT_QUEUE_ROOT/.state/devusage.meta"
if [ "$(metaval "$dm" tokens_in)" = "2000" ] && [ "$(metaval "$dm" tokens_out)" = "500" ] \
&& [ "$(metaval "$dm" tokens_cached)" = "1000" ] && [ "$(metaval "$dm" model)" = "Claude Opus 4.8" ]; then
pass "parse_usage devin: sums per-step tokens + model from --export ATIF"
else
# On mismatch, report every observed value so the failing field is obvious.
fail "devin export parse wrong (in=$(metaval "$dm" tokens_in) out=$(metaval "$dm" tokens_out) cached=$(metaval "$dm" tokens_cached) model=$(metaval "$dm" model))"
fi
# ─────────────────────────────────────────────────────────────────────
# Phase 1 — Slice 2 cases (profiles + deps/DAG, single host).
# Uses a temp profile catalog (AGENT_QUEUE_PROFILES) + temp git repos.