feat(agent-queue): extract Devin token usage from the conversation export
Devin does not surface token usage or cost in its stdout or local log, so parse_usage previously emitted nothing for the devin engine (runs showed no metrics). Devin does, however, expose per-step usage in its ATIF conversation export.

- build_agent_cmd: pass `--export <path>` for the devin engine (the path is derived from the job log path so parse_usage can find it; the extra 4th argument is harmless for other engines).
- parse_usage devin: read the export and sum the per-step metadata.metrics values input_tokens / output_tokens / cache_read_tokens; take the model from agent.model_name. Pure grep/awk, no new dependency.

USD cost is left unset (the export carries token counts but not cost) — the dashboard shows tokens + model, and cost stays blank.

These values feed fleet_report_insights, so live devin fleet runs now report tokens + model to the coordinator (verified live: model "Claude Opus 4.8", tokensIn/out + cache populated on a real run).

selftest: +1 case (parse_usage devin sums per-step tokens + model from --export). Full self-test PASS.

Generated with [Devin](https://cli.devin.ai/docs)

Co-Authored-By: Devin <158243242+devin-ai-integration[bot]@users.noreply.github.com>
This commit is contained in:
parent
1a28fd541f
commit
8085501506
@ -602,12 +602,15 @@ scope_check() {
|
||||
# frontmatter-STRIPPED body file, so a body starting with '--' is never
|
||||
# misparsed as a CLI option.
|
||||
build_agent_cmd() {
|
||||
local engine=$1 pf=$2 yolo=$3
|
||||
local engine=$1 pf=$2 yolo=$3 exportpath=${4:-}
|
||||
AGENT_CMD=(); AGENT_STDIN=""
|
||||
case "$engine" in
|
||||
devin)
|
||||
AGENT_CMD=( "$DEVIN_BIN" -p --prompt-file "$pf" )
|
||||
[[ "$yolo" == "true" ]] && AGENT_CMD+=( --permission-mode dangerous )
|
||||
# Export the conversation (ATIF) so parse_usage can read per-step token
|
||||
# metrics — Devin does not expose usage in its stdout/local log otherwise.
|
||||
[[ -n "$exportpath" ]] && AGENT_CMD+=( --export "$exportpath" )
|
||||
;;
|
||||
claude)
|
||||
AGENT_CMD=( "$CLAUDE_BIN" -p )
|
||||
@ -708,7 +711,10 @@ run_worker() {
|
||||
echo "profile: injected persona overlay from '$prof'" >> "$logf"
|
||||
fi
|
||||
fi
|
||||
build_agent_cmd "$engine" "$bodyf" "$yolo"
|
||||
# Devin conversation export path (derived from the log path so parse_usage can
|
||||
# find it). Harmless for other engines (they ignore the 4th arg).
|
||||
local devin_export="${logf%.log}.devin-export.json"
|
||||
build_agent_cmd "$engine" "$bodyf" "$yolo" "$devin_export"
|
||||
|
||||
# ── WIP checkpoint setup (§25.2): on a git cwd, create/checkout aq/wip/<job>
|
||||
# so partial work survives a crash; a trap guarantees a checkpoint on EVERY
|
||||
@ -1084,7 +1090,24 @@ parse_usage() {
|
||||
[[ -n "$ti" ]] && echo "tokens_in=$ti"
|
||||
[[ -n "$to" ]] && echo "tokens_out=$to"
|
||||
;;
|
||||
devin) : ;; # TODO: Devin session metrics are exposed via API, not the local log.
|
||||
devin)
|
||||
# Devin exposes usage in its ATIF conversation export (--export), not stdout.
|
||||
# build_agent_cmd writes it next to the log; sum per-step token metrics.
|
||||
local exp="${log%.log}.devin-export.json"
|
||||
if [[ -f "$exp" ]]; then
|
||||
local dti dto dtc dmodel
|
||||
dti=$(grep -oE '"input_tokens"[[:space:]]*:[[:space:]]*[0-9]+' "$exp" | grep -oE '[0-9]+' | awk '{s+=$1} END{print s+0}')
|
||||
dto=$(grep -oE '"output_tokens"[[:space:]]*:[[:space:]]*[0-9]+' "$exp" | grep -oE '[0-9]+' | awk '{s+=$1} END{print s+0}')
|
||||
dtc=$(grep -oE '"cache_read_tokens"[[:space:]]*:[[:space:]]*[0-9]+' "$exp" | grep -oE '[0-9]+' | awk '{s+=$1} END{print s+0}')
|
||||
dmodel=$(grep -oE '"model_name"[[:space:]]*:[[:space:]]*"[^"]+"' "$exp" | head -1 | sed -E 's/.*:[[:space:]]*"([^"]+)".*/\1/')
|
||||
[[ -n "$dmodel" ]] && echo "model=$dmodel"
|
||||
[[ "${dti:-0}" -gt 0 ]] && echo "tokens_in=$dti"
|
||||
[[ "${dto:-0}" -gt 0 ]] && echo "tokens_out=$dto"
|
||||
[[ "${dtc:-0}" -gt 0 ]] && echo "tokens_cached=$dtc"
|
||||
# NOTE: Devin's export carries token counts but not USD cost; cost_usd is
|
||||
# left unset (the dashboard shows tokens + model; cost stays blank).
|
||||
fi
|
||||
;;
|
||||
copilot) : ;; # TODO: GitHub Copilot CLI usage format not yet documented here.
|
||||
esac
|
||||
return 0
|
||||
|
||||
@ -459,6 +459,35 @@ else
|
||||
printf '%s\n' "$out" >&2; fail "insights aggregate rollup missing/incorrect"
|
||||
fi
|
||||
|
||||
# 18b. devin usage: parse_usage sums per-step token metrics from the --export ATIF
|
||||
# conversation (Devin exposes usage there, not in stdout). A fake devin engine
|
||||
# writes the export to the --export path build_agent_cmd passes.
|
||||
devexportstub="$tmp/devin-export-engine"
|
||||
cat > "$devexportstub" <<'STUBEOF'
|
||||
#!/usr/bin/env bash
|
||||
exp=""; while [ $# -gt 0 ]; do [ "$1" = "--export" ] && exp="$2"; shift; done
|
||||
if [ -n "$exp" ]; then
|
||||
cat > "$exp" <<'JSON'
|
||||
{"schema_version":"ATIF-v1.4","agent":{"model_name":"Claude Opus 4.8"},"steps":[
|
||||
{"metadata":{"metrics":{"input_tokens":1200,"output_tokens":300,"cache_read_tokens":900}}},
|
||||
{"metadata":{"metrics":{"input_tokens":800,"output_tokens":200,"cache_read_tokens":100}}}]}
|
||||
JSON
|
||||
fi
|
||||
echo "done"; exit 0
|
||||
STUBEOF
|
||||
chmod +x "$devexportstub"
|
||||
"$AQ" init >/dev/null
|
||||
printf '%s\n' '---' 'engine: devin' "cwd: $work" 'yolo: true' '---' '' '# devin usage task' \
|
||||
> "$AGENT_QUEUE_ROOT/inbox/devusage.md"
|
||||
DEVIN_BIN="$devexportstub" "$AQ" run --once >/dev/null 2>&1
|
||||
dm="$AGENT_QUEUE_ROOT/.state/devusage.meta"
|
||||
if [ "$(metaval "$dm" tokens_in)" = "2000" ] && [ "$(metaval "$dm" tokens_out)" = "500" ] \
|
||||
&& [ "$(metaval "$dm" tokens_cached)" = "1000" ] && [ "$(metaval "$dm" model)" = "Claude Opus 4.8" ]; then
|
||||
pass "parse_usage devin: sums per-step tokens + model from --export ATIF"
|
||||
else
|
||||
fail "devin export parse wrong (in=$(metaval "$dm" tokens_in) out=$(metaval "$dm" tokens_out) cached=$(metaval "$dm" tokens_cached) model=$(metaval "$dm" model))"
|
||||
fi
|
||||
|
||||
# ─────────────────────────────────────────────────────────────────────
|
||||
# Phase 1 — Slice 2 cases (profiles + deps/DAG, single host).
|
||||
# Uses a temp profile catalog (AGENT_QUEUE_PROFILES) + temp git repos.
|
||||
|
||||
Loading…
Reference in New Issue
Block a user