diff --git a/agent-queue/agent-queue.sh b/agent-queue/agent-queue.sh index ce34934..8df7994 100755 --- a/agent-queue/agent-queue.sh +++ b/agent-queue/agent-queue.sh @@ -602,12 +602,15 @@ scope_check() { # frontmatter-STRIPPED body file, so a body starting with '--' is never # misparsed as a CLI option. build_agent_cmd() { - local engine=$1 pf=$2 yolo=$3 + local engine=$1 pf=$2 yolo=$3 exportpath=${4:-} AGENT_CMD=(); AGENT_STDIN="" case "$engine" in devin) AGENT_CMD=( "$DEVIN_BIN" -p --prompt-file "$pf" ) [[ "$yolo" == "true" ]] && AGENT_CMD+=( --permission-mode dangerous ) + # Export the conversation (ATIF) so parse_usage can read per-step token + # metrics — Devin does not expose usage in its stdout/local log otherwise. + [[ -n "$exportpath" ]] && AGENT_CMD+=( --export "$exportpath" ) ;; claude) AGENT_CMD=( "$CLAUDE_BIN" -p ) @@ -708,7 +711,10 @@ run_worker() { echo "profile: injected persona overlay from '$prof'" >> "$logf" fi fi - build_agent_cmd "$engine" "$bodyf" "$yolo" + # Devin conversation export path (derived from the log path so parse_usage can + # find it). Harmless for other engines (they ignore the 4th arg). + local devin_export="${logf%.log}.devin-export.json" + build_agent_cmd "$engine" "$bodyf" "$yolo" "$devin_export" # ── WIP checkpoint setup (§25.2): on a git cwd, create/checkout aq/wip/ # so partial work survives a crash; a trap guarantees a checkpoint on EVERY @@ -1084,7 +1090,24 @@ parse_usage() { [[ -n "$ti" ]] && echo "tokens_in=$ti" [[ -n "$to" ]] && echo "tokens_out=$to" ;; - devin) : ;; # TODO: Devin session metrics are exposed via API, not the local log. + devin) + # Devin exposes usage in its ATIF conversation export (--export), not stdout. + # build_agent_cmd writes it next to the log; sum per-step token metrics. 
+ local exp="${log%.log}.devin-export.json" + if [[ -f "$exp" ]]; then + local dti dto dtc dmodel + dti=$(grep -oE '"input_tokens"[[:space:]]*:[[:space:]]*[0-9]+' "$exp" | grep -oE '[0-9]+' | awk '{s+=$1} END{print s+0}') + dto=$(grep -oE '"output_tokens"[[:space:]]*:[[:space:]]*[0-9]+' "$exp" | grep -oE '[0-9]+' | awk '{s+=$1} END{print s+0}') + dtc=$(grep -oE '"cache_read_tokens"[[:space:]]*:[[:space:]]*[0-9]+' "$exp" | grep -oE '[0-9]+' | awk '{s+=$1} END{print s+0}') + dmodel=$(grep -oE '"model_name"[[:space:]]*:[[:space:]]*"[^"]+"' "$exp" | head -1 | sed -E 's/.*:[[:space:]]*"([^"]+)".*/\1/') + [[ -n "$dmodel" ]] && echo "model=$dmodel" + [[ "${dti:-0}" -gt 0 ]] && echo "tokens_in=$dti" + [[ "${dto:-0}" -gt 0 ]] && echo "tokens_out=$dto" + [[ "${dtc:-0}" -gt 0 ]] && echo "tokens_cached=$dtc" + # NOTE: Devin's export carries token counts but not USD cost; cost_usd is + # left unset (the dashboard shows tokens + model; cost stays blank). + fi + ;; copilot) : ;; # TODO: GitHub Copilot CLI usage format not yet documented here. esac return 0 diff --git a/agent-queue/selftest.sh b/agent-queue/selftest.sh index 11b9da8..6a605cb 100755 --- a/agent-queue/selftest.sh +++ b/agent-queue/selftest.sh @@ -459,6 +459,35 @@ else printf '%s\n' "$out" >&2; fail "insights aggregate rollup missing/incorrect" fi +# 18b. devin usage: parse_usage sums per-step token metrics from the --export ATIF +# conversation (Devin exposes usage there, not in stdout). A fake devin engine +# writes the export to the --export path build_agent_cmd passes. 
+devexportstub="$tmp/devin-export-engine"  # fake devin executable; its body is the STUBEOF heredoc below
+cat > "$devexportstub" <<'STUBEOF'
+#!/usr/bin/env bash
+exp=""; while [ $# -gt 0 ]; do [ "$1" = "--export" ] && exp="$2"; shift; done
+if [ -n "$exp" ]; then
+ cat > "$exp" <<'JSON'
+{"schema_version":"ATIF-v1.4","agent":{"model_name":"Claude Opus 4.8"},"steps":[
+{"metadata":{"metrics":{"input_tokens":1200,"output_tokens":300,"cache_read_tokens":900}}},
+{"metadata":{"metrics":{"input_tokens":800,"output_tokens":200,"cache_read_tokens":100}}}]}
+JSON
+fi
+echo "done"; exit 0
+STUBEOF
+chmod +x "$devexportstub"
+"$AQ" init >/dev/null
+printf '%s\n' '---' 'engine: devin' "cwd: $work" 'yolo: true' '---' '' '# devin usage task' \
+ > "$AGENT_QUEUE_ROOT/inbox/devusage.md"
+DEVIN_BIN="$devexportstub" "$AQ" run --once >/dev/null 2>&1  # DEVIN_BIN is overridden for this one invocation only, so the stub runs in place of the real devin binary
+dm="$AGENT_QUEUE_ROOT/.state/devusage.meta"  # meta file checked below; expected sums from the stub's two steps: in 1200+800, out 300+200, cached 900+100
+if [ "$(metaval "$dm" tokens_in)" = "2000" ] && [ "$(metaval "$dm" tokens_out)" = "500" ] \
+ && [ "$(metaval "$dm" tokens_cached)" = "1000" ] && [ "$(metaval "$dm" model)" = "Claude Opus 4.8" ]; then
+ pass "parse_usage devin: sums per-step tokens + model from --export ATIF"
+else
+ fail "devin export parse wrong (in=$(metaval "$dm" tokens_in) out=$(metaval "$dm" tokens_out) cached=$(metaval "$dm" tokens_cached) model=$(metaval "$dm" model))"
+fi
+
 # ─────────────────────────────────────────────────────────────────────
 # Phase 1 — Slice 2 cases (profiles + deps/DAG, single host).
 # Uses a temp profile catalog (AGENT_QUEUE_PROFILES) + temp git repos.