#!/usr/bin/env bash
#
# agent-queue — a folder-based "kanban" runner for headless coding-agent CLIs.
#
# Drop a prompt .md file into queue/inbox/, and `agent-queue run` will:
#   1. pick the oldest file (respecting --max concurrency),
#   2. move it inbox/ -> building/,
#   3. launch the chosen agent CLI (devin | claude | codex) in --yolo mode,
#   4. on agent rc=0 move building/ -> review/, then run the auto-QA verify gate:
#        verify pass -> testing/   verify fail -> failed/   (no verify -> stays in review/)
#   5. on agent failure/timeout move building/ -> failed/,
#   6. you manually `ship` testing/ -> shipped/ (the human gate),
#   7. write a per-job log + live state so `status`/`watch` can show progress.
#
# Lifecycle: inbox -> building -> review -> testing -> shipped (+ failed)
#
# Per-task config travels in YAML-ish frontmatter at the top of the .md:
#   ---
#   engine: devin         # devin | claude | codex (default: $DEFAULT_ENGINE)
#   cwd: /abs/path/repo   # where the agent runs (default: $PWD when added)
#   yolo: true            # auto-approve all tools (default: true)
#   ---
#
# Subcommands: init | add | run | status | watch | dash | stop | logs |
#              promote | ship | reject | requeue | clean | help
#
# -u: unset variables are errors; pipefail: a pipeline fails if any stage fails.
# (-e is deliberately not set: commands are checked individually.)
set -uo pipefail

# ── Resolve paths ───────────────────────────────────────────────────
# Everything lives under QUEUE_ROOT, which defaults to ./queue next to this script.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
QUEUE_ROOT="${AGENT_QUEUE_ROOT:-$SCRIPT_DIR/queue}"
INBOX="$QUEUE_ROOT/inbox"
BUILDING="$QUEUE_ROOT/building"
REVIEW="$QUEUE_ROOT/review"
TESTING="$QUEUE_ROOT/testing"
SHIPPED="$QUEUE_ROOT/shipped"
FAILED="$QUEUE_ROOT/failed"
LOGS="$QUEUE_ROOT/logs"
STATE="$QUEUE_ROOT/.state"
LOCKS="$QUEUE_ROOT/locks"

# ── Config (env-overridable) ────────────────────────────────────────
MAX_CONCURRENCY="${AGENT_QUEUE_MAX:-3}"
DEFAULT_ENGINE="${AGENT_QUEUE_ENGINE:-devin}"
POLL_SECONDS="${AGENT_QUEUE_POLL:-3}"

# A running worker is flagged "stalled" if its log has not changed in this many
# minutes (no new agent output) — surfaced in status + dash.
STALL_MIN="${AGENT_QUEUE_STALL_MIN:-10}"

# Auto-QA verify command. After an agent exits 0 the job lands in review/; if a
# verify command is set (frontmatter `verify:` overrides this default) it runs in
# the job's cwd: pass -> testing/ (QA), fail -> failed/. Empty default = jobs park
# in review/ for manual `promote`. Shipping (testing -> shipped) is always manual.
DEFAULT_VERIFY="${AGENT_QUEUE_VERIFY:-}"

# flock is used for cross-process lock hardening when available (Linux). macOS
# has no flock; mutual exclusion there relies on the single run-loop (see cmd_run).
FLOCK_BIN="${FLOCK_BIN:-$(command -v flock || true)}"

# timeout/gtimeout give hard process-tree kills for per-job timeouts; if absent
# (stock macOS) a pure-bash watchdog is used as a best-effort fallback.
TIMEOUT_BIN="${TIMEOUT_BIN:-$(command -v timeout || command -v gtimeout || true)}"

# Agent CLI binaries (each overridable from the environment).
DEVIN_BIN="${DEVIN_BIN:-$(command -v devin || echo "$HOME/.local/bin/devin")}"
CLAUDE_BIN="${CLAUDE_BIN:-$(command -v claude || echo claude)}"
CODEX_BIN="${CODEX_BIN:-$(command -v codex || echo codex)}"

# ── Colors ──────────────────────────────────────────────────────────
# ANSI colors only when stdout is a terminal; empty strings otherwise.
if [[ -t 1 ]]; then
  C_RESET=$'\033[0m'; C_DIM=$'\033[2m'; C_BOLD=$'\033[1m'
  C_BLUE=$'\033[34m'; C_GREEN=$'\033[32m'; C_RED=$'\033[31m'
  C_YEL=$'\033[33m'; C_CYAN=$'\033[36m'
else
  C_RESET=""; C_DIM=""; C_BOLD=""
  C_BLUE=""; C_GREEN=""; C_RED=""
  C_YEL=""; C_CYAN=""
fi

# log -> stdout, err -> stderr, die -> err + exit 1.
log() { printf '%s[agent-queue]%s %s\n' "$C_CYAN" "$C_RESET" "$*"; }
err() { printf '%s[agent-queue]%s %s\n' "$C_RED" "$C_RESET" "$*" >&2; }
die() { err "$*"; exit 1; }

# ── Init ────────────────────────────────────────────────────────────
# ensure_dirs -> create every queue folder (idempotent).
ensure_dirs() {
  mkdir -p "$INBOX" "$BUILDING" "$REVIEW" "$TESTING" "$SHIPPED" "$FAILED" \
    "$LOGS" "$STATE" "$LOCKS"
}

# ── Frontmatter parsing ─────────────────────────────────────────────
# fm_get <file> <key> [default] -> print the value of <key> from the leading
# ---...--- block of <file>, or [default] when the key is absent, the value is
# empty, or the file has no frontmatter. Surrounding quotes and whitespace are
# stripped. For an unquoted value, a '#' preceded by whitespace starts a
# trailing comment (YAML-style) and is dropped, so "timeout: 45m  # note"
# yields "45m"; quote the whole value to keep a literal ' #'.
# <key> is used as a regex fragment, so pass plain identifiers only.
fm_get() {
  local file=$1 key=$2 def=${3:-}
  local val
  # only scan a leading --- ... --- block
  val=$(awk -v k="$key" '
    NR==1 && $0!="---" { exit }
    NR==1 { infm=1; next }
    infm && $0=="---" { exit }
    infm {
      line=$0
      sub(/^[ \t]*/,"",line)
      if (line ~ "^" k "[ \t]*:") {
        sub("^" k "[ \t]*:[ \t]*","",line)
        if (line ~ /^#/) line=""
        else if (line !~ /^["'\'']/) sub(/[ \t]+#.*$/,"",line)
        gsub(/^["'\''[:space:]]+|["'\''[:space:]]+$/,"",line)
        print line; exit
      }
    }' "$file" 2>/dev/null)
  if [[ -n "$val" ]]; then
    printf '%s' "$val"
  else
    printf '%s' "$def"
  fi
}

# strip_frontmatter <file> -> prints the body (everything after a leading ---..--- block)
strip_frontmatter() {
  awk '
    NR==1 && $0=="---" { infm=1; next }
    infm && $0=="---" { infm=0; next }
    { if (!infm) print }
  ' "$1"
}

# lock_key_for <file> -> the mutual-exclusion key for a job: frontmatter `lock:`
# if set, otherwise the cwd. Jobs sharing a key never run concurrently.
lock_key_for() {
  local f=$1 k
  k=$(fm_get "$f" lock "")
  if [[ -n "$k" ]]; then
    printf '%s' "$k"
    return
  fi
  fm_get "$f" cwd "$PWD"
}

# _keyhash <key> -> stable filename-safe token for a lock key (cksum CRC)
_keyhash() { printf '%s' "$1" | cksum | awk '{print $1}'; }

# _mtime <path> -> file modification time in epoch seconds; empty if missing.
# GNU `stat -c %Y` is probed FIRST: with GNU stat, `-f` means "filesystem
# status", so `stat -f %m FILE` prints a filesystem report for FILE to stdout
# and only then fails on the bogus operand "%m" — which would pollute the
# result. BSD/macOS stat rejects `-c` silently (stderr only), so the fallback
# to `-f %m` is clean.
_mtime() {
  [[ -e "$1" ]] || { echo ""; return; }
  stat -c %Y "$1" 2>/dev/null || stat -f %m "$1" 2>/dev/null || echo ""
}

# _pidstart <pid> -> the process start time as reported by ps (whitespace-normalized).
# Used as an identity token so a recycled pid is never mistaken for our worker.
_pidstart() { ps -o lstart= -p "$1" 2>/dev/null | awk '{$1=$1;print}'; }

# _pid_alive <pid> [recorded-start] -> 0 if the pid is live AND (when a start
# time was recorded) its current start time still matches — defeating pid reuse.
_pid_alive() {
  local pid=${1:-} want=${2:-} cur
  [[ -n "$pid" ]] || return 1
  kill -0 "$pid" 2>/dev/null || return 1
  [[ -z "$want" ]] && return 0
  cur=$(_pidstart "$pid")
  [[ "$cur" == "$want" ]]
}

# _dur_to_secs <dur> -> seconds. Accepts 90, 90s, 45m, 2h, 1d. Invalid/empty -> 0.
_dur_to_secs() {
  local d=${1:-}
  if [[ "$d" =~ ^([0-9]+)([smhd]?)$ ]]; then
    # Force base 10: a zero-padded count such as "08" would otherwise be read
    # as (invalid) octal by shell arithmetic. This also normalizes "090" -> 90
    # so callers can safely compare the result with -gt / -lt.
    local n=$((10#${BASH_REMATCH[1]})) u=${BASH_REMATCH[2]}
    case "$u" in
      '' | s) echo "$n" ;;
      m) echo $((n * 60)) ;;
      h) echo $((n * 3600)) ;;
      d) echo $((n * 86400)) ;;
    esac
  else
    # empty or malformed duration
    echo 0
  fi
}

# _meta_active <meta-file> -> 0 if the job is occupying a concurrency slot.
# Active = no `ended=` AND (pid is live, OR pid not yet written but the meta was
# created moments ago — the reserved-slot window between meta-write and launch).
# The <30s guard prevents a meta orphaned mid-launch (daemon killed in the gap)
# from pinning a slot forever.
_meta_active() {
  local f=$1 pid mt age
  grep -q '^ended=' "$f" && return 1
  pid=$(grep '^pid=' "$f" | head -1 | cut -d= -f2)
  if [[ -n "$pid" ]]; then
    local pidstart
    pidstart=$(grep '^pidstart=' "$f" | head -1 | cut -d= -f2-)
    _pid_alive "$pid" "$pidstart"
    return $?
  fi
  # No pid recorded yet: treat as a reservation only while the meta is fresh.
  mt=$(_mtime "$f")
  age=$(( $(date +%s) - ${mt:-0} ))
  [[ "$age" -lt 30 ]]
}

# active_workers -> count of jobs occupying a concurrency slot (reservation-aware).
active_workers() {
  local n=0 f
  for f in "$STATE"/*.meta; do
    [[ -e "$f" ]] || continue
    if _meta_active "$f"; then n=$((n + 1)); fi
  done
  echo "$n"
}

# busy_keys -> newline list of lock keys currently held by active workers.
busy_keys() {
  local f
  for f in "$STATE"/*.meta; do
    [[ -e "$f" ]] || continue
    if _meta_active "$f"; then
      grep '^lock=' "$f" | head -1 | cut -d= -f2-
    fi
  done
}

# ── Engine driver: builds argv into AGENT_CMD[]; sets AGENT_STDIN if the ──
# prompt should be fed on stdin (claude/codex) rather than a flag. $pf is the
# frontmatter-STRIPPED body file, so a body starting with '--' is never
# misparsed as a CLI option.
build_agent_cmd() {
  local engine=$1 pf=$2 yolo=$3
  AGENT_CMD=()
  AGENT_STDIN=""
  case "$engine" in
    devin)
      AGENT_CMD=("$DEVIN_BIN" -p --prompt-file "$pf")
      if [[ "$yolo" == "true" ]]; then
        AGENT_CMD+=(--permission-mode dangerous)
      fi
      ;;
    claude)
      AGENT_CMD=("$CLAUDE_BIN" -p)
      if [[ "$yolo" == "true" ]]; then
        AGENT_CMD+=(--dangerously-skip-permissions)
      fi
      AGENT_STDIN="$pf"
      ;;
    codex)
      AGENT_CMD=("$CODEX_BIN" exec)
      if [[ "$yolo" == "true" ]]; then
        AGENT_CMD+=(--dangerously-bypass-approvals-and-sandbox)
      fi
      AGENT_STDIN="$pf"
      ;;
    *) die "unknown engine '$engine' (use: devin | claude | codex)" ;;
  esac
}

# ── Worker: runs one job to completion (invoked in background) ───────
# run_worker <building-file>
# Reads engine/cwd/yolo/lock/timeout/verify from the job's frontmatter, runs the
# agent with its output appended to logs/<job>.log, then moves the job file to
# its resting stage and appends exit/result/ended to .state/<job>.meta.
run_worker() {
  local doing_file=$1
  local job
  job=$(basename "$doing_file")
  job=${job%.md}

  local engine cwd yolo logf metaf
  engine=$(fm_get "$doing_file" engine "$DEFAULT_ENGINE")
  cwd=$(fm_get "$doing_file" cwd "$PWD")
  yolo=$(fm_get "$doing_file" yolo "true")
  logf="$LOGS/$job.log"
  metaf="$STATE/$job.meta"

  # NOTE: the parent (cmd_run) creates $metaf with job/engine/cwd/started/pid.
  # The worker only ever APPENDS (ended/exit/result) to avoid a truncation race.
  {
    echo "===== agent-queue job: $job ====="
    echo "engine=$engine cwd=$cwd yolo=$yolo"
    echo "started: $(date)"
    echo "================================="
  } >> "$logf"

  # Reject unrunnable jobs up front and fail them properly. The engine is
  # validated here rather than left to build_agent_cmd's `die`: dying inside
  # this background worker would exit without moving the job or writing
  # `ended=`, stranding it in building/.
  local fatal=""
  if [[ ! -d "$cwd" ]]; then
    fatal="cwd does not exist: $cwd"
  else
    case "$engine" in
      devin | claude | codex) ;;
      *) fatal="unknown engine '$engine' (use: devin | claude | codex)" ;;
    esac
  fi
  if [[ -n "$fatal" ]]; then
    echo "FATAL: $fatal" >> "$logf"
    mv "$doing_file" "$FAILED/" 2>/dev/null
    echo "result=failed" >> "$metaf"
    echo "ended=$(date +%s)" >> "$metaf"
    return 1
  fi

  # Strip our frontmatter so the agent only sees the task body.
  local bodyf="$STATE/$job.body.md"
  strip_frontmatter "$doing_file" > "$bodyf"
  build_agent_cmd "$engine" "$bodyf" "$yolo"

  # Runs the agent inside $cwd; the subshell keeps the cd from leaking.
  _run_agent() {
    if [[ -n "$AGENT_STDIN" ]]; then
      ( cd "$cwd" && "${AGENT_CMD[@]}" < "$AGENT_STDIN" )
    else
      ( cd "$cwd" && "${AGENT_CMD[@]}" )
    fi
  }

  local rc=0 lockkey tmo timed_out=false
  lockkey=$(lock_key_for "$doing_file")
  tmo=$(_dur_to_secs "$(fm_get "$doing_file" timeout "0")")
  local tmo_flag="$STATE/$job.timedout"
  rm -f "$tmo_flag"
  local lf
  lf="$LOCKS/$(_keyhash "$lockkey").lock"

  if [[ "$tmo" -gt 0 && -n "$TIMEOUT_BIN" ]]; then
    # Hard timeout via timeout/gtimeout (kills the whole process tree).
    local t0=$SECONDS
    AQ_STDIN="$AGENT_STDIN" "$TIMEOUT_BIN" -k 5 "${tmo}s" bash -c '
      cd "$1" || exit 97; shift
      if [ -n "${AQ_STDIN:-}" ]; then exec "$@" < "$AQ_STDIN"; else exec "$@"; fi
    ' _ "$cwd" "${AGENT_CMD[@]}" >> "$logf" 2>&1
    rc=$?
    # 124 = timed out. When the -k escalation has to send KILL, timeout exits
    # 128+9 instead; count that as a timeout only if the limit really elapsed,
    # so an unrelated SIGKILL (e.g. OOM) is still reported as a plain failure.
    if [[ $rc -eq 124 ]] || [[ $rc -eq 137 && $((SECONDS - t0)) -ge $tmo ]]; then
      timed_out=true
    fi
  elif [[ "$tmo" -gt 0 ]]; then
    # Portable watchdog fallback (no timeout binary). Flags the timeout and
    # signals the worker; install coreutils (gtimeout) for hard tree kills.
    _run_agent >> "$logf" 2>&1 &
    local apid=$!
    (
      sleep "$tmo"
      : > "$tmo_flag"
      pkill -TERM -P "$apid" 2>/dev/null; kill -TERM "$apid" 2>/dev/null
      sleep 5
      pkill -KILL -P "$apid" 2>/dev/null; kill -KILL "$apid" 2>/dev/null
    ) &
    local wpid=$!
    wait "$apid" 2>/dev/null
    rc=$?
    # Agent finished: stop the watchdog and its pending sleep.
    pkill -P "$wpid" 2>/dev/null
    kill "$wpid" 2>/dev/null
    wait "$wpid" 2>/dev/null
    [[ -f "$tmo_flag" ]] && timed_out=true
  elif [[ -n "$FLOCK_BIN" ]]; then
    # Cross-process hardening where flock exists (Linux CI). The single run-loop
    # already serializes by lock key; this guards against a stray second launcher.
    ( "$FLOCK_BIN" -n 9 || exit 75; _run_agent ) 9>"$lf" >> "$logf" 2>&1
    rc=$?
    if [[ $rc -eq 75 ]]; then
      echo "lock busy (key=$lockkey) — requeued to inbox" >> "$logf"
      mv "$doing_file" "$INBOX/" 2>/dev/null
      { echo "ended=$(date +%s)"; echo "result=requeued"; } >> "$metaf"
      return 0
    fi
  else
    _run_agent >> "$logf" 2>&1
    rc=$?
  fi
  rm -f "$tmo_flag"

  echo "exit=$rc" >> "$metaf"
  if $timed_out; then
    mv "$doing_file" "$FAILED/" 2>/dev/null
    echo "result=timeout" >> "$metaf"
    echo "ended=$(date +%s)" >> "$metaf"
    echo "TIMED OUT after ${tmo}s (rc=$rc): $(date)" >> "$logf"
  elif [[ $rc -eq 0 ]]; then
    # Agent succeeded: land in review/, then run the auto-QA verify gate. The
    # worker is still alive here so the concurrency slot stays held through
    # verification — `ended=` is written only once we reach a resting stage.
    mv "$doing_file" "$REVIEW/" 2>/dev/null
    local review_file="$REVIEW/$job.md"
    echo "completed OK (rc=0): landed in review — $(date)" >> "$logf"
    local verify
    verify=$(fm_get "$review_file" verify "$DEFAULT_VERIFY")
    if [[ -z "$verify" ]]; then
      echo "result=review" >> "$metaf"
      echo "ended=$(date +%s)" >> "$metaf"
      echo "no verify command — parked in review for manual promote: $(date)" >> "$logf"
    else
      echo "----- verify: $verify -----" >> "$logf"
      local vrc=0
      ( cd "$cwd" && bash -c "$verify" ) >> "$logf" 2>&1 || vrc=$?
      echo "verify_exit=$vrc" >> "$metaf"
      if [[ $vrc -eq 0 ]]; then
        mv "$review_file" "$TESTING/" 2>/dev/null
        echo "result=testing" >> "$metaf"
        echo "ended=$(date +%s)" >> "$metaf"
        echo "VERIFY PASSED — promoted to testing (QA): $(date)" >> "$logf"
      else
        mv "$review_file" "$FAILED/" 2>/dev/null
        echo "result=verify_failed" >> "$metaf"
        echo "ended=$(date +%s)" >> "$metaf"
        echo "VERIFY FAILED (rc=$vrc): $(date)" >> "$logf"
      fi
    fi
  else
    mv "$doing_file" "$FAILED/" 2>/dev/null
    echo "result=failed" >> "$metaf"
    echo "ended=$(date +%s)" >> "$metaf"
    echo "FAILED (rc=$rc): $(date)" >> "$logf"
  fi
}

# ── Commands ────────────────────────────────────────────────────────
cmd_init() {
  ensure_dirs
  log "queue initialized at $C_BOLD$QUEUE_ROOT$C_RESET"
}

# add <file.md> [--engine E] [--cwd PATH] [--yolo|--no-yolo] — copy a prompt
# into inbox/ under a timestamped name (the timestamp gives the queue order).
cmd_add() {
  ensure_dirs
  local file="" engine="" cwd="" yolo=""
  while [[ $# -gt 0 ]]; do
    case "$1" in
      --engine | --cwd)
        # Without this guard a trailing flag hits an unbound $2.
        [[ $# -ge 2 ]] || die "add: $1 requires a value"
        if [[ "$1" == "--engine" ]]; then engine=$2; else cwd=$2; fi
        shift 2
        ;;
      --yolo) yolo=true; shift ;;
      --no-yolo) yolo=false; shift ;;
      *) file=$1; shift ;;
    esac
  done
  [[ -n "$file" && -f "$file" ]] \
    || die "usage: add <file.md> [--engine devin|claude|codex] [--cwd PATH] [--yolo|--no-yolo]"

  local base stamp dest
  base=$(basename "$file")
  stamp=$(date +%Y%m%d-%H%M%S)
  dest="$INBOX/${stamp}__${base}"

  # If user passed flags AND the file has no frontmatter, inject one.
  local flags="$engine$cwd$yolo" has_fm=false
  [[ "$(head -n 1 "$file")" == "---" ]] && has_fm=true
  if [[ -n "$flags" ]] && ! $has_fm; then
    {
      echo "---"
      echo "engine: ${engine:-$DEFAULT_ENGINE}"
      echo "cwd: ${cwd:-$PWD}"
      echo "yolo: ${yolo:-true}"
      echo "---"
      echo
      cat "$file"
    } > "$dest" || die "add: cannot write $dest"
  else
    # The file's own frontmatter wins; say so instead of dropping flags silently.
    if [[ -n "$flags" ]]; then
      err "add: $base already has frontmatter — command-line flags ignored"
    fi
    cp "$file" "$dest" || die "add: cannot copy to $dest"
  fi
  log "queued $C_BOLD$(basename "$dest")$C_RESET (engine=$(fm_get "$dest" engine "$DEFAULT_ENGINE"), cwd=$(fm_get "$dest" cwd "$PWD"))"
}

# run [--max N] [--engine E] [--once|--drain] — the foreground launcher loop.
cmd_run() {
  ensure_dirs
  local once=false
  while [[ $# -gt 0 ]]; do
    case "$1" in
      --max)
        [[ $# -ge 2 ]] || die "run: --max requires a value"
        MAX_CONCURRENCY=$2
        shift 2
        ;;
      --engine)
        [[ $# -ge 2 ]] || die "run: --engine requires a value"
        DEFAULT_ENGINE=$2
        shift 2
        ;;
      --once | --drain) once=true; shift ;;
      *) die "run: unknown arg '$1'" ;;
    esac
  done
  # A non-numeric limit would make every capacity test below error out.
  [[ "$MAX_CONCURRENCY" =~ ^[0-9]+$ ]] \
    || die "run: max concurrency must be a non-negative integer (got '$MAX_CONCURRENCY')"
  MAX_CONCURRENCY=$((10#$MAX_CONCURRENCY))

  # Refuse to start a second run loop against the same queue — two daemons would
  # break the single-launcher invariant that per-cwd locking relies on.
  local dpid=""
  [[ -f "$STATE/daemon.pid" ]] && dpid=$(cat "$STATE/daemon.pid" 2>/dev/null)
  if [[ -n "$dpid" ]] && kill -0 "$dpid" 2>/dev/null; then
    die "a run loop is already active (pid $dpid). Use 'stop' first, or a different AGENT_QUEUE_ROOT."
  fi
  [[ -n "$dpid" ]] && log "clearing stale daemon.pid ($dpid)"
  echo "$$" > "$STATE/daemon.pid"
  trap 'rm -f "$STATE/daemon.pid"; log "run loop stopped"; exit 0' INT TERM

  log "run loop started (max=$MAX_CONCURRENCY, default engine=$DEFAULT_ENGINE). Ctrl-C to stop."
  while true; do
    local running
    running=$(active_workers)
    # launch jobs while we have capacity and an eligible inbox file
    while [[ "$running" -lt "$MAX_CONCURRENCY" ]]; do
      # pick the oldest inbox file whose lock key is not currently busy, so two
      # jobs sharing a cwd (or `lock:` key) never run at once, regardless of --max.
      # The glob expands in sorted order; names carry a timestamp prefix from
      # `add`, so the first match is the oldest.
      local busy
      busy=$(busy_keys)
      local next="" cand cand_key
      for cand in "$INBOX"/*.md; do
        [[ -e "$cand" ]] || continue
        cand_key=$(lock_key_for "$cand")
        if grep -qxF -- "$cand_key" <<< "$busy"; then continue; fi
        next="$cand"
        break
      done
      [[ -z "$next" ]] && break

      local job
      job=$(basename "$next")
      job=${job%.md}
      local doing_file
      doing_file="$BUILDING/$(basename "$next")"
      # Never launch on a file we failed to claim: the worker would otherwise
      # run the agent with an empty prompt. Retry on the next poll instead.
      if ! mv "$next" "$doing_file"; then
        err "could not move $(basename "$next") to building/ — skipping this round"
        break
      fi

      local w_eng w_cwd w_yolo w_key
      w_eng=$(fm_get "$doing_file" engine "$DEFAULT_ENGINE")
      w_cwd=$(fm_get "$doing_file" cwd "$PWD")
      w_yolo=$(fm_get "$doing_file" yolo "true")
      w_key=$(lock_key_for "$doing_file")
      # write meta BEFORE launch (no pid yet), then append the worker pid from $!
      {
        echo "job=$job"
        echo "engine=$w_eng"
        echo "cwd=$w_cwd"
        echo "yolo=$w_yolo"
        echo "lock=$w_key"
        echo "started=$(date +%s)"
      } > "$STATE/$job.meta"
      run_worker "$doing_file" &
      { echo "pid=$!"; echo "pidstart=$(_pidstart "$!")"; } >> "$STATE/$job.meta"
      log "▶ launching $C_BOLD$job$C_RESET (engine=$w_eng, lock=$w_key)"
      sleep 1
      running=$(active_workers)
    done

    if $once; then
      if [[ "$(active_workers)" -eq 0 && "$(_count "$INBOX")" -eq 0 ]]; then
        log "drain complete — inbox empty, no workers running"
        rm -f "$STATE/daemon.pid"
        exit 0
      fi
    fi
    sleep "$POLL_SECONDS"
  done
}

# _count <dir> -> number of .md files directly inside <dir>
_count() {
  local f n=0
  for f in "$1"/*.md; do
    if [[ -e "$f" || -L "$f" ]]; then n=$((n + 1)); fi
  done
  echo "$n"
}

# status — kanban counts plus one row per live worker (elapsed, pid, last log line).
cmd_status() {
  ensure_dirs
  local ib bd rv ts sh fl
  ib=$(_count "$INBOX"); bd=$(_count "$BUILDING"); rv=$(_count "$REVIEW")
  ts=$(_count "$TESTING"); sh=$(_count "$SHIPPED"); fl=$(_count "$FAILED")
  local running
  running=$(active_workers)

  echo
  printf '%s AGENT QUEUE %s %s\n' "$C_BOLD" "$C_DIM$QUEUE_ROOT$C_RESET" ""
  printf ' %sinbox%s %-3s %sbuilding%s %-3s %sreview%s %-3s %stesting%s %-3s %sshipped%s %-3s %sfailed%s %-3s %srunning%s %s/%s\n\n' \
    "$C_BLUE" "$C_RESET" "$ib" "$C_YEL" "$C_RESET" "$bd" \
    "$C_CYAN" "$C_RESET" "$rv" "$C_CYAN" "$C_RESET" "$ts" \
    "$C_GREEN" "$C_RESET" "$sh" "$C_RED" "$C_RESET" "$fl" \
    "$C_BOLD" "$C_RESET" "$running" "$MAX_CONCURRENCY"

  # running table
  local f
  local printed=false
  for f in "$STATE"/*.meta; do
    [[ -e "$f" ]] || continue
    grep -q '^ended=' "$f" && continue
    local pid pidstart
    pid=$(grep '^pid=' "$f" | head -1 | cut -d= -f2)
    pidstart=$(grep '^pidstart=' "$f" | head -1 | cut -d= -f2-)
    _pid_alive "$pid" "$pidstart" || continue
    if ! $printed; then
      printf ' %sRUNNING%s\n' "$C_BOLD" "$C_RESET"
      printed=true
    fi
    local job eng start now el last lmt age stall=""
    job=$(grep '^job=' "$f" | head -1 | cut -d= -f2)
    eng=$(grep '^engine=' "$f" | head -1 | cut -d= -f2)
    start=$(grep '^started=' "$f" | head -1 | cut -d= -f2)
    now=$(date +%s)
    el=$(( now - ${start:-$now} ))
    last=$(tail -n 1 "$LOGS/$job.log" 2>/dev/null | cut -c1-60)
    # Stall detection: how long since the job's log last changed.
    lmt=$(_mtime "$LOGS/$job.log")
    age=$(( now - ${lmt:-$now} ))
    [[ "$age" -gt $(( STALL_MIN * 60 )) ]] && stall="${C_RED}⚠ stalled${C_RESET} "
    printf ' %s%-26s%s %-7s %3dm%02ds pid %-6s %s%s%s%s\n' \
      "$C_BOLD" "$job" "$C_RESET" "$eng" $((el / 60)) $((el % 60)) "$pid" \
      "$stall" "$C_DIM" "$last" "$C_RESET"
  done
  $printed || printf ' %sno workers running%s\n' "$C_DIM" "$C_RESET"
  echo
}

# watch [interval] — redraw status every <interval> seconds (default 2).
cmd_watch() {
  local interval="${1:-2}"
  while true; do
    clear
    cmd_status
    sleep "$interval"
  done
}

# dash — hand over to the Node dashboard that ships next to this script.
cmd_dash() {
  command -v node >/dev/null 2>&1 || die "node not found — use 'watch' for the bash status view"
  AGENT_QUEUE_ROOT="$QUEUE_ROOT" exec node "$SCRIPT_DIR/dashboard.mjs" "$@"
}

# _descendants <pid> -> every live descendant pid of <pid>, one per line
# (best-effort: prints nothing when pgrep is unavailable).
_descendants() {
  local child
  for child in $(pgrep -P "$1" 2>/dev/null); do
    printf '%s\n' "$child"
    _descendants "$child"
  done
}

# stop — terminate the run loop and every live worker together with its agent
# process tree. A stopped job is recorded as result=stopped and, if still in
# building/, parked in failed/ so `requeue` can pick it up again.
cmd_stop() {
  ensure_dirs
  # Stop the run loop FIRST so it cannot launch new jobs into the slots freed below.
  local dpid=""
  [[ -f "$STATE/daemon.pid" ]] && dpid=$(cat "$STATE/daemon.pid" 2>/dev/null)
  [[ -n "$dpid" ]] && kill "$dpid" 2>/dev/null
  rm -f "$STATE/daemon.pid"

  local killed=0 f pid pidstart kids job
  for f in "$STATE"/*.meta; do
    [[ -e "$f" ]] || continue
    grep -q '^ended=' "$f" && continue
    pid=$(grep '^pid=' "$f" | head -1 | cut -d= -f2)
    pidstart=$(grep '^pidstart=' "$f" | head -1 | cut -d= -f2-)
    _pid_alive "$pid" "$pidstart" || continue
    # Snapshot the tree before signalling: once the worker shell dies its
    # children are re-parented and can no longer be found through it. The
    # worker goes first so it cannot react to its agent's death.
    kids=$(_descendants "$pid")
    kill "$pid" 2>/dev/null || continue
    killed=$((killed + 1))
    # shellcheck disable=SC2086 — $kids is a whitespace-separated pid list
    [[ -n "$kids" ]] && kill $kids 2>/dev/null
    # The worker can no longer finalize the job, so do it here.
    { echo "result=stopped"; echo "ended=$(date +%s)"; } >> "$f"
    job=$(grep '^job=' "$f" | head -1 | cut -d= -f2-)
    if [[ -n "$job" && -f "$BUILDING/$job.md" ]]; then
      mv "$BUILDING/$job.md" "$FAILED/" 2>/dev/null
    fi
  done
  log "stopped $killed running worker(s) + run loop"
}

# logs <job> [-f] — print (or follow) a job's log; falls back to the newest
# log whose name contains <job>.
cmd_logs() {
  local job="${1:-}" follow=""
  [[ "${2:-}" == "-f" || "$job" == "-f" ]] && follow="-f"
  [[ "$job" == "-f" ]] && job="${2:-}"
  [[ -n "$job" ]] || die "usage: logs <job> [-f]"
  local lf="$LOGS/$job.log"
  [[ -f "$lf" ]] || lf=$(ls -1t "$LOGS"/*"$job"*.log 2>/dev/null | head -1)
  [[ -f "$lf" ]] || die "no log found for '$job'"
  if [[ -n "$follow" ]]; then
    tail -f "$lf"
  else
    cat "$lf"
  fi
}

# _find_job <job> <dir>... — echo the first matching .md across the given dirs
# (exact "<job>.md" preferred, else newest fuzzy match). Empty if none found.
_find_job() {
  local job=$1
  shift
  local d f
  for d in "$@"; do
    if [[ -f "$d/$job.md" ]]; then
      printf '%s' "$d/$job.md"
      return
    fi
  done
  for d in "$@"; do
    f=$(ls -1t "$d"/*"$job"*.md 2>/dev/null | head -1)
    if [[ -f "$f" ]]; then
      printf '%s' "$f"
      return
    fi
  done
}

# requeue <job> — move a job back to inbox/ for a fresh run (from failed/review/testing).
cmd_requeue() {
  ensure_dirs
  local job="${1:-}"
  [[ -n "$job" ]] || die "usage: requeue <job>"
  local f
  f=$(_find_job "$job" "$FAILED" "$REVIEW" "$TESTING")
  [[ -n "$f" ]] || die "no failed/review/testing job matching '$job'"
  local base name from
  base=$(basename "$f")
  name=${base%.md}
  from=$(basename "$(dirname "$f")")
  mv "$f" "$INBOX/$base" || die "requeue: could not move $base to inbox/"
  # drop stale state so it re-runs cleanly
  rm -f "$STATE/$name.meta" "$STATE/$name.body.md" "$STATE/$name.timedout"
  log "requeued $C_BOLD$base$C_RESET ($from → inbox)"
}

# ship <job> — manual promotion testing/ (QA) → shipped/. The human gate.
cmd_ship() {
  ensure_dirs
  local job="${1:-}"
  [[ -n "$job" ]] || die "usage: ship <job>"
  local f
  f=$(_find_job "$job" "$TESTING")
  [[ -n "$f" ]] || die "no job in testing/ matching '$job' (only QA-passed jobs can ship)"
  local base name
  base=$(basename "$f")
  name=${base%.md}
  mv "$f" "$SHIPPED/$base" || die "ship: could not move $base to shipped/"
  [[ -f "$STATE/$name.meta" ]] && echo "result=shipped" >> "$STATE/$name.meta"
  log "shipped $C_BOLD$base$C_RESET (testing → shipped)"
}

# promote <job> — advance one stage forward: review → testing → shipped.
cmd_promote() {
  ensure_dirs
  local job="${1:-}"
  [[ -n "$job" ]] || die "usage: promote <job>"
  local f
  f=$(_find_job "$job" "$REVIEW" "$TESTING")
  [[ -n "$f" ]] || die "no job in review/ or testing/ matching '$job'"

  local base name from dest result
  base=$(basename "$f")
  name=${base%.md}
  from=$(basename "$(dirname "$f")")
  # The stage is derived from the folder the job currently sits in.
  case "$from" in
    review) dest="$TESTING"; result="testing" ;;
    testing) dest="$SHIPPED"; result="shipped" ;;
    *) die "promote: '$base' is in '$from' — nothing to promote" ;;
  esac
  # Record and announce the promotion only if the move really happened.
  mv "$f" "$dest/$base" || die "promote: could not move $base to $result/"
  [[ -f "$STATE/$name.meta" ]] && echo "result=$result" >> "$STATE/$name.meta"
  log "promoted $C_BOLD$base$C_RESET ($from → $result)"
}

# reject <job> — move a review/testing job to failed/ (manual gate rejection).
cmd_reject() {
  ensure_dirs
  local job="${1:-}"
  [[ -n "$job" ]] || die "usage: reject <job>"
  local f
  f=$(_find_job "$job" "$REVIEW" "$TESTING")
  [[ -n "$f" ]] || die "no job in review/ or testing/ matching '$job'"

  local base name from
  base=$(basename "$f")
  name=${base%.md}
  from=$(basename "$(dirname "$f")")
  mv "$f" "$FAILED/$base" || die "reject: could not move $base to failed/"
  [[ -f "$STATE/$name.meta" ]] && echo "result=rejected" >> "$STATE/$name.meta"
  log "rejected $C_BOLD$base$C_RESET ($from → failed)"
}

# clean [--keep N] — archive finished jobs' logs+meta beyond the newest N
# (default 50) into queue/.archive/<timestamp>/. Running jobs and the done/failed .md
# kanban records are left untouched.
cmd_clean() {
  ensure_dirs
  local keep=50
  while [[ $# -gt 0 ]]; do
    case "$1" in
      --keep)
        [[ $# -ge 2 ]] || die "clean: --keep requires a value"
        keep=$2
        shift 2
        ;;
      *) die "clean: unknown arg '$1'" ;;
    esac
  done
  [[ "$keep" =~ ^[0-9]+$ ]] || die "clean: --keep must be a number"
  keep=$((10#$keep)) # base 10, so a zero-padded value is not read as octal

  local arch
  arch="$QUEUE_ROOT/.archive/$(date +%Y%m%d-%H%M%S)"

  # finished metas (have ended=), newest-first by mtime. The mtime and path are
  # TAB-separated and the path is recovered with `cut -f2-`, so queue paths
  # containing spaces survive intact.
  local metas
  metas=$(grep -l '^ended=' "$STATE"/*.meta 2>/dev/null \
    | while IFS= read -r m; do printf '%s\t%s\n' "$(_mtime "$m")" "$m"; done \
    | sort -rn \
    | cut -f2-)

  local i=0 moved=0 m name
  while IFS= read -r m; do
    [[ -n "$m" ]] || continue
    i=$((i + 1))
    [[ "$i" -le "$keep" ]] && continue # within the newest N: keep in place
    name=$(basename "$m")
    name=${name%.meta}
    mkdir -p "$arch" || die "clean: cannot create $arch"
    # Count a job as archived only if its meta actually moved.
    mv "$m" "$arch/" 2>/dev/null || continue
    [[ -f "$LOGS/$name.log" ]] && mv "$LOGS/$name.log" "$arch/" 2>/dev/null
    [[ -f "$STATE/$name.body.md" ]] && mv "$STATE/$name.body.md" "$arch/" 2>/dev/null
    moved=$((moved + 1))
  done <<< "$metas"

  if [[ "$moved" -gt 0 ]]; then
    log "archived $moved finished job(s) to $C_BOLD$arch$C_RESET (kept newest $keep)"
  else
    log "nothing to clean (≤$keep finished jobs)"
  fi
}

# usage — print the help text.
# NOTE(review): the heredoc opener and the lines before "<command> [args]" were
# lost when this file was extracted; the USAGE heading below is a reconstruction
# and the <job>/<file.md> placeholders were restored from the commands' argument
# handling — confirm against the original.
usage() {
  cat <<EOF
${C_BOLD}USAGE${C_RESET}
  agent-queue <command> [args]

${C_BOLD}COMMANDS${C_RESET}
  init                                  create the queue/ folders
  add <file.md> [opts]                  queue a prompt file into inbox/
        --engine devin|claude|codex   --cwd PATH   --yolo | --no-yolo
  run [--max N] [--engine E] [--once]   process inbox/ (foreground loop; Ctrl-C to stop)
  status                                show kanban counts + running workers
  watch [interval]                      live status (default 2s, bash)
  dash [--interval N]                   richer live Node dashboard (recent shipped/failed too)
  stop                                  kill running workers + the run loop
  logs <job> [-f]                       print (or follow) a job's log
  promote <job>                         advance one stage (review → testing → shipped)
  ship <job>                            manual gate: testing (QA) → shipped
  reject <job>                          send a review/testing job to failed/
  requeue <job>                         move a failed/review/testing job back to inbox/
  clean [--keep N]                      archive finished logs+meta beyond newest N (default 50)
  help                                  this message

${C_BOLD}KANBAN${C_RESET}
  inbox → building → review → testing → shipped (+ failed; logs/ + .state/ alongside)
  auto: agent rc=0 → review; verify pass → testing; verify fail → failed
  manual: ship (testing → shipped)

${C_BOLD}TASK FRONTMATTER${C_RESET} (top of each .md)
  ---
  engine: devin
  cwd: /Users/you/code/repo
  yolo: true
  lock: my-repo         # optional; defaults to cwd. Jobs sharing a key run serially
  timeout: 45m          # optional; 90s|45m|2h|1d. On expiry -> failed (result=timeout)
  verify: pnpm -s test  # optional; auto-QA gate. pass -> testing, fail -> failed
  ---

${C_BOLD}ENV${C_RESET}
  AGENT_QUEUE_ROOT (=$QUEUE_ROOT)  AGENT_QUEUE_MAX (=$MAX_CONCURRENCY)
  AGENT_QUEUE_ENGINE (=$DEFAULT_ENGINE)  AGENT_QUEUE_VERIFY (default verify cmd)
  DEVIN_BIN / CLAUDE_BIN / CODEX_BIN
EOF
}

# main <command> [args...] — dispatch to the matching cmd_* function;
# no command means `help`, an unknown one prints usage and exits 1.
main() {
  local cmd="${1:-help}"
  shift || true
  case "$cmd" in
    init) cmd_init "$@" ;;
    add) cmd_add "$@" ;;
    run) cmd_run "$@" ;;
    status) cmd_status "$@" ;;
    watch) cmd_watch "$@" ;;
    dash | dashboard) cmd_dash "$@" ;;
    stop) cmd_stop "$@" ;;
    logs) cmd_logs "$@" ;;
    promote) cmd_promote "$@" ;;
    ship) cmd_ship "$@" ;;
    reject) cmd_reject "$@" ;;
    requeue) cmd_requeue "$@" ;;
    clean) cmd_clean "$@" ;;
    help | -h | --help) usage ;;
    *)
      err "unknown command: $cmd"
      echo
      usage
      exit 1
      ;;
  esac
}

main "$@"