Mark a running worker '⚠ stalled' when its log has not changed for more than AGENT_QUEUE_STALL_MIN minutes (default 10), using log mtime as the freshness signal. Implemented in both the bash status table and the Node dashboard.
513 lines
19 KiB
Bash
Executable File
513 lines
19 KiB
Bash
Executable File
#!/usr/bin/env bash
#
# agent-queue — a folder-based "kanban" runner for headless coding-agent CLIs.
#
# Drop a prompt .md file into queue/inbox/, and `agent-queue run` will:
#   1. pick the oldest file (respecting --max concurrency),
#   2. move it inbox/ -> doing/,
#   3. launch the chosen agent CLI (devin | claude | codex) in --yolo mode,
#   4. on success move doing/ -> done/, on failure -> failed/,
#   5. write a per-job log + live state so `status`/`watch` can show progress.
#
# Per-task config travels in YAML-ish frontmatter at the top of the .md:
#   ---
#   engine: devin          # devin | claude | codex (default: $DEFAULT_ENGINE)
#   cwd: /abs/path/repo    # where the agent runs (default: $PWD when added)
#   yolo: true             # auto-approve all tools (default: true)
#   ---
#
# Subcommands: init | add | run | status | watch | dash | stop | logs | help
#
set -uo pipefail

# ── Resolve paths ───────────────────────────────────────────────────
# Everything lives under QUEUE_ROOT, which defaults to a queue/ directory next
# to this script and can be relocated with AGENT_QUEUE_ROOT.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
QUEUE_ROOT="${AGENT_QUEUE_ROOT:-$SCRIPT_DIR/queue}"
INBOX="$QUEUE_ROOT/inbox"     # queued prompt files waiting for a slot
DOING="$QUEUE_ROOT/doing"     # jobs handed to a worker
DONE="$QUEUE_ROOT/done"       # jobs whose agent exited 0
FAILED="$QUEUE_ROOT/failed"   # jobs that failed or timed out
LOGS="$QUEUE_ROOT/logs"       # one <job>.log per job
STATE="$QUEUE_ROOT/.state"    # <job>.meta files, daemon.pid, stripped prompt bodies
LOCKS="$QUEUE_ROOT/locks"     # flock files, one per lock key

# ── Config (env-overridable) ────────────────────────────────────────
MAX_CONCURRENCY="${AGENT_QUEUE_MAX:-2}"
DEFAULT_ENGINE="${AGENT_QUEUE_ENGINE:-devin}"
POLL_SECONDS="${AGENT_QUEUE_POLL:-3}"
# A running worker is flagged "stalled" if its log has not changed in this many
# minutes (no new agent output) — surfaced in status + dash.
STALL_MIN="${AGENT_QUEUE_STALL_MIN:-10}"
# STALL_MIN is used in shell arithmetic, so accept only a plain non-negative
# integer; 10# keeps values with leading zeros ("08") from being read as octal.
if [[ "$STALL_MIN" =~ ^[0-9]+$ ]]; then
  STALL_MIN=$((10#$STALL_MIN))
else
  printf '[agent-queue] ignoring invalid AGENT_QUEUE_STALL_MIN=%s (using 10)\n' "$STALL_MIN" >&2
  STALL_MIN=10
fi

# flock is used for cross-process lock hardening when available (Linux). macOS
# has no flock; mutual exclusion there relies on the single run-loop (see cmd_run).
FLOCK_BIN="${FLOCK_BIN:-$(command -v flock || true)}"
# timeout/gtimeout give hard process-tree kills for per-job timeouts; if absent
# (stock macOS) a pure-bash watchdog is used as a best-effort fallback.
TIMEOUT_BIN="${TIMEOUT_BIN:-$(command -v timeout || command -v gtimeout || true)}"

# Agent CLIs: an explicit *_BIN wins, then whatever is on PATH, then a fallback name/path.
DEVIN_BIN="${DEVIN_BIN:-$(command -v devin || echo "$HOME/.local/bin/devin")}"
CLAUDE_BIN="${CLAUDE_BIN:-$(command -v claude || echo claude)}"
CODEX_BIN="${CODEX_BIN:-$(command -v codex || echo codex)}"

# ── Colors ──────────────────────────────────────────────────────────
# ANSI escapes are only defined when stdout is a terminal; otherwise every
# C_* variable is empty so redirected output stays plain text.
if [[ -t 1 ]]; then
  C_RESET=$'\033[0m'; C_DIM=$'\033[2m'; C_BOLD=$'\033[1m'
  C_BLUE=$'\033[34m'; C_GREEN=$'\033[32m'; C_RED=$'\033[31m'; C_YEL=$'\033[33m'; C_CYAN=$'\033[36m'
else
  C_RESET=""; C_DIM=""; C_BOLD=""; C_BLUE=""; C_GREEN=""; C_RED=""; C_YEL=""; C_CYAN=""
fi

# log <message...> — print an informational line on stdout, tagged "[agent-queue]" in cyan.
log() {
  printf '%s[agent-queue]%s %s\n' "$C_CYAN" "$C_RESET" "$*"
}
# err <message...> — print a diagnostic line on stderr, tagged "[agent-queue]" in red.
err() {
  printf '%s[agent-queue]%s %s\n' "$C_RED" "$C_RESET" "$*" >&2
}
# die <message...> — report the message through err, then exit the script with status 1.
die() {
  err "$*"
  exit 1
}

# ── Init ────────────────────────────────────────────────────────────
# ensure_dirs — create every queue directory. Safe to call repeatedly (mkdir -p).
ensure_dirs() {
  mkdir -p -- "$INBOX" "$DOING" "$DONE" "$FAILED" "$LOGS" "$STATE" "$LOCKS"
}

# ── Frontmatter parsing ─────────────────────────────────────────────
# fm_get <file> <key> <default>
#   Print the value of <key> from the leading ---...--- block of <file>, or
#   <default> when the file has no such block, the key is absent, or its value
#   is empty. Only the first occurrence of the key is used.
#   Surrounding quotes and whitespace are trimmed. On an unquoted value a
#   trailing "  # comment" is dropped, so annotated frontmatter such as
#   "engine: devin   # pick one" yields "devin"; quote a value that really
#   contains " #". CRLF line endings are tolerated.
fm_get() {
  local file=$1 key=$2 def=${3:-}
  local val
  # only scan a leading --- ... --- block
  val=$(awk -v k="$key" '
    { sub(/\r$/, "") }
    NR == 1 && $0 != "---" { exit }
    NR == 1 { infm = 1; next }
    infm && $0 == "---" { exit }
    infm {
      line = $0
      sub(/^[ \t]*/, "", line)
      if (line ~ "^" k "[ \t]*:") {
        sub("^" k "[ \t]*:[ \t]*", "", line)
        # inline comment handling applies to unquoted values only
        if (line ~ /^#/) line = ""
        else if (line !~ /^["'\'']/) sub(/[ \t]+#.*$/, "", line)
        gsub(/^["'\''[:space:]]+|["'\''[:space:]]+$/, "", line)
        print line; exit
      }
    }' "$file" 2>/dev/null)
  if [[ -n "$val" ]]; then
    printf '%s' "$val"
  else
    printf '%s' "$def"
  fi
}

# strip_frontmatter <file> -> prints the body (everything after a leading ---..--- block)
#   A file without a leading "---" line is printed unchanged. Later "---" lines
#   in the body are kept. The delimiter is also recognised with a CRLF line
#   ending; body lines are printed exactly as read.
strip_frontmatter() {
  awk '
    { probe = $0; sub(/\r$/, "", probe) }
    NR == 1 && probe == "---" { infm = 1; next }
    infm && probe == "---" { infm = 0; next }
    !infm { print }
  ' "$1"
}

# lock_key_for <file> -> the mutual-exclusion key for a job: frontmatter `lock:`
# if set, otherwise the cwd. Jobs sharing a key never run concurrently.
# When the file names neither, the key falls back to the caller's $PWD.
lock_key_for() {
  local f=$1 k
  k=$(fm_get "$f" lock "")
  if [[ -n "$k" ]]; then
    printf '%s' "$k"
    return
  fi
  fm_get "$f" cwd "$PWD"
}

# _keyhash <key> -> stable filename-safe token for a lock key
#   The token is the checksum field of `cksum` over the key's bytes.
_keyhash() {
  printf '%s' "$1" | cksum | awk '{ print $1 }'
}

# _mtime <file> -> file modification time in epoch seconds (GNU or BSD stat); empty if missing
#   Prints an empty line when the file is missing or neither stat form yields a
#   plain integer, so callers can rely on "digits or nothing".
_mtime() {
  local path=$1 out
  [[ -e "$path" ]] || { echo ""; return; }
  # The GNU form must be tried first: GNU stat given the BSD form (-f %m) treats
  # -f as --file-system, prints filesystem info for the file on stdout and then
  # fails, which would pollute the result. BSD stat rejects -c without writing
  # anything to stdout, so falling through to -f %m is clean.
  out=$(stat -c %Y -- "$path" 2>/dev/null) \
    || out=$(stat -f %m -- "$path" 2>/dev/null) \
    || out=""
  [[ "$out" =~ ^[0-9]+$ ]] || out=""
  echo "$out"
}

# _dur_to_secs <dur> -> seconds. Accepts 90, 90s, 45m, 2h, 1d. Invalid/empty -> 0.
#   Digits are always read as base 10, so "08m" is 480 rather than an
#   invalid-octal arithmetic error.
_dur_to_secs() {
  local d=${1:-}
  if ! [[ "$d" =~ ^([0-9]+)([smhd]?)$ ]]; then
    echo 0
    return
  fi
  local n=$((10#${BASH_REMATCH[1]})) unit=${BASH_REMATCH[2]}
  case "$unit" in
    "" | s) echo "$n" ;;
    m) echo $((n * 60)) ;;
    h) echo $((n * 3600)) ;;
    d) echo $((n * 86400)) ;;
  esac
}

# busy_keys -> newline list of lock keys currently held by active workers.
# A worker is active if its meta has no `ended=` and its pid is live (or the pid
# has not been written yet, i.e. it was just launched and the slot is reserved).
# Reads every $STATE/*.meta; only the first pid= and lock= line of each counts.
busy_keys() {
  local f pid
  for f in "$STATE"/*.meta; do
    [[ -e "$f" ]] || continue          # glob matched nothing
    grep -q '^ended=' "$f" && continue # worker already finished
    pid=$(grep '^pid=' "$f" | head -1 | cut -d= -f2)
    if [[ -z "$pid" ]] || kill -0 "$pid" 2>/dev/null; then
      # -f2- keeps keys that themselves contain '='
      grep '^lock=' "$f" | head -1 | cut -d= -f2-
    fi
  done
}

# ── Engine driver: builds argv into AGENT_CMD[]; sets AGENT_STDIN if the ──
# prompt should be fed on stdin (claude/codex) rather than a flag. $pf is the
# frontmatter-STRIPPED body file, so a body starting with '--' is never
# misparsed as a CLI option.
#
# build_agent_cmd <engine> <prompt_file> <yolo>
#   Sets the globals AGENT_CMD (argv array) and AGENT_STDIN (file to redirect
#   onto stdin, or empty). The auto-approve flag is added only when <yolo> is
#   exactly "true". An unknown engine goes to die. Returns 0 for every known
#   engine, whatever the yolo value.
build_agent_cmd() {
  local engine=$1 pf=$2 yolo=$3
  AGENT_CMD=()
  AGENT_STDIN=""
  case "$engine" in
    devin)
      AGENT_CMD=( "$DEVIN_BIN" -p --prompt-file "$pf" )
      if [[ "$yolo" == "true" ]]; then
        AGENT_CMD+=( --permission-mode dangerous )
      fi
      ;;
    claude)
      AGENT_CMD=( "$CLAUDE_BIN" -p )
      if [[ "$yolo" == "true" ]]; then
        AGENT_CMD+=( --dangerously-skip-permissions )
      fi
      AGENT_STDIN="$pf"
      ;;
    codex)
      AGENT_CMD=( "$CODEX_BIN" exec )
      if [[ "$yolo" == "true" ]]; then
        AGENT_CMD+=( --dangerously-bypass-approvals-and-sandbox )
      fi
      AGENT_STDIN="$pf"
      ;;
    *) die "unknown engine '$engine' (use: devin | claude | codex)" ;;
  esac
}

# ── Worker: runs one job to completion (invoked in background) ───────
# run_worker <doing_file>
#   Reads engine/cwd/yolo/timeout/lock from the job's frontmatter, runs the
#   agent with all output appended to $LOGS/<job>.log, then moves the job file
#   to done/ or failed/ and appends ended/exit/result lines to
#   $STATE/<job>.meta.
#   Returns 1 without launching anything when the cwd is missing or the engine
#   is unknown (the job goes to failed/); returns 0 after requeueing a job whose
#   flock is already held.
run_worker() {
  local doing_file=$1
  local job
  job=$(basename "$doing_file")
  job=${job%.md}
  local engine cwd yolo logf metaf
  engine=$(fm_get "$doing_file" engine "$DEFAULT_ENGINE")
  cwd=$(fm_get "$doing_file" cwd "$PWD")
  yolo=$(fm_get "$doing_file" yolo "true")
  logf="$LOGS/$job.log"
  metaf="$STATE/$job.meta"
  # NOTE: the parent (cmd_run) creates $metaf with job/engine/cwd/started/pid.
  # The worker only ever APPENDS (ended/exit/result) to avoid a truncation race.

  {
    echo "===== agent-queue job: $job ====="
    echo "engine=$engine cwd=$cwd yolo=$yolo"
    echo "started: $(date)"
    echo "================================="
  } >> "$logf"

  if [[ ! -d "$cwd" ]]; then
    echo "FATAL: cwd does not exist: $cwd" >> "$logf"
    mv "$doing_file" "$FAILED/" 2>/dev/null
    echo "result=failed" >> "$metaf"; echo "ended=$(date +%s)" >> "$metaf"
    return 1
  fi

  # Reject an unknown engine here: build_agent_cmd would die (exit) inside this
  # background worker, leaving the job in doing/ with no result recorded.
  case "$engine" in
    devin | claude | codex) ;;
    *)
      echo "FATAL: unknown engine '$engine' (use: devin | claude | codex)" >> "$logf"
      mv "$doing_file" "$FAILED/" 2>/dev/null
      echo "result=failed" >> "$metaf"; echo "ended=$(date +%s)" >> "$metaf"
      return 1
      ;;
  esac

  # Strip our frontmatter so the agent only sees the task body.
  local bodyf="$STATE/$job.body.md"
  strip_frontmatter "$doing_file" > "$bodyf"
  build_agent_cmd "$engine" "$bodyf" "$yolo"

  # Runs the agent from $cwd in a subshell, feeding the prompt on stdin when
  # the engine wants it there.
  _run_agent() {
    if [[ -n "$AGENT_STDIN" ]]; then
      ( cd "$cwd" && "${AGENT_CMD[@]}" < "$AGENT_STDIN" )
    else
      ( cd "$cwd" && "${AGENT_CMD[@]}" )
    fi
  }

  local rc=0 lockkey tmo timed_out=false
  lockkey=$(lock_key_for "$doing_file")
  tmo=$(_dur_to_secs "$(fm_get "$doing_file" timeout "0")")
  local tmo_flag="$STATE/$job.timedout"
  rm -f "$tmo_flag"
  local lf="$LOCKS/$(_keyhash "$lockkey").lock"
  local t0=$SECONDS

  if [[ "$tmo" -gt 0 && -n "$TIMEOUT_BIN" ]]; then
    # Hard timeout via timeout/gtimeout (kills the whole process tree).
    AQ_STDIN="$AGENT_STDIN" "$TIMEOUT_BIN" -k 5 "${tmo}s" bash -c '
      cd "$1" || exit 97; shift
      if [ -n "${AQ_STDIN:-}" ]; then exec "$@" < "$AQ_STDIN"; else exec "$@"; fi
    ' _ "$cwd" "${AGENT_CMD[@]}" >> "$logf" 2>&1
    rc=$?
    # 124 is a plain timeout. When the agent ignores TERM and -k escalates to
    # KILL, timeout reports 137; count that as a timeout too, but only if the
    # full duration actually elapsed (137 can also mean an unrelated kill).
    if [[ $rc -eq 124 ]] || { [[ $rc -eq 137 ]] && (( SECONDS - t0 >= tmo )); }; then
      timed_out=true
    fi
  elif [[ "$tmo" -gt 0 ]]; then
    # Portable watchdog fallback (no timeout binary). Flags the timeout and
    # signals the worker; install coreutils (gtimeout) for hard tree kills.
    _run_agent >> "$logf" 2>&1 &
    local apid=$!
    (
      sleep "$tmo"
      : > "$tmo_flag"
      pkill -TERM -P "$apid" 2>/dev/null
      kill -TERM "$apid" 2>/dev/null
      sleep 5
      pkill -KILL -P "$apid" 2>/dev/null
      kill -KILL "$apid" 2>/dev/null
    ) &
    local wpid=$!
    wait "$apid" 2>/dev/null
    rc=$?
    kill "$wpid" 2>/dev/null
    wait "$wpid" 2>/dev/null
    [[ -f "$tmo_flag" ]] && timed_out=true
  elif [[ -n "$FLOCK_BIN" ]]; then
    # Cross-process hardening where flock exists (Linux CI). The single run-loop
    # already serializes by lock key; this guards against a stray second launcher.
    ( "$FLOCK_BIN" -n 9 || exit 75; _run_agent ) 9>"$lf" >> "$logf" 2>&1
    rc=$?
    if [[ $rc -eq 75 ]]; then
      echo "lock busy (key=$lockkey) — requeued to inbox" >> "$logf"
      mv "$doing_file" "$INBOX/" 2>/dev/null
      { echo "ended=$(date +%s)"; echo "result=requeued"; } >> "$metaf"
      return 0
    fi
  else
    _run_agent >> "$logf" 2>&1
    rc=$?
  fi
  rm -f "$tmo_flag"

  echo "ended=$(date +%s)" >> "$metaf"
  echo "exit=$rc" >> "$metaf"
  if $timed_out; then
    mv "$doing_file" "$FAILED/" 2>/dev/null
    echo "result=timeout" >> "$metaf"
    echo "TIMED OUT after ${tmo}s (rc=$rc): $(date)" >> "$logf"
  elif [[ $rc -eq 0 ]]; then
    mv "$doing_file" "$DONE/" 2>/dev/null
    echo "result=done" >> "$metaf"
    echo "completed OK (rc=0): $(date)" >> "$logf"
  else
    mv "$doing_file" "$FAILED/" 2>/dev/null
    echo "result=failed" >> "$metaf"
    echo "FAILED (rc=$rc): $(date)" >> "$logf"
  fi
}

# count live workers by checking recorded pids
# live_workers -> prints how many $STATE/*.meta files have no `ended=` line and
# a recorded pid that still answers `kill -0`. Metas with no pid yet are not
# counted here (busy_keys is the one that treats them as reserved).
live_workers() {
  local n=0 f pid
  for f in "$STATE"/*.meta; do
    [[ -e "$f" ]] || continue          # glob matched nothing
    grep -q '^ended=' "$f" && continue
    pid=$(grep '^pid=' "$f" | head -1 | cut -d= -f2)
    if [[ -n "$pid" ]] && kill -0 "$pid" 2>/dev/null; then
      n=$((n + 1))
    fi
  done
  echo "$n"
}

# ── Commands ────────────────────────────────────────────────────────
# cmd_init — create the queue directory tree and report where it lives.
cmd_init() {
  ensure_dirs
  log "queue initialized at $C_BOLD$QUEUE_ROOT$C_RESET"
}

# cmd_add <file.md> [--engine E] [--cwd PATH] [--yolo|--no-yolo]
#   Copy a prompt file into inbox/ as <timestamp>__<basename>. When flags are
#   given and the file has no frontmatter of its own, a frontmatter block built
#   from the flags (with defaults for the rest) is prepended. When the file
#   already starts with "---" it is copied unchanged and a note says the flags
#   were ignored.
cmd_add() {
  ensure_dirs
  local file="" engine="" cwd="" yolo=""
  while [[ $# -gt 0 ]]; do
    case "$1" in
      --engine)
        # without this check a trailing "--engine" never shifts and loops forever
        [[ $# -ge 2 ]] || die "add: --engine needs a value"
        engine=$2
        shift 2
        ;;
      --cwd)
        [[ $# -ge 2 ]] || die "add: --cwd needs a value"
        cwd=$2
        shift 2
        ;;
      --yolo) yolo=true; shift ;;
      --no-yolo) yolo=false; shift ;;
      *) file=$1; shift ;;
    esac
  done
  [[ -n "$file" && -f "$file" ]] || die "usage: add <file.md> [--engine devin|claude|codex] [--cwd PATH] [--yolo|--no-yolo]"

  local base stamp dest
  base=$(basename "$file")
  stamp=$(date +%Y%m%d-%H%M%S)
  dest="$INBOX/${stamp}__${base}"

  local has_fm=false
  [[ "$(head -n 1 "$file")" == "---" ]] && has_fm=true

  # If user passed flags AND the file has no frontmatter, inject one.
  if [[ -n "$engine$cwd$yolo" ]] && ! $has_fm; then
    {
      echo "---"
      echo "engine: ${engine:-$DEFAULT_ENGINE}"
      echo "cwd: ${cwd:-$PWD}"
      echo "yolo: ${yolo:-true}"
      echo "---"
      echo
      cat "$file"
    } > "$dest" || die "could not write $dest"
  else
    if [[ -n "$engine$cwd$yolo" ]]; then
      err "note: $base already has frontmatter — --engine/--cwd/--yolo flags were ignored"
    fi
    cp -- "$file" "$dest" || die "could not copy $file to $dest"
  fi
  log "queued $C_BOLD$(basename "$dest")$C_RESET (engine=$(fm_get "$dest" engine "$DEFAULT_ENGINE"), cwd=$(fm_get "$dest" cwd "$PWD"))"
}

# cmd_run [--max N] [--engine E] [--once|--drain]
#   Foreground scheduler loop. Each pass launches inbox jobs (sorted by file
#   name, i.e. by queue timestamp) while fewer than MAX_CONCURRENCY workers are
#   live, skipping any job whose lock key is busy, then sleeps POLL_SECONDS.
#   With --once/--drain the loop exits once the inbox is empty and no worker
#   is running. Its pid is kept in $STATE/daemon.pid while the loop is alive;
#   INT/TERM stop the loop but leave running workers alone.
cmd_run() {
  ensure_dirs
  local once=false
  while [[ $# -gt 0 ]]; do
    case "$1" in
      --max)
        [[ $# -ge 2 ]] || die "run: --max needs a value"
        MAX_CONCURRENCY=$2
        shift 2
        ;;
      --engine)
        [[ $# -ge 2 ]] || die "run: --engine needs a value"
        DEFAULT_ENGINE=$2
        shift 2
        ;;
      --once | --drain) once=true; shift ;;
      *) die "run: unknown arg '$1'" ;;
    esac
  done
  # The limit is compared numerically below; reject anything else up front and
  # force base 10 so "08" is not read as octal.
  [[ "$MAX_CONCURRENCY" =~ ^[0-9]+$ ]] \
    || die "run: max concurrency must be a whole number (got '$MAX_CONCURRENCY')"
  MAX_CONCURRENCY=$((10#$MAX_CONCURRENCY))

  echo "$$" > "$STATE/daemon.pid"
  trap 'rm -f "$STATE/daemon.pid"; log "run loop stopped"; exit 0' INT TERM
  log "run loop started (max=$MAX_CONCURRENCY, default engine=$DEFAULT_ENGINE). Ctrl-C to stop."

  local running busy next cand cand_key job doing_file
  local w_eng w_cwd w_yolo w_key
  while true; do
    running=$(live_workers)
    # launch jobs while we have capacity and an eligible inbox file
    while [[ "$running" -lt "$MAX_CONCURRENCY" ]]; do
      # pick the oldest inbox file whose lock key is not currently busy, so two
      # jobs sharing a cwd (or `lock:` key) never run at once, regardless of --max.
      busy=$(busy_keys)
      next=""
      for cand in "$INBOX"/*.md; do
        [[ -e "$cand" ]] || continue
        cand_key=$(lock_key_for "$cand")
        # here-string instead of printf|grep: under pipefail an early grep -q
        # exit can SIGPIPE the writer and make a real match look like a miss
        if grep -qxF -- "$cand_key" <<<"$busy"; then continue; fi
        next="$cand"
        break
      done
      [[ -z "$next" ]] && break

      job=$(basename "$next")
      job=${job%.md}
      doing_file="$DOING/$(basename "$next")"
      if ! mv -- "$next" "$doing_file"; then
        # do not launch a worker for a file that never reached doing/
        err "could not move $(basename "$next") into doing/ — retrying on the next poll"
        break
      fi
      w_eng=$(fm_get "$doing_file" engine "$DEFAULT_ENGINE")
      w_cwd=$(fm_get "$doing_file" cwd "$PWD")
      w_yolo=$(fm_get "$doing_file" yolo "true")
      w_key=$(lock_key_for "$doing_file")
      # write meta BEFORE launch (no pid yet), then append the worker pid from $!
      {
        echo "job=$job"
        echo "engine=$w_eng"
        echo "cwd=$w_cwd"
        echo "yolo=$w_yolo"
        echo "lock=$w_key"
        echo "started=$(date +%s)"
      } > "$STATE/$job.meta"
      run_worker "$doing_file" &
      echo "pid=$!" >> "$STATE/$job.meta"
      log "▶ launching $C_BOLD$job$C_RESET (engine=$w_eng, lock=$w_key)"
      sleep 1
      running=$(live_workers)
    done

    if $once; then
      if [[ "$(live_workers)" -eq 0 && "$(_count "$INBOX")" -eq 0 ]]; then
        log "drain complete — inbox empty, no workers running"
        rm -f "$STATE/daemon.pid"
        exit 0
      fi
    fi
    sleep "$POLL_SECONDS"
  done
}

# _count <dir> -> number of *.md entries directly inside <dir> (0 when none).
#   Counts glob matches instead of parsing `ls`, so a file name containing a
#   newline is still counted once.
_count() {
  local n=0 f
  for f in "$1"/*.md; do
    # an unmatched glob stays literal and is neither an entry nor a symlink
    if [[ -e "$f" || -L "$f" ]]; then
      n=$((n + 1))
    fi
  done
  echo "$n"
}

# cmd_status — print the kanban counters and a table of running workers.
#   A worker row shows job, engine, elapsed time, pid and the last log line
#   (first 60 characters). The row is prefixed with "⚠ stalled" when the job
#   log's mtime is more than STALL_MIN minutes old. A missing log, or an mtime
#   that is not a plain integer, is treated as fresh rather than stalled.
cmd_status() {
  ensure_dirs
  local ib dg dn fl running
  ib=$(_count "$INBOX"); dg=$(_count "$DOING"); dn=$(_count "$DONE"); fl=$(_count "$FAILED")
  running=$(live_workers)
  echo
  printf '%s AGENT QUEUE %s %s\n' "$C_BOLD" "$C_DIM$QUEUE_ROOT$C_RESET" ""
  printf ' %sinbox%s %-3s %sdoing%s %-3s %sdone%s %-3s %sfailed%s %-3s %srunning%s %s/%s\n\n' \
    "$C_BLUE" "$C_RESET" "$ib" "$C_YEL" "$C_RESET" "$dg" \
    "$C_GREEN" "$C_RESET" "$dn" "$C_RED" "$C_RESET" "$fl" \
    "$C_BOLD" "$C_RESET" "$running" "$MAX_CONCURRENCY"

  # Stall threshold in seconds; a non-numeric STALL_MIN falls back to 10 minutes
  # instead of breaking the arithmetic below.
  local stall_secs=600
  if [[ "$STALL_MIN" =~ ^[0-9]+$ ]]; then
    stall_secs=$(( 10#$STALL_MIN * 60 ))
  fi

  # running table
  local f pid job eng start now el last lmt age stall
  local printed=false
  for f in "$STATE"/*.meta; do
    [[ -e "$f" ]] || continue
    grep -q '^ended=' "$f" && continue
    # first pid= line only, matching live_workers/busy_keys
    pid=$(grep '^pid=' "$f" | head -1 | cut -d= -f2)
    if [[ -z "$pid" ]] || ! kill -0 "$pid" 2>/dev/null; then
      continue
    fi
    if ! $printed; then
      printf ' %sRUNNING%s\n' "$C_BOLD" "$C_RESET"
      printed=true
    fi
    job=$(grep '^job=' "$f" | head -1 | cut -d= -f2-)
    eng=$(grep '^engine=' "$f" | head -1 | cut -d= -f2)
    start=$(grep '^started=' "$f" | head -1 | cut -d= -f2)
    now=$(date +%s)
    [[ "$start" =~ ^[0-9]+$ ]] || start=$now
    el=$(( now - start ))
    last=$(tail -n 1 "$LOGS/$job.log" 2>/dev/null | cut -c1-60)
    lmt=$(_mtime "$LOGS/$job.log")
    [[ "$lmt" =~ ^[0-9]+$ ]] || lmt=$now
    age=$(( now - lmt ))
    stall=""
    if (( age > stall_secs )); then
      stall="${C_RED}⚠ stalled${C_RESET} "
    fi
    printf ' %s%-26s%s %-7s %3dm%02ds pid %-6s %s%s%s%s\n' \
      "$C_BOLD" "$job" "$C_RESET" "$eng" $((el / 60)) $((el % 60)) "$pid" "$stall" "$C_DIM" "$last" "$C_RESET"
  done
  $printed || printf ' %sno workers running%s\n' "$C_DIM" "$C_RESET"
  echo
}

# cmd_watch [interval] — redraw cmd_status every <interval> seconds (default 2)
#   until interrupted. Returns 1 as soon as sleep fails (for example on a
#   malformed interval) rather than redrawing in a tight loop.
cmd_watch() {
  local interval="${1:-2}"
  while true; do
    clear
    cmd_status
    sleep "$interval" || return 1
  done
}

# cmd_dash [args...] — replace this process with the Node dashboard
#   (dashboard.mjs next to this script), passing the queue root through
#   AGENT_QUEUE_ROOT and forwarding all arguments. Dies with a pointer to
#   `watch` when node or the dashboard file is missing.
cmd_dash() {
  command -v node >/dev/null 2>&1 || die "node not found — use 'watch' for the bash status view"
  [[ -f "$SCRIPT_DIR/dashboard.mjs" ]] \
    || die "dashboard.mjs not found in $SCRIPT_DIR — use 'watch' for the bash status view"
  AGENT_QUEUE_ROOT="$QUEUE_ROOT" exec node "$SCRIPT_DIR/dashboard.mjs" "$@"
}

# _descendants <pid> -> prints every descendant pid of <pid>, one per line
# (children before grandchildren). Prints nothing when pgrep is unavailable.
_descendants() {
  local child
  command -v pgrep >/dev/null 2>&1 || return 0
  for child in $(pgrep -P "$1" 2>/dev/null); do
    echo "$child"
    _descendants "$child"
  done
}

# cmd_stop — stop the run loop and every running worker.
#   The run loop is signalled first so it cannot launch another job into a slot
#   freed by a worker killed here. For each live worker the descendant pids
#   (the agent CLI and anything it spawned) are collected before the worker is
#   killed, then signalled too; killing only the worker subshell would leave
#   the agent running on its own. Killed workers get no ended=/result= lines
#   in their meta, and their job files stay in doing/.
cmd_stop() {
  ensure_dirs
  local killed=0 f pid dpid kids k

  if [[ -f "$STATE/daemon.pid" ]]; then
    dpid=$(cat "$STATE/daemon.pid" 2>/dev/null)
    # only signal something that looks like a pid
    if [[ "$dpid" =~ ^[0-9]+$ ]]; then
      kill "$dpid" 2>/dev/null
    fi
  fi
  rm -f "$STATE/daemon.pid"

  for f in "$STATE"/*.meta; do
    [[ -e "$f" ]] || continue
    grep -q '^ended=' "$f" && continue
    pid=$(grep '^pid=' "$f" | head -1 | cut -d= -f2)
    [[ "$pid" =~ ^[0-9]+$ ]] || continue
    kill -0 "$pid" 2>/dev/null || continue
    kids=$(_descendants "$pid")   # must be read while the worker is still the parent
    if kill "$pid" 2>/dev/null; then
      killed=$((killed + 1))
    fi
    for k in $kids; do
      kill "$k" 2>/dev/null
    done
  done
  log "stopped $killed running worker(s) + run loop"
}

# cmd_logs <job> [-f]   (also accepted: -f <job>)
#   Print a job's log, or follow it with -f. <job> is tried first as the exact
#   log name ($LOGS/<job>.log); otherwise the most recently modified log whose
#   name contains <job> is used. Dies when no job is given or nothing matches.
cmd_logs() {
  local job="${1:-}" follow=""
  [[ "${2:-}" == "-f" || "$job" == "-f" ]] && follow="-f"
  [[ "$job" == "-f" ]] && job="${2:-}"
  [[ -n "$job" ]] || die "usage: logs <job> [-f]"

  local lf="$LOGS/$job.log" cand
  if [[ ! -f "$lf" ]]; then
    # newest partial match, chosen with -nt rather than by parsing `ls -t`
    lf=""
    for cand in "$LOGS"/*"$job"*.log; do
      [[ -f "$cand" ]] || continue
      if [[ -z "$lf" || "$cand" -nt "$lf" ]]; then
        lf=$cand
      fi
    done
  fi
  [[ -f "$lf" ]] || die "no log found for '$job'"

  if [[ -n "$follow" ]]; then
    tail -f "$lf"
  else
    cat "$lf"
  fi
}

# usage — print the help text (commands, frontmatter keys, environment
# variables with their current values) on stdout.
usage() {
  cat <<EOF
${C_BOLD}agent-queue${C_RESET} — folder kanban runner for devin | claude | codex

${C_BOLD}USAGE${C_RESET}
  agent-queue.sh <command> [args]

${C_BOLD}COMMANDS${C_RESET}
  init                      create the queue/ folders
  add <file.md> [opts]      queue a prompt file into inbox/
                            --engine devin|claude|codex --cwd PATH --yolo | --no-yolo
  run [--max N] [--engine E] [--once]
                            process inbox/ (foreground loop; Ctrl-C to stop)
  status                    show kanban counts + running workers
  watch [interval]          live status (default 2s, bash)
  dash [--interval N]       richer live Node dashboard (recent done/failed too)
  stop                      kill running workers + the run loop
  logs <job> [-f]           print (or follow) a job's log
  help                      this message

${C_BOLD}KANBAN${C_RESET} inbox → doing → done / failed (logs/ + .state/ alongside)

${C_BOLD}TASK FRONTMATTER${C_RESET} (top of each .md)
  ---
  engine: devin
  cwd: /Users/you/code/repo
  yolo: true
  lock: my-repo     # optional; defaults to cwd. Jobs sharing a key run serially
  timeout: 45m      # optional; 90s|45m|2h|1d. On expiry -> failed (result=timeout)
  ---

${C_BOLD}ENV${C_RESET}
  AGENT_QUEUE_ROOT (=$QUEUE_ROOT) AGENT_QUEUE_MAX (=$MAX_CONCURRENCY)
  AGENT_QUEUE_ENGINE (=$DEFAULT_ENGINE) DEVIN_BIN / CLAUDE_BIN / CODEX_BIN
  AGENT_QUEUE_POLL (=$POLL_SECONDS) seconds between inbox scans
  AGENT_QUEUE_STALL_MIN (=$STALL_MIN) minutes without log output before a worker shows '⚠ stalled'
EOF
}

# main [command [args...]] — dispatch to the matching cmd_* function.
#   With no command the help text is shown. An unknown command prints an error
#   plus the help text on stderr and exits with status 1.
main() {
  local cmd="${1:-help}"
  shift || true   # nothing to shift when called without arguments
  case "$cmd" in
    init) cmd_init "$@" ;;
    add) cmd_add "$@" ;;
    run) cmd_run "$@" ;;
    status) cmd_status "$@" ;;
    watch) cmd_watch "$@" ;;
    dash | dashboard) cmd_dash "$@" ;;
    stop) cmd_stop "$@" ;;
    logs) cmd_logs "$@" ;;
    help | -h | --help) usage ;;
    *)
      err "unknown command: $cmd"
      { echo; usage; } >&2
      exit 1
      ;;
  esac
}

main "$@"