From 38d8e8e5cf0494c655848520e396012ae43131f6 Mon Sep 17 00:00:00 2001 From: saravanakumardb1 Date: Mon, 1 Jun 2026 00:18:26 -0700 Subject: [PATCH] feat(agent-queue): add tracked example multi-product fleet launcher MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The operational _start_fleet.sh lives in a local (untracked) sandbox, so the gate + heartbeat-cadence settings weren't version-controlled anywhere. Add demo/start-fleet.example.sh: a parameterized, sanitized launcher (one agent-queue.sh run daemon per product against a live platform-service) that ships the two settings you must get right — AQ_FLEET_GATE=1 (M0 RU gate) and AQ_FLEET_LEASE_RENEW_SEC=30 (heartbeat cadence < the 90s stale threshold). No hardcoded paths/secrets; everything env-overridable. Documented in demo/README. Generated with [Devin](https://cli.devin.ai/docs) Co-Authored-By: Devin <158243242+devin-ai-integration[bot]@users.noreply.github.com> --- agent-queue/demo/README.md | 5 ++ agent-queue/demo/start-fleet.example.sh | 74 +++++++++++++++++++++++++ 2 files changed, 79 insertions(+) create mode 100755 agent-queue/demo/start-fleet.example.sh diff --git a/agent-queue/demo/README.md b/agent-queue/demo/README.md index 626fade..d305a64 100644 --- a/agent-queue/demo/README.md +++ b/agent-queue/demo/README.md @@ -76,3 +76,8 @@ With `DEMO_KEEP=1`, inspect under the printed temp dir: - `two-factory-demo.sh` — the orchestrator (start factories, kill/reclaim/fence, assert). - `coordinator-stub.sh` — the stateful coordinator stub (claim/patch/fence/renew/release/reap, mkdir-locked). +- `start-fleet.example.sh` — reference launcher for a **real** multi-product local + fleet against a live platform-service (one `agent-queue.sh run` daemon per + product). Parameterized via env; ships the two settings you must get right — + `AQ_FLEET_GATE=1` (M0 RU gate) and `AQ_FLEET_LEASE_RENEW_SEC=30` (heartbeat + cadence < the 90s stale threshold). 
Copy + adjust for your sandbox. diff --git a/agent-queue/demo/start-fleet.example.sh b/agent-queue/demo/start-fleet.example.sh new file mode 100755 index 0000000..e7a7030 --- /dev/null +++ b/agent-queue/demo/start-fleet.example.sh @@ -0,0 +1,74 @@ +#!/usr/bin/env bash +# +# start-fleet.example.sh — reference launcher for a multi-product local fleet. +# +# Starts one detached `agent-queue.sh run` daemon (a "factory") per product, each +# routing work through the platform-service fleet coordinator. This is the tracked, +# parameterized version of the operational `_start_fleet.sh` people keep in their +# local sandbox — copy it, adjust the env vars, and run. +# +# Prereqs: +# - platform-service running on $AQ_FLEET_API (see scripts/deploy-gigafactory.sh) +# - a factory token in $FLEET_TOKEN_FILE (an admin/factory JWT for the fleet API) +# - tmux + the `longrun` helper (sourced below) for detached, logged daemons +# +# Env overrides (all optional): +# SB sandbox/state root (per-product queues live in $SB/q_<product>) +# AQ path to agent-queue.sh +# AQ_FLEET_API coordinator base URL (default http://localhost:4003/api) +# FLEET_TOKEN_FILE file holding the bearer token (default $SB/.token) +# PRODUCTS space-separated product ids (default: the ecosystem set) +# AGENT_QUEUE_MAX per-factory concurrency (default 3) +# +# Docs: ../docs/GIGAFACTORY/GIGAFACTORY_SYSTEM_OVERVIEW.md (§9 API, §14 gotchas) and +# ../docs/GIGAFACTORY/FLEET_DISPATCH_REDESIGN.md (the M0 RU gate). 
# Unset variables are errors and a pipeline fails if any stage fails.
# errexit (-e) is not enabled, so one product failing to launch does not stop
# the remaining launches; every step that must succeed is checked explicitly.
set -uo pipefail

# Print a diagnostic on stderr and abort the launcher with status 1.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

# --- configuration: every value below can be overridden from the environment --
HERE="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" \
  || die "cannot resolve the directory of ${BASH_SOURCE[0]}"
SB="${SB:-$PWD/.fleet-sandbox}"
AQ="${AQ:-$HERE/../agent-queue.sh}"
AQ_FLEET_API="${AQ_FLEET_API:-http://localhost:4003/api}"
FLEET_TOKEN_FILE="${FLEET_TOKEN_FILE:-$SB/.token}"
PRODUCTS="${PRODUCTS:-lysnrai chronomind mindlyst nomgap}"
AGENT_QUEUE_MAX="${AGENT_QUEUE_MAX:-3}"
LONGRUN_ALIAS="${LONGRUN_ALIAS:-$HERE/../../aliases/_longrun.alias}"

# --- preflight: fail early, and on stderr, before any daemon is started -------
[[ -f "$AQ" ]] || die "agent-queue.sh not found at $AQ (set AQ=)"
[[ -s "$FLEET_TOKEN_FILE" ]] \
  || die "fleet token not found at $FLEET_TOKEN_FILE (set FLEET_TOKEN_FILE=)"
[[ -r "$LONGRUN_ALIAS" ]] \
  || die "longrun helper not found at $LONGRUN_ALIAS (set LONGRUN_ALIAS=)"
[[ -n "${PRODUCTS//[[:space:]]/}" ]] || die "PRODUCTS is empty (set PRODUCTS=)"

# Command substitution strips trailing newlines, so a file holding only
# newlines passes the -s test above but yields an empty token; reject that too.
TOK="$(<"$FLEET_TOKEN_FILE")" || die "cannot read fleet token from $FLEET_TOKEN_FILE"
[[ -n "$TOK" ]] || die "fleet token in $FLEET_TOKEN_FILE is empty"

mkdir -p "$SB" || die "cannot create sandbox dir $SB"
export LONGRUN_LOG_DIR="$SB"
# The exit status of `source` is the status of the last command in the helper
# file, which need not signal a load failure, so verify the result instead:
# `longrun` must be callable afterwards.
# shellcheck disable=SC1090
source "$LONGRUN_ALIAS"
command -v longrun >/dev/null 2>&1 \
  || die "longrun is not available after sourcing $LONGRUN_ALIAS"

# Split the space-separated product list into an array. Unlike an unquoted
# $PRODUCTS expansion this performs no glob expansion on the ids.
products=()
# read returns non-zero on reaching end-of-input without the NUL delimiter;
# that is expected here.
read -r -d '' -a products <<<"$PRODUCTS" || true

# Why these two settings matter (both verified on a live fleet):
#   AQ_FLEET_GATE=1             §M0 RU gate — the run loop point-reads the cheap
#                               per-product queue version (GET /fleet/queue-state)
#                               and SKIPS the claim while nothing changed, slashing
#                               idle Cosmos RU. Default OFF; safe (fails open). See
#                               FLEET_DISPATCH_REDESIGN.md §8/§12.
#   AQ_FLEET_LEASE_RENEW_SEC=30 heartbeat/renew cadence. MUST stay well under the
#                               coordinator's 90s stale threshold, or a healthy
#                               factory flaps to "stale"/"no live factory" between
#                               beats (the 300s default caused exactly that).
#
# NOTE(review): the bearer token is handed to `env` as a command-line argument,
# so it may be visible in process listings while the daemon runs. Consider
# letting the daemon read the token file itself — confirm what agent-queue.sh
# supports before changing this.
failed=0
for p in "${products[@]}"; do
  ROOT="$SB/q_$p"
  if ! longrun "gigafactory-$p" env \
    AGENT_QUEUE_ROOT="$ROOT" \
    AGENT_QUEUE_ENGINE=devin \
    AGENT_QUEUE_MAX="$AGENT_QUEUE_MAX" \
    AQ_FLEET=1 AQ_FLEET_ROUTE=1 \
    AQ_FLEET_API="$AQ_FLEET_API" \
    AQ_FLEET_TOKEN="$TOK" \
    AQ_PRODUCT_ID="$p" \
    AQ_FACTORY_ID="mac-$p" \
    AQ_FLEET_GATE=1 \
    AQ_FLEET_LEASE_RENEW_SEC=30 \
    "$AQ" run; then
    # Keep launching the other products, but remember the failure so the
    # launcher's exit status reflects it.
    printf 'failed to launch factory for product %s\n' "$p" >&2
    failed=$((failed + 1))
  fi
  echo "----"
done

((failed == 0)) || die "$failed factory launch(es) failed"

# Stop a factory:  tmux kill-session -t gigafactory-<product>
# Tail a factory:  tail -f "$SB"/longrun-gigafactory-<product>-*.log
#                  (the log file name is chosen by the longrun helper — verify
#                  the exact pattern against your copy of it)