#!/usr/bin/env bash
#
# start-fleet.example.sh — reference launcher for a multi-product local fleet.
#
# Starts one detached `agent-queue.sh run` daemon (a "factory") per product, each
# routing work through the platform-service fleet coordinator. This is the tracked,
# parameterized version of the operational `_start_fleet.sh` people keep in their
# local sandbox — copy it, adjust the env vars, and run.
#
# Prereqs:
#   - platform-service running on $AQ_FLEET_API (see scripts/deploy-gigafactory.sh)
#   - a factory token in $FLEET_TOKEN_FILE (an admin/factory JWT for the fleet API)
#   - tmux + the `longrun` helper (sourced below) for detached, logged daemons
#
# Env overrides (all optional):
#   SB                sandbox/state root (per-product queues live in $SB/q_<product>)
#   AQ                path to agent-queue.sh
#   AQ_FLEET_API      coordinator base URL (default http://localhost:4003/api)
#   FLEET_TOKEN_FILE  file holding the bearer token (default $SB/.token)
#   PRODUCTS          space-separated product ids (default: the ecosystem set)
#   AGENT_QUEUE_MAX   per-factory concurrency (default 3)
#
# Docs: ../docs/GIGAFACTORY/GIGAFACTORY_SYSTEM_OVERVIEW.md (§9 API, §14 gotchas) and
#       ../docs/GIGAFACTORY/FLEET_DISPATCH_REDESIGN.md (the M0 RU gate).
# Unset variables and failed pipelines are errors. `-e` is intentionally NOT
# enabled: the sourced longrun helper is outside this script's control, and one
# factory failing to start must not prevent the remaining products from being
# launched. Every critical step below is therefore checked explicitly.
set -uo pipefail

# Directory containing this script; the defaults below are resolved relative to it.
HERE="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" ||
  { echo "cannot resolve script directory" >&2; exit 1; }

# --- Configuration (every value can be overridden from the environment) ------
SB="${SB:-$PWD/.fleet-sandbox}"
AQ="${AQ:-$HERE/../agent-queue.sh}"
AQ_FLEET_API="${AQ_FLEET_API:-http://localhost:4003/api}"
FLEET_TOKEN_FILE="${FLEET_TOKEN_FILE:-$SB/.token}"
PRODUCTS="${PRODUCTS:-lysnrai chronomind mindlyst nomgap}"
AGENT_QUEUE_MAX="${AGENT_QUEUE_MAX:-3}"
LONGRUN_ALIAS="${LONGRUN_ALIAS:-$HERE/../../aliases/_longrun.alias}"

# --- Prerequisite checks (diagnostics go to stderr) --------------------------
[ -f "$AQ" ] || { echo "agent-queue.sh not found at $AQ (set AQ=)" >&2; exit 1; }
[ -s "$FLEET_TOKEN_FILE" ] ||
  { echo "fleet token not found at $FLEET_TOKEN_FILE (set FLEET_TOKEN_FILE=)" >&2; exit 1; }
[ -f "$LONGRUN_ALIAS" ] ||
  { echo "longrun helper not found at $LONGRUN_ALIAS (set LONGRUN_ALIAS=)" >&2; exit 1; }

# Read the bearer token. `-s` above only proves the file is non-empty on disk;
# an unreadable file, or one holding nothing but newlines, would otherwise
# start every factory with an empty AQ_FLEET_TOKEN.
TOK="$(cat "$FLEET_TOKEN_FILE")" ||
  { echo "cannot read fleet token from $FLEET_TOKEN_FILE" >&2; exit 1; }
[ -n "$TOK" ] || { echo "fleet token in $FLEET_TOKEN_FILE is empty" >&2; exit 1; }

# Split PRODUCTS on whitespace with pathname expansion switched off, so a product
# id containing a glob character is never expanded against the current directory.
# Globbing is re-enabled before the helper is sourced or invoked.
set -f
# shellcheck disable=SC2206
fleet_products=($PRODUCTS)
set +f
[ "${#fleet_products[@]}" -gt 0 ] ||
  { echo "PRODUCTS is empty; nothing to start" >&2; exit 1; }

mkdir -p "$SB" || { echo "cannot create sandbox directory $SB" >&2; exit 1; }

# The sandbox root is exported as LONGRUN_LOG_DIR for the sourced longrun helper.
export LONGRUN_LOG_DIR="$SB"
# shellcheck disable=SC1090
source "$LONGRUN_ALIAS" || { echo "failed to source $LONGRUN_ALIAS" >&2; exit 1; }
# Fail once, up front, instead of printing "command not found" for every product.
type longrun >/dev/null 2>&1 ||
  { echo "longrun is not available after sourcing $LONGRUN_ALIAS" >&2; exit 1; }

# --- Launch one factory per product ------------------------------------------
# NOTE(review): the token is handed to `env` as a command-line argument, so it can
# be visible in process listings while the command starts. Left unchanged because
# how longrun propagates the environment is not visible from this script.
fleet_failed=0
for p in "${fleet_products[@]}"; do
  ROOT="$SB/q_$p"
  longrun "gigafactory-$p" env \
    AGENT_QUEUE_ROOT="$ROOT" \
    AGENT_QUEUE_ENGINE=devin \
    AGENT_QUEUE_MAX="$AGENT_QUEUE_MAX" \
    AQ_FLEET=1 AQ_FLEET_ROUTE=1 \
    AQ_FLEET_API="$AQ_FLEET_API" \
    AQ_FLEET_TOKEN="$TOK" \
    AQ_PRODUCT_ID="$p" \
    AQ_FACTORY_ID="mac-$p" \
    AQ_FLEET_GATE=1 \
    AQ_FLEET_LEASE_RENEW_SEC=30 \
    "$AQ" run
  fleet_rc=$?
  # Keep going so the other products still start, but remember the failure so the
  # script's exit status reflects it.
  if [ "$fleet_rc" -ne 0 ]; then
    echo "failed to start gigafactory-$p (longrun exit status $fleet_rc)" >&2
    fleet_failed=1
  fi
  echo "----"
done

# Exit non-zero when at least one factory could not be started.
exit "$fleet_failed"

# Why these two matter (both verified on a live fleet):
#   AQ_FLEET_GATE=1              §M0 RU gate — the run loop point-reads the cheap
#                                per-product queue version (GET /fleet/queue-state) and
#                                SKIPS the claim while nothing changed, slashing idle
#                                Cosmos RU. Default OFF; safe (fails open). See
#                                FLEET_DISPATCH_REDESIGN.md §8/§12.
#   AQ_FLEET_LEASE_RENEW_SEC=30  heartbeat/renew cadence. MUST stay well under the
#                                coordinator's 90s stale threshold, or a healthy
#                                factory flaps to "stale"/"no live factory" between
#                                beats (the 300s default caused exactly that).
#
# Stop a factory:  tmux kill-session -t gigafactory-<product>
# Tail a factory:  tail -f "$SB"/longrun-gigafactory-<product>-*.log