From 7e1a2ad660d1aee9949d60b3adf772462979557f Mon Sep 17 00:00:00 2001 From: saravanakumardb1 Date: Thu, 28 May 2026 18:31:57 -0700 Subject: [PATCH] feat(gitea): add-host-runner.sh for multi-runner CI parallelism - scripts/gitea/add-host-runner.sh: stand up Nth independent host-mode runner as its own launchd service (separate config/.runner/workdir, shared runner.env token, admin-API registration token, idempotent reload) - GITEA_VM_SETUP.md 11.5: document multi-runner setup, fleet list/prune, and removal; 3 runners x capacity 2 ~= 6 parallel slots (verified) Live fleet: learning-ai-mac (brew) + 2 added runners, all online; stale offline registrations pruned. --- docs/runbooks/GITEA_VM_SETUP.md | 50 +++++++++++- scripts/gitea/add-host-runner.sh | 128 +++++++++++++++++++++++++++++++ 2 files changed, 176 insertions(+), 2 deletions(-) create mode 100755 scripts/gitea/add-host-runner.sh diff --git a/docs/runbooks/GITEA_VM_SETUP.md b/docs/runbooks/GITEA_VM_SETUP.md index 7bbb2612..abe0c3fd 100644 --- a/docs/runbooks/GITEA_VM_SETUP.md +++ b/docs/runbooks/GITEA_VM_SETUP.md @@ -420,8 +420,54 @@ tail -f /opt/homebrew/var/log/act_runner.log # expect "declare successfully" `runner.capacity` controls parallel jobs on one runner. With `capacity: 1` the lightweight `docker-lint` job queues behind slow backend/web/mobile/E2E jobs (observed: ~13 min wait). `capacity: 2` lets `docker-lint` run alongside -one heavy job. For more parallelism, register additional runners rather than -pushing capacity high on a single laptop. +one heavy job. For more parallelism, register **additional runners** rather +than pushing capacity high on a single laptop — each runner gets its own +workdir and process, so failures/timeouts stay isolated. + +**Add more host runners (reproducible):** + +```bash +# Stand up runners #2 and #3 (each capacity 2) as their own launchd services. +# Shares the canonical runner.env token; separate config/.runner/workdir. 
+bash scripts/gitea/add-host-runner.sh 2 2 +bash scripts/gitea/add-host-runner.sh 3 2 +``` + +`add-host-runner.sh <N> [capacity]`: + +- derives a per-runner `config.yaml` from the canonical one (preserves proxy + env + `env_file`), overriding `runner.file`, `runner.capacity`, and a unique + `host.workdir_parent` (`~/.cache/act-<N>`) +- fetches a one-time registration token via the admin API (`~/.gitea_c5_pat`) +- registers as `$(hostname -s)-<N>` with host-mode labels +- writes + loads `~/Library/LaunchAgents/com.bytelyst.act_runner-<N>.plist` + (`RunAtLoad` + `KeepAlive`) +- idempotent: re-running just reloads the service + +The Homebrew `act_runner` service is runner #1; `add-host-runner.sh` adds +#2, #3, … Three runners × capacity 2 ≈ **6 parallel job slots**. Verified: +pushing a multi-job workflow lights up all three runners simultaneously. + +List + prune the fleet: + +```bash +PAT=$(cat ~/.gitea_c5_pat) +curl -s -H "Authorization: token $PAT" \ + http://localhost:3300/api/v1/admin/actions/runners \ + | python3 -c "import json,sys; [print(r['id'], r['name'], r['status']) for r in json.load(sys.stdin)['runners']]" +# Delete a stale/offline runner by id: +curl -s -X DELETE -H "Authorization: token $PAT" \ + http://localhost:3300/api/v1/admin/actions/runners/<id> +``` + +Remove an extra runner entirely: + +```bash +launchctl bootout "gui/$(id -u)/com.bytelyst.act_runner-2" 2>/dev/null || true +rm -f ~/Library/LaunchAgents/com.bytelyst.act_runner-2.plist +rm -rf "$HOME/Library/Application Support/act_runner-2" ~/.cache/act-2 +# then DELETE its row via the admin API (above) +``` ### 11.6 — Runner token rotation diff --git a/scripts/gitea/add-host-runner.sh b/scripts/gitea/add-host-runner.sh new file mode 100755 index 00000000..d7bdac9e --- /dev/null +++ b/scripts/gitea/add-host-runner.sh @@ -0,0 +1,128 @@ +#!/usr/bin/env bash +# +# add-host-runner.sh — Stand up an additional independent host-mode Gitea +# Actions runner as its own launchd service, for more CI parallelism. 
+# +# The canonical runner is the Homebrew-managed `act_runner` service +# (config: /opt/homebrew/etc/act_runner/config.yaml, capacity 2). Bumping a +# single runner's capacity high overloads the laptop and shares one workdir; +# registering *separate* runners gives clean isolation and scales linearly. +# +# Each extra runner gets: +# - its own config dir ~/Library/Application Support/act_runner-/ +# - its own .runner file (separate Gitea registration) +# - its own workdir ~/.cache/act- (no cross-runner clashes) +# - the SHARED secret /opt/homebrew/etc/act_runner/runner.env (env_file) +# - its own launchd plist ~/Library/LaunchAgents/com.bytelyst.act_runner-.plist +# +# Usage: +# bash add-host-runner.sh [capacity] # e.g. add-host-runner.sh 2 2 +# bash add-host-runner.sh 2 2 && bash add-host-runner.sh 3 2 +# +# Requires: act_runner on PATH, a Gitea admin PAT at ~/.gitea_c5_pat, +# and the canonical runner.env to already exist (created during runner hardening). +# +# Idempotent: if runner is already registered it reloads the service and exits. 
+set -euo pipefail + +N="${1:?usage: add-host-runner.sh [capacity]}" +CAP="${2:-2}" +INSTANCE="${GITEA_INSTANCE:-http://localhost:3300}" +PAT_FILE="${GITEA_PAT_FILE:-$HOME/.gitea_c5_pat}" +CANONICAL_CONFIG="${CANONICAL_CONFIG:-/opt/homebrew/etc/act_runner/config.yaml}" +SHARED_ENV_FILE="${SHARED_ENV_FILE:-/opt/homebrew/etc/act_runner/runner.env}" + +BASE="$HOME/Library/Application Support/act_runner-$N" +CONFIG="$BASE/config.yaml" +RUNNER_FILE="$BASE/.runner" +WORKDIR="$HOME/.cache/act-$N" +PLIST="$HOME/Library/LaunchAgents/com.bytelyst.act_runner-$N.plist" +SVC_LABEL="com.bytelyst.act_runner-$N" +RUNNER_NAME="$(hostname -s)-$N" +LOG_DIR="$BASE/logs" + +command -v act_runner >/dev/null 2>&1 || { echo "✗ act_runner not on PATH (brew install act_runner)" >&2; exit 1; } +[ -f "$PAT_FILE" ] || { echo "✗ no Gitea PAT at $PAT_FILE" >&2; exit 1; } +[ -f "$CANONICAL_CONFIG" ] || { echo "✗ canonical config not found: $CANONICAL_CONFIG" >&2; exit 1; } +PAT="$(cat "$PAT_FILE")" + +mkdir -p "$BASE" "$WORKDIR" "$LOG_DIR" + +echo "── add host runner #$N (capacity $CAP) ──" + +# ── already registered? just (re)load the service ─────────────────────────── +if [ -f "$RUNNER_FILE" ]; then + echo " ✓ runner #$N already registered ($RUNNER_FILE) — reloading service" + launchctl bootout "gui/$(id -u)/$SVC_LABEL" 2>/dev/null || true + launchctl bootstrap "gui/$(id -u)" "$PLIST" 2>/dev/null || launchctl load "$PLIST" 2>/dev/null || true + launchctl list | grep "$SVC_LABEL" || echo " (service not listed — check $LOG_DIR)" + exit 0 +fi + +# ── derive a per-runner config from the canonical one ─────────────────────── +# Preserve the proxy/env block + env_file; override file path, capacity, workdir. 
+python3 - "$CANONICAL_CONFIG" "$CONFIG" "$RUNNER_FILE" "$CAP" "$WORKDIR" "$SHARED_ENV_FILE" <<'PY' +import sys, yaml +src, dst, runner_file, cap, workdir, env_file = sys.argv[1:7] +cfg = yaml.safe_load(open(src)) or {} +cfg.setdefault("runner", {}) +cfg["runner"]["file"] = runner_file +cfg["runner"]["capacity"] = int(cap) +cfg["runner"]["env_file"] = env_file +cfg.setdefault("host", {}) +cfg["host"]["workdir_parent"] = workdir +yaml.safe_dump(cfg, open(dst, "w"), default_flow_style=False, sort_keys=False) +print(f" + wrote {dst}") +PY + +# ── fetch a one-time registration token (admin API) ───────────────────────── +REG_TOKEN=$(curl -fsS -H "Authorization: token $PAT" \ + "$INSTANCE/api/v1/admin/runners/registration-token" \ + | python3 -c "import json,sys; print(json.load(sys.stdin)['token'])") +[ -n "$REG_TOKEN" ] || { echo "✗ could not fetch registration token" >&2; exit 1; } + +# ── register (host-mode labels) ───────────────────────────────────────────── +act_runner register \ + --no-interactive \ + --instance "$INSTANCE" \ + --token "$REG_TOKEN" \ + --name "$RUNNER_NAME" \ + --labels "ubuntu-latest:host,macos-latest:host,macos-15:host,self-hosted:host" \ + --config "$CONFIG" +echo " ✓ registered as $RUNNER_NAME" + +# ── write launchd plist ───────────────────────────────────────────────────── +RUNNER_BIN="$(command -v act_runner)" +cat > "$PLIST" < + + + + Label$SVC_LABEL + ProgramArguments + + $RUNNER_BIN + daemon + --config + $CONFIG + + RunAtLoad + KeepAlive + WorkingDirectory$BASE + StandardOutPath$LOG_DIR/act_runner.log + StandardErrorPath$LOG_DIR/act_runner.err + + +EOF +echo " + wrote $PLIST" + +# ── load the service ──────────────────────────────────────────────────────── +launchctl bootout "gui/$(id -u)/$SVC_LABEL" 2>/dev/null || true +launchctl bootstrap "gui/$(id -u)" "$PLIST" 2>/dev/null || launchctl load "$PLIST" +sleep 3 +if launchctl list | grep -q "$SVC_LABEL"; then + echo " ✓ service loaded ($SVC_LABEL)" +else + echo " ⚠ service not listed — 
check $LOG_DIR/act_runner.err" +fi +echo " log: $LOG_DIR/act_runner.log"