learning_ai_common_plat/scripts/gitea/add-host-runner.sh
saravanakumardb1 77b074f3c0 feat(gitea): docker-mode env hygiene + document containerized job migration
- add-host-runner.sh docker mode now strips host-specific envs (HOME, PATH,
  PNPM_HOME) that leak macOS paths into Linux containers and override workflow
  env (broke $HOME-relative writes)
- GITEA_VM_SETUP.md 11.5: reference pattern + 5 gotchas for migrating a real
  job (docker-lint) onto the docker runner: Actions secret (not token file),
  doctor.sh token-file requirement, host-env leakage, env_file token override,
  proxy bypass. Validated green on M-…-4.
2026-05-28 19:16:52 -07:00

168 lines
7.9 KiB
Bash
Executable File

#!/usr/bin/env bash
#
# add-host-runner.sh — Stand up an additional independent host-mode Gitea
# Actions runner as its own launchd service, for more CI parallelism.
#
# The canonical runner is the Homebrew-managed `act_runner` service
# (config: /opt/homebrew/etc/act_runner/config.yaml, capacity 2). Bumping a
# single runner's capacity high overloads the laptop and shares one workdir;
# registering *separate* runners gives clean isolation and scales linearly.
#
# Each extra runner gets:
# - its own config dir ~/Library/Application Support/act_runner-<N>/
# - its own .runner file (separate Gitea registration)
# - its own workdir ~/.cache/act-<N> (no cross-runner clashes)
# - the SHARED secret /opt/homebrew/etc/act_runner/runner.env (env_file)
# - its own launchd plist ~/Library/LaunchAgents/com.bytelyst.act_runner-<N>.plist
#
# Usage:
# bash add-host-runner.sh <N> [capacity] [mode] # mode: host (default) | docker
# bash add-host-runner.sh 2 2 && bash add-host-runner.sh 3 2 # two host runners
# bash add-host-runner.sh 4 1 docker # a docker-mode runner
#
# Docker mode advertises a dedicated `docker` label (not `ubuntu-latest`) so it
# does not hijack existing host-mode jobs. Target it with `runs-on: docker` and
# reach Gitea from inside the job container via host.docker.internal:3300.
#
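# Requires: act_runner, curl and python3 (with the PyYAML module) on PATH,
# a Gitea admin PAT at ~/.gitea_c5_pat (override with GITEA_PAT_FILE),
# and the canonical config.yaml and runner.env to already exist (runner.env is
# created during runner hardening).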
#
# Idempotent: if runner <N> is already registered it reloads the service and exits.
set -euo pipefail

# ── arguments ───────────────────────────────────────────────────────────────
N="${1:?usage: add-host-runner.sh <N> [capacity] [mode] (mode: host|docker)}"
CAP="${2:-2}"
MODE="${3:-host}" # host | docker

# Validate arguments before touching the filesystem. N is embedded in paths
# and in the launchd label, CAP is parsed as an integer when the config is
# generated, and any MODE other than "docker" would otherwise silently fall
# through to host mode.
case "$N" in
  *[!A-Za-z0-9_-]*)
    echo "✗ invalid runner id '$N' (use letters, digits, '_' or '-')" >&2
    exit 1
    ;;
esac
case "$CAP" in
  *[!0-9]*)
    echo "✗ capacity must be a non-negative integer, got '$CAP'" >&2
    exit 1
    ;;
esac
case "$MODE" in
  host | docker) ;;
  *)
    echo "✗ unknown mode '$MODE' (expected: host | docker)" >&2
    exit 1
    ;;
esac

# ── settings (each environment-backed value can be overridden) ──────────────
INSTANCE="${GITEA_INSTANCE:-http://localhost:3300}"
DOCKER_IMAGE="${RUNNER_DOCKER_IMAGE:-catthehacker/ubuntu:act-latest}"
PAT_FILE="${GITEA_PAT_FILE:-$HOME/.gitea_c5_pat}"
CANONICAL_CONFIG="${CANONICAL_CONFIG:-/opt/homebrew/etc/act_runner/config.yaml}"
SHARED_ENV_FILE="${SHARED_ENV_FILE:-/opt/homebrew/etc/act_runner/runner.env}"

# Per-runner locations, all keyed by N so runners never share state.
BASE="$HOME/Library/Application Support/act_runner-$N"
CONFIG="$BASE/config.yaml"
RUNNER_FILE="$BASE/.runner"
WORKDIR="$HOME/.cache/act-$N"
PLIST="$HOME/Library/LaunchAgents/com.bytelyst.act_runner-$N.plist"
SVC_LABEL="com.bytelyst.act_runner-$N"
RUNNER_NAME="$(hostname -s)-$N"
LOG_DIR="$BASE/logs"

# ── preflight ───────────────────────────────────────────────────────────────
command -v act_runner >/dev/null 2>&1 || { echo "✗ act_runner not on PATH (brew install act_runner)" >&2; exit 1; }
[ -f "$PAT_FILE" ] || { echo "✗ no Gitea PAT at $PAT_FILE" >&2; exit 1; }
[ -f "$CANONICAL_CONFIG" ] || { echo "✗ canonical config not found: $CANONICAL_CONFIG" >&2; exit 1; }
# The generated config points env_file at this path; warn (do not abort) when
# it is absent so the operator notices before the runner starts.
[ -f "$SHARED_ENV_FILE" ] || echo "⚠ shared env file not found: $SHARED_ENV_FILE — the generated config will reference a missing env_file" >&2

PAT="$(cat "$PAT_FILE")"
[ -n "$PAT" ] || { echo "✗ Gitea PAT file is empty: $PAT_FILE" >&2; exit 1; }

mkdir -p "$BASE" "$WORKDIR" "$LOG_DIR"

echo "── add $MODE runner #$N (capacity $CAP) ──"
if [ "$MODE" = "docker" ]; then
  echo " image : $DOCKER_IMAGE"
fi
# ── labels per mode (act_runner uses config-file labels, not --labels) ──────
case "$MODE" in
  docker)
    # Docker runners advertise a dedicated `docker` label, so jobs written as
    # `runs-on: ubuntu-latest` (which assume host mode + localhost Gitea) are
    # never picked up by them. Jobs opt in with `runs-on: docker` and reach
    # Gitea via host.docker.internal.
    LABELS="docker:docker://$DOCKER_IMAGE,ubuntu-docker:docker://$DOCKER_IMAGE,self-hosted:host"
    ;;
  *)
    # Every other mode value gets the host-mode label set.
    LABELS="ubuntu-latest:host,macos-latest:host,macos-15:host,self-hosted:host"
    ;;
esac
# ── already registered? just (re)load the service ───────────────────────────
# The presence of the .runner file is treated as "already registered": the
# script only restarts the launchd service and exits without re-registering.
if [ -f "$RUNNER_FILE" ]; then
  echo " ✓ runner #$N already registered ($RUNNER_FILE) — reloading service"
  # Without the plist there is nothing launchd can load; say so explicitly
  # instead of swallowing the bootstrap failure and exiting 0.
  if [ ! -f "$PLIST" ]; then
    echo " ✗ launchd plist missing: $PLIST — restore it, or remove $RUNNER_FILE and re-run to register afresh" >&2
    exit 1
  fi
  # bootout fails when the service is not currently loaded; that is expected.
  launchctl bootout "gui/$(id -u)/$SVC_LABEL" 2>/dev/null || true
  # Fall back to the legacy `load` verb when `bootstrap` is rejected.
  launchctl bootstrap "gui/$(id -u)" "$PLIST" 2>/dev/null || launchctl load "$PLIST" 2>/dev/null || true
  # Compare the label column exactly: a substring/regex match would let
  # runner "1" be satisfied by runner "10".
  launchctl list \
    | awk -v label="$SVC_LABEL" '$3 == label { print; found = 1 } END { exit !found }' \
    || echo " (service not listed — check $LOG_DIR)"
  exit 0
fi
# ── derive a per-runner config from the canonical one ───────────────────────
# Preserve the proxy/env block + env_file; override file path, capacity, workdir.
# The embedded Python receives everything through argv (the heredoc is quoted,
# so the shell expands nothing inside it).
python3 - "$CANONICAL_CONFIG" "$CONFIG" "$RUNNER_FILE" "$CAP" "$WORKDIR" "$SHARED_ENV_FILE" "$MODE" "$LABELS" <<'PY'
import sys

try:
    import yaml
except ImportError:
    # PyYAML is not part of the standard library; fail with a usable hint
    # instead of a traceback.
    sys.exit("✗ PyYAML is required: python3 -m pip install pyyaml")

src, dst, runner_file, cap, workdir, env_file, mode, labels = sys.argv[1:9]


def section(parent, key):
    """Return parent[key] as a mapping, creating it when missing or null.

    A bare `key:` in YAML parses to None, which dict.setdefault() would hand
    back unchanged and the following item assignment would then crash.
    """
    value = parent.get(key)
    if value is None:
        value = {}
        parent[key] = value
    return value


with open(src) as fh:
    cfg = yaml.safe_load(fh) or {}

runner = section(cfg, "runner")
runner["file"] = runner_file
runner["capacity"] = int(cap)
runner["env_file"] = env_file
# act_runner reads labels from the config file (it ignores `register --labels`).
runner["labels"] = labels.split(",")

section(cfg, "host")["workdir_parent"] = workdir

if mode == "docker":
    c = section(cfg, "container")
    c["docker_host"] = "-"   # auto-detect host docker daemon
    c["force_pull"] = False  # use locally-pulled image (corp proxy)
    c["privileged"] = False
    # Let job containers reach the host's Gitea on Docker Desktop.
    c["options"] = "--add-host=host.docker.internal:host-gateway"

    envs = section(runner, "envs")
    # Drop host-specific envs that are invalid inside a Linux container — these
    # leak the macOS HOME/PATH/PNPM_HOME and override workflow env, breaking
    # $HOME-relative writes. Let the container use its image defaults (/root).
    for k in ("HOME", "PATH", "PNPM_HOME"):
        envs.pop(k, None)
    # Containerized jobs inherit the corp proxy env; without this they route
    # host.docker.internal:3300 through the proxy and get a 504. Bypass it.
    for k in ("NO_PROXY", "no_proxy", "NPM_CONFIG_NOPROXY"):
        v = str(envs.get(k) or "")  # tolerate a missing or null entry
        if "host.docker.internal" not in v:
            envs[k] = (v + "," if v else "") + "host.docker.internal"

with open(dst, "w") as fh:
    yaml.safe_dump(cfg, fh, default_flow_style=False, sort_keys=False)
print(f" + wrote {dst}")
PY
# ── fetch a one-time registration token (admin API) ─────────────────────────
# The Authorization header is fed to curl on stdin (-H @-) so the PAT never
# appears in the process argument list. The whole pipeline is tested inside
# `if !` because, under `set -e` + `pipefail`, a failing curl or JSON parse
# would otherwise abort the script at the assignment before the error message
# below could be printed.
if ! REG_TOKEN=$(
  curl -fsS -H @- "$INSTANCE/api/v1/admin/runners/registration-token" \
    <<<"Authorization: token $PAT" \
    | python3 -c "import json,sys; print(json.load(sys.stdin).get('token') or '')"
) || [ -z "$REG_TOKEN" ]; then
  echo "✗ could not fetch registration token" >&2
  exit 1
fi
# ── register (labels come from config file; --labels is informational) ──────
# Collect the flags in an array so each value stays a single word no matter
# what it contains, then hand them to `act_runner register` in one call.
register_args=(
  --no-interactive
  --instance "$INSTANCE"
  --token "$REG_TOKEN"
  --name "$RUNNER_NAME"
  --labels "$LABELS"
  --config "$CONFIG"
)
act_runner register "${register_args[@]}"
printf '%s\n' " ✓ registered as $RUNNER_NAME"
# ── write launchd plist ─────────────────────────────────────────────────────
# The agent runs `act_runner daemon --config <per-runner config>`, starts at
# load (RunAtLoad), is restarted by launchd when it exits (KeepAlive), and
# logs stdout/stderr under $LOG_DIR.
RUNNER_BIN="$(command -v act_runner)"
# ~/Library/LaunchAgents is not guaranteed to exist; create it so the
# redirect below cannot fail with "No such file or directory".
mkdir -p "$(dirname "$PLIST")"
cat > "$PLIST" <<EOF
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
<dict>
  <key>Label</key><string>$SVC_LABEL</string>
  <key>ProgramArguments</key>
  <array>
    <string>$RUNNER_BIN</string>
    <string>daemon</string>
    <string>--config</string>
    <string>$CONFIG</string>
  </array>
  <key>RunAtLoad</key><true/>
  <key>KeepAlive</key><true/>
  <key>WorkingDirectory</key><string>$BASE</string>
  <key>StandardOutPath</key><string>$LOG_DIR/act_runner.log</string>
  <key>StandardErrorPath</key><string>$LOG_DIR/act_runner.err</string>
</dict>
</plist>
EOF
echo " + wrote $PLIST"
# ── load the service ────────────────────────────────────────────────────────
# bootout fails when the service is not loaded yet; that is expected.
launchctl bootout "gui/$(id -u)/$SVC_LABEL" 2>/dev/null || true
# Fall back to the legacy `load` verb when `bootstrap` is rejected.
launchctl bootstrap "gui/$(id -u)" "$PLIST" 2>/dev/null || launchctl load "$PLIST"
# Give launchd a moment to start the agent before checking on it.
sleep 3
# awk reads the whole listing and compares the label column exactly. A
# `grep -q` here can exit early, kill `launchctl list` with SIGPIPE and, under
# `pipefail`, make a loaded service look missing; its regex match would also
# let runner "1" be satisfied by runner "10".
if launchctl list | awk -v label="$SVC_LABEL" '$3 == label { found = 1 } END { exit !found }'; then
  echo " ✓ service loaded ($SVC_LABEL)"
else
  echo " ⚠ service not listed — check $LOG_DIR/act_runner.err"
fi
echo " log: $LOG_DIR/act_runner.log"