fix(infra): harden setup.sh — pre-flight checks, pipefail safety, RAM management

- Add pre-flight disk space + memory checks after root validation
- Add --batch --yes to gpg dearmor calls (idempotent on re-run)
- Fix jq abort on malformed Gitea token response (|| echo guard)
- Wrap pnpm install/build in if-blocks with explicit fail() messages
- Stop Ollama during Phase 7 Docker builds to free ~3 GB RAM
- Restart Ollama after Phase 7 builds complete (before Phase 8 health check)
This commit is contained in:
saravanakumardb1 2026-03-24 13:06:05 -07:00
parent c2ca7f53b4
commit 1a1f7dd55c

View File

@ -166,7 +166,7 @@ phase1_system() {
log "Installing Docker..."
install -m 0755 -d /etc/apt/keyrings
curl -fsSL https://download.docker.com/linux/ubuntu/gpg \
| gpg --dearmor -o /etc/apt/keyrings/docker.gpg
| gpg --batch --yes --dearmor -o /etc/apt/keyrings/docker.gpg
chmod a+r /etc/apt/keyrings/docker.gpg
echo "deb [arch=$(dpkg --print-architecture) signed-by=/etc/apt/keyrings/docker.gpg] https://download.docker.com/linux/ubuntu $(lsb_release -cs) stable" \
@ -197,7 +197,7 @@ DJSON
# Modern NodeSource method (GPG key + apt source — the curl|bash setup_XX.x scripts are deprecated)
install -m 0755 -d /etc/apt/keyrings
curl -fsSL https://deb.nodesource.com/gpgkey/nodesource-repo.gpg.key \
| gpg --dearmor -o /etc/apt/keyrings/nodesource.gpg
| gpg --batch --yes --dearmor -o /etc/apt/keyrings/nodesource.gpg
echo "deb [signed-by=/etc/apt/keyrings/nodesource.gpg] https://deb.nodesource.com/node_${NODE_VERSION}.x nodistro main" \
> /etc/apt/sources.list.d/nodesource.list
apt-get update -qq
@ -327,7 +327,7 @@ phase2_gitea() {
-H "Content-Type: application/json" \
-d '{"name":"vm-deploy","scopes":["write:package","read:package","write:organization","read:organization"]}')
GITEA_NPM_TOKEN=$(echo "$token_response" | jq -r '.sha1 // .token')
GITEA_NPM_TOKEN=$(echo "$token_response" | jq -r '.sha1 // .token' 2>/dev/null || echo "")
if [ -z "$GITEA_NPM_TOKEN" ] || [ "$GITEA_NPM_TOKEN" = "null" ]; then
fail "Failed to create Gitea API token. Response: $token_response"
fi
@ -398,11 +398,15 @@ NPMRC
# Install workspace deps (no --frozen-lockfile: shallow clones may have drift)
log " Installing workspace dependencies..."
pnpm install 2>&1 | tail -3
if ! pnpm install 2>&1 | tail -3; then
fail "pnpm install failed. Check ${INSTALL_DIR}/setup.log for full output."
fi
# Build all packages
log " Building all packages..."
pnpm -r --if-present build 2>&1 | tail -5
if ! pnpm -r --if-present build 2>&1 | tail -5; then
fail "pnpm build failed. Check ${INSTALL_DIR}/setup.log for full output."
fi
ok "Phase 4 complete. All packages built."
}
@ -599,6 +603,12 @@ build_one_service() {
phase7_deploy() {
log "Phase 7: Deploying ecosystem (per-service build + fallback)..."
# Free RAM: stop Ollama during Docker builds (it is restarted at the end of this phase, or by Phase 1)
if systemctl is-active --quiet ollama 2>/dev/null; then
log " Stopping Ollama to free RAM for Docker builds..."
systemctl stop ollama 2>/dev/null || true
fi
setup_compose_env
local plat_dir="${INSTALL_DIR}/learning_ai_common_plat"
@ -679,6 +689,12 @@ phase7_deploy() {
--env-file "${plat_dir}/.env.ecosystem" \
up -d "${start_services[@]}" 2>&1 | tail -10 || true
# Restart Ollama (stopped at start of phase 7 to free RAM)
if command -v ollama &>/dev/null; then
log " Restarting Ollama..."
systemctl start ollama 2>/dev/null || nohup ollama serve > /var/log/ollama.log 2>&1 &
fi
if [ ${#build_fail[@]} -gt 0 ]; then
# Signal to run_phase() that phase 7 should NOT be marked done
PHASE7_HAD_FAILURES=1
@ -873,6 +889,14 @@ main() {
[ "$(id -u)" -eq 0 ] || fail "This script must be run as root (sudo)."
# ── Pre-flight checks ───────────────────────────────────────────
local disk_gb mem_gb
disk_gb=$(df -BG / | awk 'NR==2 {gsub(/G/,"",$4); print $4}')
mem_gb=$(free -g | awk '/^Mem:/ {print $2}')
log "Available disk: ${disk_gb} GB, Total RAM: ${mem_gb} GB"
[ "${disk_gb:-0}" -ge 40 ] || warn "Low disk space (${disk_gb} GB). Recommend 128 GB+."
[ "${mem_gb:-0}" -ge 16 ] || warn "Low memory (${mem_gb} GB). Recommend 32 GB+."
local start_time
start_time=$(date +%s)