fix(infra): harden setup.sh — pre-flight checks, pipefail safety, RAM management
- Add pre-flight disk space + memory checks after root validation
- Add --batch --yes to gpg dearmor calls (idempotent on re-run)
- Fix jq abort on malformed Gitea token response (|| echo guard)
- Wrap pnpm install/build in if-blocks with explicit fail() messages
- Stop Ollama during Phase 7 Docker builds to free ~3 GB RAM
- Restart Ollama after Phase 7 builds complete (before Phase 8 health check)
This commit is contained in:
parent
c2ca7f53b4
commit
1a1f7dd55c
@ -166,7 +166,7 @@ phase1_system() {
|
||||
log "Installing Docker..."
|
||||
install -m 0755 -d /etc/apt/keyrings
|
||||
curl -fsSL https://download.docker.com/linux/ubuntu/gpg \
|
||||
| gpg --dearmor -o /etc/apt/keyrings/docker.gpg
|
||||
| gpg --batch --yes --dearmor -o /etc/apt/keyrings/docker.gpg
|
||||
chmod a+r /etc/apt/keyrings/docker.gpg
|
||||
|
||||
echo "deb [arch=$(dpkg --print-architecture) signed-by=/etc/apt/keyrings/docker.gpg] https://download.docker.com/linux/ubuntu $(lsb_release -cs) stable" \
|
||||
@ -197,7 +197,7 @@ DJSON
|
||||
# Modern NodeSource method (GPG key + apt source — the curl|bash setup_XX.x scripts are deprecated)
|
||||
install -m 0755 -d /etc/apt/keyrings
|
||||
curl -fsSL https://deb.nodesource.com/gpgkey/nodesource-repo.gpg.key \
|
||||
| gpg --dearmor -o /etc/apt/keyrings/nodesource.gpg
|
||||
| gpg --batch --yes --dearmor -o /etc/apt/keyrings/nodesource.gpg
|
||||
echo "deb [signed-by=/etc/apt/keyrings/nodesource.gpg] https://deb.nodesource.com/node_${NODE_VERSION}.x nodistro main" \
|
||||
> /etc/apt/sources.list.d/nodesource.list
|
||||
apt-get update -qq
|
||||
@ -327,7 +327,7 @@ phase2_gitea() {
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{"name":"vm-deploy","scopes":["write:package","read:package","write:organization","read:organization"]}')
|
||||
|
||||
GITEA_NPM_TOKEN=$(echo "$token_response" | jq -r '.sha1 // .token')
|
||||
GITEA_NPM_TOKEN=$(echo "$token_response" | jq -r '.sha1 // .token' 2>/dev/null || echo "")
|
||||
if [ -z "$GITEA_NPM_TOKEN" ] || [ "$GITEA_NPM_TOKEN" = "null" ]; then
|
||||
fail "Failed to create Gitea API token. Response: $token_response"
|
||||
fi
|
||||
@ -398,11 +398,15 @@ NPMRC
|
||||
|
||||
# Install workspace deps (no --frozen-lockfile: shallow clones may have drift)
|
||||
log " Installing workspace dependencies..."
|
||||
pnpm install 2>&1 | tail -3
|
||||
if ! pnpm install 2>&1 | tail -3; then
|
||||
fail "pnpm install failed. Check ${INSTALL_DIR}/setup.log for full output."
|
||||
fi
|
||||
|
||||
# Build all packages
|
||||
log " Building all packages..."
|
||||
pnpm -r --if-present build 2>&1 | tail -5
|
||||
if ! pnpm -r --if-present build 2>&1 | tail -5; then
|
||||
fail "pnpm build failed. Check ${INSTALL_DIR}/setup.log for full output."
|
||||
fi
|
||||
|
||||
ok "Phase 4 complete. All packages built."
|
||||
}
|
||||
@ -599,6 +603,12 @@ build_one_service() {
|
||||
phase7_deploy() {
|
||||
log "Phase 7: Deploying ecosystem (per-service build + fallback)..."
|
||||
|
||||
# Free RAM: stop Ollama during Docker builds (Phase 1 will restart it, or we do at end)
|
||||
if systemctl is-active --quiet ollama 2>/dev/null; then
|
||||
log " Stopping Ollama to free RAM for Docker builds..."
|
||||
systemctl stop ollama 2>/dev/null || true
|
||||
fi
|
||||
|
||||
setup_compose_env
|
||||
|
||||
local plat_dir="${INSTALL_DIR}/learning_ai_common_plat"
|
||||
@ -679,6 +689,12 @@ phase7_deploy() {
|
||||
--env-file "${plat_dir}/.env.ecosystem" \
|
||||
up -d "${start_services[@]}" 2>&1 | tail -10 || true
|
||||
|
||||
# Restart Ollama (stopped at start of phase 7 to free RAM)
|
||||
if command -v ollama &>/dev/null; then
|
||||
log " Restarting Ollama..."
|
||||
systemctl start ollama 2>/dev/null || nohup ollama serve > /var/log/ollama.log 2>&1 &
|
||||
fi
|
||||
|
||||
if [ ${#build_fail[@]} -gt 0 ]; then
|
||||
# Signal to run_phase() that phase 7 should NOT be marked done
|
||||
PHASE7_HAD_FAILURES=1
|
||||
@ -873,6 +889,14 @@ main() {
|
||||
|
||||
[ "$(id -u)" -eq 0 ] || fail "This script must be run as root (sudo)."
|
||||
|
||||
# ── Pre-flight checks ───────────────────────────────────────────
|
||||
local disk_gb mem_gb
|
||||
disk_gb=$(df -BG / | awk 'NR==2 {gsub(/G/,"",$4); print $4}')
|
||||
mem_gb=$(free -g | awk '/^Mem:/ {print $2}')
|
||||
log "Available disk: ${disk_gb} GB, Total RAM: ${mem_gb} GB"
|
||||
[ "${disk_gb:-0}" -ge 40 ] || warn "Low disk space (${disk_gb} GB). Recommend 128 GB+."
|
||||
[ "${mem_gb:-0}" -ge 16 ] || warn "Low memory (${mem_gb} GB). Recommend 32 GB+."
|
||||
|
||||
local start_time
|
||||
start_time=$(date +%s)
|
||||
|
||||
|
||||
Loading…
Reference in New Issue
Block a user