fix(infra): fix 5 bugs in setup.sh per-service build + resume logic
1. set -e + pipefail: `docker compose up` piped through `tail` would abort the script on a partial startup failure, before the summary was printed — now appends `|| true` to that pipeline.
2. Phase 7 was marked done even when builds failed, so --resume would skip it — it is now marked done only when all builds succeed.
3. --phase=7 printed 'Phase 7 complete' even when builds failed — it now exits with code 1 and points to the build logs.
4. `docker compose config --format json` was called about 30 times (once per service) inside the build loop — its output is now cached once before the loop (saves ~3s).
5. Build logs are now saved per service to STATE_DIR/builds/<svc>.log for post-failure debugging.
This commit is contained in:
parent
8ff9e42817
commit
a9414218ba
@ -543,11 +543,19 @@ setup_compose_env() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
# Build a single compose service image, return 0 on success.
|
# Build a single compose service image, return 0 on success.
|
||||||
|
# Full build log saved to STATE_DIR/builds/<svc>.log for debugging.
|
||||||
build_one_service() {
|
build_one_service() {
|
||||||
local svc="$1"
|
local svc="$1"
|
||||||
local plat_dir="${INSTALL_DIR}/learning_ai_common_plat"
|
local plat_dir="${INSTALL_DIR}/learning_ai_common_plat"
|
||||||
docker compose -f "${plat_dir}/${COMPOSE_FILE}" --env-file "${plat_dir}/.env.ecosystem" \
|
local log_file="${STATE_DIR}/builds/${svc}.log"
|
||||||
build "$svc" 2>&1 | tail -5
|
if docker compose -f "${plat_dir}/${COMPOSE_FILE}" --env-file "${plat_dir}/.env.ecosystem" \
|
||||||
|
build "$svc" > "$log_file" 2>&1; then
|
||||||
|
tail -3 "$log_file"
|
||||||
|
return 0
|
||||||
|
else
|
||||||
|
tail -5 "$log_file"
|
||||||
|
return 1
|
||||||
|
fi
|
||||||
}
|
}
|
||||||
|
|
||||||
phase7_deploy() {
|
phase7_deploy() {
|
||||||
@ -567,13 +575,19 @@ phase7_deploy() {
|
|||||||
log " Building ${total} service images individually..."
|
log " Building ${total} service images individually..."
|
||||||
echo ""
|
echo ""
|
||||||
|
|
||||||
|
# Cache compose config JSON once (avoid calling config 30 times)
|
||||||
|
local compose_json
|
||||||
|
compose_json=$(docker compose -f "${plat_dir}/${COMPOSE_FILE}" \
|
||||||
|
--env-file "${plat_dir}/.env.ecosystem" config --format json 2>/dev/null || true)
|
||||||
|
|
||||||
for svc in "${all_services[@]}"; do
|
for svc in "${all_services[@]}"; do
|
||||||
idx=$((idx + 1))
|
idx=$((idx + 1))
|
||||||
|
|
||||||
# Infrastructure services use pre-built images (no build step)
|
# Infrastructure services use pre-built images (no build step)
|
||||||
local has_build
|
local has_build=""
|
||||||
has_build=$(docker compose -f "${plat_dir}/${COMPOSE_FILE}" config --format json 2>/dev/null \
|
if [ -n "$compose_json" ]; then
|
||||||
| jq -r ".services.\"${svc}\".build // empty" 2>/dev/null || true)
|
has_build=$(echo "$compose_json" | jq -r ".services.\"${svc}\".build // empty" 2>/dev/null || true)
|
||||||
|
fi
|
||||||
|
|
||||||
if [ -z "$has_build" ] || [ "$has_build" = "null" ]; then
|
if [ -z "$has_build" ] || [ "$has_build" = "null" ]; then
|
||||||
build_skip+=("$svc")
|
build_skip+=("$svc")
|
||||||
@ -611,15 +625,19 @@ phase7_deploy() {
|
|||||||
fi
|
fi
|
||||||
|
|
||||||
log " Starting ${#start_services[@]} services..."
|
log " Starting ${#start_services[@]} services..."
|
||||||
|
# Use || true so set -e doesn't abort before we print the summary
|
||||||
docker compose \
|
docker compose \
|
||||||
-f "${plat_dir}/${COMPOSE_FILE}" \
|
-f "${plat_dir}/${COMPOSE_FILE}" \
|
||||||
--env-file "${plat_dir}/.env.ecosystem" \
|
--env-file "${plat_dir}/.env.ecosystem" \
|
||||||
up -d "${start_services[@]}" 2>&1 | tail -10
|
up -d "${start_services[@]}" 2>&1 | tail -10 || true
|
||||||
|
|
||||||
if [ ${#build_fail[@]} -gt 0 ]; then
|
if [ ${#build_fail[@]} -gt 0 ]; then
|
||||||
|
# Signal to run_phase() that phase 7 should NOT be marked done
|
||||||
|
PHASE7_HAD_FAILURES=1
|
||||||
warn "Phase 7 complete with ${#build_fail[@]} failed builds: ${build_fail[*]}"
|
warn "Phase 7 complete with ${#build_fail[@]} failed builds: ${build_fail[*]}"
|
||||||
warn " Fix and re-run: sudo ./setup.sh --phase=7"
|
warn " Fix and re-run: sudo ./setup.sh --phase=7"
|
||||||
else
|
else
|
||||||
|
PHASE7_HAD_FAILURES=0
|
||||||
ok "Phase 7 complete. All ${#start_services[@]} services started."
|
ok "Phase 7 complete. All ${#start_services[@]} services started."
|
||||||
fi
|
fi
|
||||||
}
|
}
|
||||||
@ -711,6 +729,8 @@ HEALTH
|
|||||||
# ═══════════════════════════════════════════════════════════════════════
|
# ═══════════════════════════════════════════════════════════════════════
|
||||||
# MAIN
|
# MAIN
|
||||||
# ═══════════════════════════════════════════════════════════════════════
|
# ═══════════════════════════════════════════════════════════════════════
|
||||||
|
PHASE7_HAD_FAILURES=0
|
||||||
|
|
||||||
run_phase() {
|
run_phase() {
|
||||||
local phase_num="$1"
|
local phase_num="$1"
|
||||||
case "$phase_num" in
|
case "$phase_num" in
|
||||||
@ -724,6 +744,11 @@ run_phase() {
|
|||||||
8) phase8_verify ;;
|
8) phase8_verify ;;
|
||||||
*) fail "Unknown phase: $phase_num" ;;
|
*) fail "Unknown phase: $phase_num" ;;
|
||||||
esac
|
esac
|
||||||
|
# Don't mark phase 7 done if there were build failures (--resume should retry it)
|
||||||
|
if [ "$phase_num" -eq 7 ] && [ "$PHASE7_HAD_FAILURES" -eq 1 ]; then
|
||||||
|
warn "Phase 7 NOT marked done (build failures). --resume will retry it."
|
||||||
|
return
|
||||||
|
fi
|
||||||
mark_phase_done "$phase_num"
|
mark_phase_done "$phase_num"
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -811,6 +836,11 @@ main() {
|
|||||||
log "Running ONLY phase ${only_phase}..."
|
log "Running ONLY phase ${only_phase}..."
|
||||||
restore_gitea_token
|
restore_gitea_token
|
||||||
run_phase "$only_phase"
|
run_phase "$only_phase"
|
||||||
|
if [ "$only_phase" -eq 7 ] && [ "$PHASE7_HAD_FAILURES" -eq 1 ]; then
|
||||||
|
warn "Phase 7 finished with failures. Fix and re-run: sudo ./setup.sh --phase=7"
|
||||||
|
warn "Build logs: ${STATE_DIR}/builds/"
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
ok "Phase ${only_phase} complete."
|
ok "Phase ${only_phase} complete."
|
||||||
exit 0
|
exit 0
|
||||||
fi
|
fi
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user