feat(infra): add resume/retry, per-service build, and fallback to setup.sh
- --resume: auto-detect last completed phase and continue from there
- --resume-from=N: resume from a specific phase
- --phase=N: run only one phase (e.g. --phase=7 to retry deploy)
- --reset: clear phase markers and start fresh
- --status: show completed phases
- Phase 7 now builds each of 27 services individually with progress
- Failed builds are skipped; remaining services still start
- Phase completion markers stored in /opt/bytelyst/.setup-state/
- GITEA_NPM_TOKEN auto-restored from saved state on resume
This commit is contained in:
parent
c0bc13e10a
commit
8ff9e42817
@ -14,7 +14,13 @@
|
||||
# - All @bytelyst/* packages (built + published to Gitea)
|
||||
# - Full 27-service ecosystem (via docker-compose.ecosystem.yml)
|
||||
#
|
||||
# Usage: sudo ./setup.sh
|
||||
# Usage: sudo ./setup.sh [OPTIONS]
|
||||
#
|
||||
# Options:
|
||||
# --resume Auto-resume from last completed phase
|
||||
# --resume-from=N Resume from phase N (1-8)
|
||||
# --phase=N Run ONLY phase N (useful for retrying a single phase)
|
||||
# --reset Clear phase markers and start fresh
# --status Show completed phases and exit
# -h, --help Show this help
|
||||
#
|
||||
# Optional env vars:
|
||||
# GITHUB_USER — GitHub org/user to clone from (default: saravanakumardb1)
|
||||
@ -78,6 +84,40 @@ detect_docker_host_ip() {
|
||||
ip -4 addr show docker0 2>/dev/null | grep -oP '(?<=inet\s)\d+(\.\d+){3}' || echo "172.17.0.1"
|
||||
}
|
||||
|
||||
# ── Phase tracking (resume/retry support) ──────────────────────────
|
||||
STATE_DIR="${INSTALL_DIR}/.setup-state"
|
||||
|
||||
# Record that a phase finished by writing a timestamp marker file.
# Globals:   STATE_DIR (read) - directory holding the marker files
# Arguments: $1 - phase number; becomes part of the marker file name
# Outputs:   creates/overwrites ${STATE_DIR}/phase<N>.done containing the
#            current time as printed by `date -Iseconds`
mark_phase_done() {
  local marker_file="${STATE_DIR}/phase${1}.done"

  # The state directory may not exist yet on a first run.
  mkdir -p "$STATE_DIR"
  date -Iseconds >"$marker_file"
}
|
||||
|
||||
# Report whether a phase has a completion marker.
# Globals:   STATE_DIR (read)
# Arguments: $1 - phase number
# Returns:   0 if ${STATE_DIR}/phase<N>.done exists as a regular file,
#            non-zero otherwise
is_phase_done() {
  local marker_file="${STATE_DIR}/phase${1}.done"
  test -f "$marker_file"
}
|
||||
|
||||
# Print the highest phase number (1-8) that has a completion marker.
# Prints 0 when no phase is marked done.
# Note: this is the highest marked phase, not the end of a contiguous run —
# if only phases 1, 2 and 7 are marked, the result is 7.
# Outputs:   the phase number on stdout
last_completed_phase() {
  local highest=0
  local phase

  for phase in {1..8}; do
    if is_phase_done "$phase"; then
      highest=$phase
    fi
  done

  printf '%s\n' "$highest"
}
|
||||
|
||||
# Forget all recorded progress by deleting the whole state directory
# (phase markers and anything else stored beneath it), then report it
# through the ok helper.
# Globals:   STATE_DIR (read)
reset_phase_markers() {
  local state_root="$STATE_DIR"

  rm -rf "$state_root"
  ok "Phase markers cleared."
}
|
||||
|
||||
# Reload GITEA_NPM_TOKEN from the token file saved under INSTALL_DIR, so a
# run that resumes after phase 2 still has it.
# Does nothing when the variable is already non-empty or when
# ${INSTALL_DIR}/.gitea_token does not exist.
# Globals:   INSTALL_DIR (read), GITEA_NPM_TOKEN (written + exported)
restore_gitea_token() {
  local token_file="${INSTALL_DIR}/.gitea_token"

  # A token already present in the environment wins over the saved one.
  if [ -n "${GITEA_NPM_TOKEN:-}" ]; then
    return 0
  fi
  if [ ! -f "$token_file" ]; then
    return 0
  fi

  GITEA_NPM_TOKEN=$(<"$token_file")
  export GITEA_NPM_TOKEN
  log "Restored GITEA_NPM_TOKEN from saved state."
}
|
||||
|
||||
# ═══════════════════════════════════════════════════════════════════════
|
||||
# PHASE 1: System Dependencies
|
||||
# ═══════════════════════════════════════════════════════════════════════
|
||||
@ -472,31 +512,116 @@ ENV
|
||||
# ═══════════════════════════════════════════════════════════════════════
|
||||
# PHASE 7: Deploy Ecosystem via Docker Compose
|
||||
# ═══════════════════════════════════════════════════════════════════════
|
||||
phase7_deploy() {
|
||||
log "Phase 7: Deploying 27-service ecosystem..."
|
||||
# Compose service names, split into groups so phase 7 can build and report
# on them in a fixed order: infra (6), platform (3), dashboards (2),
# product backends (10), product web front-ends (9).
INFRA_SERVICES=(
  cosmos-emulator
  azurite
  mailpit
  loki
  grafana
  gateway
)
PLATFORM_SERVICES=(
  platform-service
  extraction-service
  mcp-server
)
DASHBOARD_SERVICES=(
  admin-web
  tracker-web
)
BACKEND_SERVICES=(
  peakpulse-backend
  chronomind-backend
  jarvisjr-backend
  nomgap-backend
  mindlyst-backend
  lysnrai-backend
  notelett-backend
  flowmonk-backend
  actiontrail-backend
  localmemgpt-backend
)
WEB_SERVICES=(
  lysnrai-dashboard
  chronomind-web
  jarvisjr-web
  flowmonk-web
  notelett-web
  mindlyst-web
  nomgap-web
  actiontrail-web
  localmemgpt-web
)
|
||||
|
||||
# Prepare the shell for docker compose: change into the platform checkout,
# make sure the Gitea token is loaded, and export the variables compose
# builds read.
# Globals:   INSTALL_DIR (read)
#            GITEA_NPM_TOKEN, GITEA_NPM_HOST, DOCKER_BUILDKIT,
#            COMPOSE_DOCKER_CLI_BUILD (exported)
# Side effect: the caller's working directory becomes
#              ${INSTALL_DIR}/learning_ai_common_plat.
setup_compose_env() {
  local compose_root="${INSTALL_DIR}/learning_ai_common_plat"
  cd "$compose_root"

  restore_gitea_token

  # Image builds reach the Gitea registry through the Docker host address.
  local host_ip
  host_ip=$(detect_docker_host_ip)
  log " Docker host IP for Gitea access: ${host_ip}"

  export GITEA_NPM_TOKEN
  export GITEA_NPM_HOST="${host_ip}"
  export DOCKER_BUILDKIT=1 COMPOSE_DOCKER_CLI_BUILD=1
}
|
||||
|
||||
# Build and start all services
|
||||
log " Building and starting services (this takes ~10-15 minutes)..."
|
||||
# Build a single compose service image and show the last 5 lines of the
# build output.
# Globals:   INSTALL_DIR, COMPOSE_FILE (read)
# Arguments: $1 - compose service name
# Outputs:   tail of the combined stdout/stderr of `docker compose build`
# Returns:   the exit status of `docker compose build` itself. The status
#            is captured explicitly instead of being taken from a
#            `build | tail` pipeline, so a failed build is reported as a
#            failure whether or not the caller enabled `pipefail`.
build_one_service() {
  local svc="$1"
  local plat_dir="${INSTALL_DIR}/learning_ai_common_plat"
  local build_log=""
  local build_rc=0

  build_log=$(docker compose -f "${plat_dir}/${COMPOSE_FILE}" \
    --env-file "${plat_dir}/.env.ecosystem" \
    build "$svc" 2>&1) || build_rc=$?

  # Only the tail is shown; full build logs are too noisy for the console.
  if [ -n "$build_log" ]; then
    printf '%s\n' "$build_log" | tail -5
  fi

  return "$build_rc"
}
|
||||
|
||||
# Phase 7: build every compose service image individually, then start the
# services whose image is available (pre-built images + successful builds).
# A failed build does not abort the phase; that service is left out of
# `docker compose up` and listed in the final warning.
#
# Globals:   INSTALL_DIR, COMPOSE_FILE, STATE_DIR (read)
#            INFRA_SERVICES, PLATFORM_SERVICES, DASHBOARD_SERVICES,
#            BACKEND_SERVICES, WEB_SERVICES (read)
# Outputs:   progress through log/ok/warn; per-service result markers in
#            ${STATE_DIR}/builds/<service>.ok or <service>.fail
# Returns:   0 when `docker compose up` succeeded (even if some builds
#            failed); reports through fail otherwise.
phase7_deploy() {
  log "Phase 7: Deploying ecosystem (per-service build + fallback)..."

  setup_compose_env

  local plat_dir="${INSTALL_DIR}/learning_ai_common_plat"
  local compose_file="${plat_dir}/${COMPOSE_FILE}"
  local env_file="${plat_dir}/.env.ecosystem"
  local build_ok=() build_fail=() build_skip=()
  local svc
  mkdir -p "${STATE_DIR}/builds"

  # ── 7a: Build each service individually ──────────────────────────
  local all_services=("${INFRA_SERVICES[@]}" "${PLATFORM_SERVICES[@]}" "${DASHBOARD_SERVICES[@]}" "${BACKEND_SERVICES[@]}" "${WEB_SERVICES[@]}")
  local total=${#all_services[@]}
  local idx=0

  # Resolve the compose config once (it is the same for every service) and
  # list the services that declare a build section. The env file is passed
  # so interpolation matches what build/up will see.
  local buildable=""
  buildable=$(docker compose -f "$compose_file" --env-file "$env_file" \
    config --format json 2>/dev/null \
    | jq -r '.services | to_entries[] | select(.value.build) | .key' 2>/dev/null) \
    || buildable=""
  if [ -z "$buildable" ]; then
    # Same fallback as before (nothing is built here), but no longer silent.
    warn " Could not determine buildable services from compose config; treating all as pre-built."
  fi

  log " Building ${total} service images individually..."
  echo ""

  for svc in "${all_services[@]}"; do
    idx=$((idx + 1))

    # Services without a build section use pre-built images (no build step)
    if ! grep -Fxq -- "$svc" <<<"$buildable"; then
      build_skip+=("$svc")
      ok " [${idx}/${total}] ${svc} — pre-built image (skip build)"
      continue
    fi

    log " [${idx}/${total}] Building ${svc}..."
    # Drop markers from earlier runs so .ok and .fail never coexist.
    rm -f -- "${STATE_DIR}/builds/${svc}.ok" "${STATE_DIR}/builds/${svc}.fail"
    if build_one_service "$svc"; then
      build_ok+=("$svc")
      date -Iseconds > "${STATE_DIR}/builds/${svc}.ok"
      ok " [${idx}/${total}] ${svc} — build OK"
    else
      build_fail+=("$svc")
      date -Iseconds > "${STATE_DIR}/builds/${svc}.fail"
      warn " [${idx}/${total}] ${svc} — BUILD FAILED (will skip)"
    fi
  done

  # ── Build summary ────────────────────────────────────────────────
  echo ""
  log " Build results: ${#build_ok[@]} OK, ${#build_fail[@]} FAILED, ${#build_skip[@]} pre-built"
  if [ ${#build_fail[@]} -gt 0 ]; then
    warn " Failed services: ${build_fail[*]}"
  fi

  # ── 7b: Start services (skip failed builds) ─────────────────────
  # Compose up only the services that built successfully + pre-built infra.
  # The ${arr[@]+...} form keeps empty arrays safe under `set -u` on older
  # bash versions.
  local start_services=()
  start_services=(
    ${build_skip[@]+"${build_skip[@]}"}
    ${build_ok[@]+"${build_ok[@]}"}
  )

  if [ ${#start_services[@]} -eq 0 ]; then
    fail "No services to start — all builds failed."
    # fail is expected to abort; never fall through to a bare `up -d`,
    # which would start every service in the compose file.
    return 1
  fi

  log " Starting ${#start_services[@]} services..."
  local up_output=""
  local up_rc=0
  up_output=$(docker compose \
    -f "$compose_file" \
    --env-file "$env_file" \
    up -d "${start_services[@]}" 2>&1) || up_rc=$?
  if [ -n "$up_output" ]; then
    printf '%s\n' "$up_output" | tail -10
  fi
  if [ "$up_rc" -ne 0 ]; then
    fail "docker compose up failed (exit ${up_rc})."
    return "$up_rc"
  fi

  if [ ${#build_fail[@]} -gt 0 ]; then
    warn "Phase 7 complete with ${#build_fail[@]} failed builds: ${build_fail[*]}"
    warn " Fix and re-run: sudo ./setup.sh --phase=7"
  else
    ok "Phase 7 complete. All ${#start_services[@]} services started."
  fi
}
|
||||
|
||||
# ═══════════════════════════════════════════════════════════════════════
|
||||
@ -586,7 +711,81 @@ HEALTH
|
||||
# ═══════════════════════════════════════════════════════════════════════
|
||||
# MAIN
|
||||
# ═══════════════════════════════════════════════════════════════════════
|
||||
# Run one setup phase by number and then record it as completed.
# Arguments: $1 - phase number (1-8); anything else is reported through
#                 fail as "Unknown phase: <value>"
# Note: the completion marker is written after the phase function (or
# fail) returns, so it relies on those aborting the script on error.
run_phase() {
  local phase_num="$1"
  # Index N holds the function implementing phase N (index 0 is unused).
  local -a phase_fns=(
    ""
    phase1_system
    phase2_gitea
    phase3_clone
    phase4_build
    phase5_publish
    phase6_env
    phase7_deploy
    phase8_verify
  )

  if [[ "$phase_num" =~ ^[1-8]$ ]]; then
    "${phase_fns[phase_num]}"
  else
    fail "Unknown phase: $phase_num"
  fi

  mark_phase_done "$phase_num"
}
|
||||
|
||||
# Print the command-line help: the supported options followed by the list
# of the eight setup phases.
# Outputs:   help text on stdout
usage() {
  # One printf call; each argument becomes one output line.
  printf '%s\n' \
    "Usage: sudo ./setup.sh [OPTIONS]" \
    "" \
    "Options:" \
    " --resume Auto-resume from last completed phase" \
    " --resume-from=N Resume starting at phase N (1-8)" \
    " --phase=N Run ONLY phase N" \
    " --reset Clear phase markers and start fresh" \
    " --status Show completed phases and exit" \
    " -h, --help Show this help" \
    "" \
    "Phases:" \
    " 1 System dependencies (Docker, Node, pnpm, Ollama)" \
    " 2 Gitea npm registry" \
    " 3 Clone repositories" \
    " 4 Build @bytelyst/* packages" \
    " 5 Publish packages to Gitea" \
    " 6 Generate .env.ecosystem" \
    " 7 Build + deploy Docker services (per-service, with fallback)" \
    " 8 Health check"
}
|
||||
|
||||
main() {
|
||||
# Parse CLI arguments
|
||||
local mode="full" start_phase=1 only_phase=0
|
||||
|
||||
for arg in "$@"; do
|
||||
case "$arg" in
|
||||
--resume)
|
||||
mode="resume" ;;
|
||||
--resume-from=*)
|
||||
mode="resume-from"
|
||||
start_phase="${arg#*=}" ;;
|
||||
--phase=*)
|
||||
mode="single"
|
||||
only_phase="${arg#*=}" ;;
|
||||
--reset)
|
||||
mkdir -p "$INSTALL_DIR"
|
||||
reset_phase_markers
|
||||
exit 0 ;;
|
||||
--status)
|
||||
mkdir -p "$INSTALL_DIR"
|
||||
echo "Phase completion status:"
|
||||
for i in 1 2 3 4 5 6 7 8; do
|
||||
if is_phase_done "$i"; then
|
||||
echo " Phase $i: DONE ($(cat "${STATE_DIR}/phase${i}.done"))"
|
||||
else
|
||||
echo " Phase $i: pending"
|
||||
fi
|
||||
done
|
||||
exit 0 ;;
|
||||
-h|--help)
|
||||
usage; exit 0 ;;
|
||||
*)
|
||||
warn "Unknown option: $arg"
|
||||
usage; exit 1 ;;
|
||||
esac
|
||||
done
|
||||
|
||||
# Tee all output to a log file so SSH disconnection doesn't lose context
|
||||
mkdir -p "$INSTALL_DIR"
|
||||
exec > >(tee -a "${INSTALL_DIR}/setup.log") 2>&1
|
||||
@ -606,16 +805,46 @@ main() {
|
||||
|
||||
log "Target OS: $(lsb_release -ds 2>/dev/null || cat /etc/os-release | grep PRETTY_NAME | cut -d= -f2 | tr -d '"')"
|
||||
log "Target arch: $(uname -m)"
|
||||
|
||||
# ── Single-phase mode ────────────────────────────────────────────
|
||||
if [ "$mode" = "single" ]; then
|
||||
log "Running ONLY phase ${only_phase}..."
|
||||
restore_gitea_token
|
||||
run_phase "$only_phase"
|
||||
ok "Phase ${only_phase} complete."
|
||||
exit 0
|
||||
fi
|
||||
|
||||
# ── Auto-resume mode ─────────────────────────────────────────────
|
||||
if [ "$mode" = "resume" ]; then
|
||||
local last
|
||||
last=$(last_completed_phase)
|
||||
if [ "$last" -eq 0 ]; then
|
||||
log "No completed phases found. Starting from phase 1."
|
||||
start_phase=1
|
||||
elif [ "$last" -ge 8 ]; then
|
||||
ok "All phases already completed. Use --reset to start over."
|
||||
exit 0
|
||||
else
|
||||
start_phase=$((last + 1))
|
||||
log "Resuming from phase ${start_phase} (phases 1-${last} already done)."
|
||||
fi
|
||||
elif [ "$mode" = "resume-from" ]; then
|
||||
log "Resuming from phase ${start_phase} (as requested)."
|
||||
fi
|
||||
|
||||
# Restore token if resuming past phase 2
|
||||
if [ "$start_phase" -gt 2 ]; then
|
||||
restore_gitea_token
|
||||
fi
|
||||
|
||||
echo ""
|
||||
|
||||
phase1_system
|
||||
phase2_gitea
|
||||
phase3_clone
|
||||
phase4_build
|
||||
phase5_publish
|
||||
phase6_env
|
||||
phase7_deploy
|
||||
phase8_verify
|
||||
# ── Run phases ───────────────────────────────────────────────────
|
||||
for phase_num in 1 2 3 4 5 6 7 8; do
|
||||
[ "$phase_num" -ge "$start_phase" ] || continue
|
||||
run_phase "$phase_num"
|
||||
done
|
||||
|
||||
local elapsed=$(( $(date +%s) - start_time ))
|
||||
local minutes=$(( elapsed / 60 ))
|
||||
@ -627,6 +856,8 @@ main() {
|
||||
echo "║ ║"
|
||||
echo "║ Health check: /opt/bytelyst/check-health.sh ║"
|
||||
echo "║ Compose logs: docker compose -f ${COMPOSE_FILE} logs -f ║"
|
||||
echo "║ Retry failed: sudo ./setup.sh --phase=7 ║"
|
||||
echo "║ Resume: sudo ./setup.sh --resume ║"
|
||||
echo "║ Gitea UI: http://localhost:3300 ║"
|
||||
echo "║ Ollama API: http://localhost:11434 ║"
|
||||
echo "║ Grafana: http://localhost:3000 (admin / bytelyst) ║"
|
||||
|
||||
Loading…
Reference in New Issue
Block a user