learning_ai_common_plat/docs/devops/single_azure_vm/setup.sh
saravanakumardb1 f78d382d62 fix(infra): add Azurite + Loki to health check script
- Azurite blob storage (:10000) was missing from check-health.sh
- Loki log aggregation (:3100/ready) was missing from check-health.sh
- Now covers all 30 compose services + Gitea + Ollama = 32 endpoints
2026-03-24 13:08:12 -07:00

974 lines
38 KiB
Bash
Executable File

#!/usr/bin/env bash
# ═══════════════════════════════════════════════════════════════════════
# ByteLyst Single-VM Bootstrap Script
# ═══════════════════════════════════════════════════════════════════════
# Deploys the ENTIRE ByteLyst ecosystem on a **raw** Ubuntu Azure VM.
# Installs ALL dependencies from scratch — nothing pre-installed required.
#
# What gets installed:
# - Docker CE + Docker Compose + BuildKit
# - Node.js 22 LTS + pnpm 10.6.5
# - Gitea (Docker container — npm package registry on :3300)
# - Ollama (local LLM inference for LocalMemGPT on :11434)
# - All 11 ByteLyst repos (cloned from GitHub)
# - All @bytelyst/* packages (built + published to Gitea)
# - Full 30-service ecosystem (via docker-compose.ecosystem.yml)
#
# Usage: sudo ./setup.sh [OPTIONS]
#
# Options:
# --resume Auto-resume from last completed phase
# --resume-from=N Resume from phase N (1-8)
# --phase=N Run ONLY phase N (useful for retrying a single phase)
# --reset Clear phase markers and start fresh
# --status Show completed phases and exit
# -h, --help Show usage help
#
# Phases:
# 1 System dependencies (Docker, Node, pnpm, Ollama)
# 2 Gitea npm registry (container on :3300)
# 3 Clone 11 repositories from GitHub
# 4 Build all @bytelyst/* packages
# 5 Publish packages to Gitea npm registry
# 6 Generate .env.ecosystem config
# 7 Build + deploy 30 Docker services (per-service, with fallback)
# 8 Health check (32 endpoints: 30 compose services + Gitea + Ollama)
#
# Examples:
# sudo ./setup.sh # Fresh install (all 8 phases)
# sudo ./setup.sh --phase=7 # Retry just the deploy phase
# sudo ./setup.sh --resume # Auto-resume after SSH disconnect
# sudo ./setup.sh --resume-from=7 # Jump to deploy after manual fix
# sudo ./setup.sh --status # Check what's done
# sudo ./setup.sh --reset # Start completely over
#
# Resume/retry:
# Phase completion is tracked in /opt/bytelyst/.setup-state/.
# If a phase fails, fix the issue and re-run with --resume or --phase=N.
# Phase 7 builds each service individually — failed builds are skipped,
# remaining services still start. Build logs: /opt/bytelyst/.setup-state/builds/
#
# Optional env vars:
# GITHUB_USER — GitHub org/user to clone from (default: saravanakumardb1)
# GITHUB_TOKEN — If repos are private, set this for HTTPS auth
# GITEA_ADMIN — Gitea admin username (default: bytelyst-admin)
# GITEA_PASS — Gitea admin password (default: ByteLyst2026!)
# OLLAMA_MODEL — Default LLM model to pull (default: llama3.2:3b)
# SKIP_CLONE — Set to 1 to skip cloning (repos already exist)
# SKIP_BUILD — Set to 1 to skip package build+publish
# ═══════════════════════════════════════════════════════════════════════
set -euo pipefail

# ── Configuration ────────────────────────────────────────────────────
# Fixed settings (not overridable from the environment).
INSTALL_DIR="/opt/bytelyst"
COMPOSE_FILE="docker-compose.ecosystem.yml"
GITEA_PORT=3300
NODE_VERSION=22
PNPM_VERSION="10.6.5"

# Settings that honour a same-named environment variable; the text after
# ":-" is used when the variable is unset or empty.
GITHUB_USER="${GITHUB_USER:-saravanakumardb1}"
GITEA_ADMIN="${GITEA_ADMIN:-bytelyst-admin}"
GITEA_PASS="${GITEA_PASS:-ByteLyst2026!}"
OLLAMA_MODEL="${OLLAMA_MODEL:-llama3.2:3b}"

# Well-known emulator keys (public, safe to embed)
COSMOS_EMULATOR_KEY="C2y6yDjf5/R+ob0N8A7Cgv30VRDJIWEHLM+4QDU5DE2nQ9nDuVTqobD4b8mGGyPMbIZnqyMsEcaGQy67XIw/Jw=="
AZURITE_KEY="Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw=="

# Repositories cloned into INSTALL_DIR by phase 3, in this order.
REPOS=(
  learning_ai_common_plat
  learning_voice_ai_agent
  learning_multimodal_memory_agents
  learning_ai_clock
  learning_ai_jarvis_jr
  learning_ai_fastgap
  learning_ai_peakpulse
  learning_ai_flowmonk
  learning_ai_notes
  learning_ai_trails
  learning_ai_local_memory_gpt
)
# ── Helpers ──────────────────────────────────────────────────────────
# ANSI colour escapes; expanded by `echo -e` in the helpers below.
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
CYAN='\033[0;36m'
NC='\033[0m'

# Shared formatter: prints "<colour>[HH:MM:SS]<tag><reset> <message>".
#   $1 - colour escape, $2 - tag appended after the timestamp, rest - message
_emit_line() {
  local colour="$1" tag="$2"
  shift 2
  echo -e "${colour}[$(date +%H:%M:%S)]${tag}${NC} $*"
}

# Informational message.
log() { _emit_line "$CYAN" "" "$@"; }
# Success message.
ok() { _emit_line "$GREEN" " ✓" "$@"; }
# Non-fatal warning.
warn() { _emit_line "$YELLOW" " ⚠" "$@"; }
# Fatal error: print the message, then terminate the script with status 1.
fail() {
  _emit_line "$RED" " ✗" "$@"
  exit 1
}
# Poll a URL every 2 seconds until `curl -sf` succeeds, or give up.
# Arguments: $1 - URL to probe
#            $2 - timeout in seconds (default: 60)
# Returns:   0 once the URL answers. On timeout it calls fail(), which
#            terminates the script.
wait_for_url() {
  local url="$1" max="${2:-60}" waited=0
  while ! curl -sf "$url" > /dev/null 2>&1; do
    sleep 2
    waited=$((waited + 2))
    # A full `if` is required here. With `[ ... ] && fail` as the last command
    # of the loop body, the loop (and so this function) would return 1 after
    # any successful wait that needed at least one retry, and `set -e` would
    # then abort the script at the call site.
    if [ "$waited" -ge "$max" ]; then
      fail "Timeout waiting for $url"
    fi
  done
  return 0
}
# Detect the host IP that Docker containers can reach.
# Outputs: the first IPv4 address configured on the docker0 bridge, or the
#          conventional default bridge gateway 172.17.0.1 when docker0 is
#          missing or has no IPv4 address.
# Only one address is ever printed, so the result is safe to use as a single
# hostname even when docker0 carries several addresses.
detect_docker_host_ip() {
  local addr
  # awk reads the whole input (no early exit), so `ip` never sees a closed pipe.
  addr=$(ip -4 addr show docker0 2>/dev/null \
    | awk '$1 == "inet" && !seen { sub(/\/.*/, "", $2); print $2; seen = 1 }') || true
  echo "${addr:-172.17.0.1}"
}
# ── Phase tracking (resume/retry support) ──────────────────────────
# Directory holding one "phaseN.done" marker file per completed phase.
STATE_DIR="${INSTALL_DIR}/.setup-state"

# Record phase $1 as completed by writing the current ISO-8601 timestamp
# into its marker file (the state directory is created on demand).
mark_phase_done() {
  mkdir -p "$STATE_DIR"
  local marker="${STATE_DIR}/phase${1}.done"
  date -Iseconds > "$marker"
}
# Succeed (status 0) when the marker file for phase $1 exists as a regular
# file in STATE_DIR; fail otherwise.
is_phase_done() {
  local marker="${STATE_DIR}/phase${1}.done"
  test -f "$marker"
}
# Print the number of the last phase in the unbroken run of completed phases
# starting at phase 1 (0 when phase 1 itself is not done).
# A later phase that is marked done after a gap is ignored, because phases
# must be completed sequentially.
last_completed_phase() {
  local last=0 i # `i` is local so callers' variables are never clobbered
  for i in 1 2 3 4 5 6 7 8; do
    if is_phase_done "$i"; then
      last=$i
    else
      # Stop at first gap — phases must be sequential
      break
    fi
  done
  echo "$last"
}
# Forget all phase progress by deleting the state directory. Everything
# stored under STATE_DIR is removed along with the phase markers.
reset_phase_markers() {
  local state_dir="$STATE_DIR"
  rm -rf "$state_dir"
  ok "Phase markers cleared."
}
# Restore GITEA_NPM_TOKEN from saved state (needed when resuming after phase 2).
# Does nothing when the variable is already non-empty or when the token file
# ${INSTALL_DIR}/.gitea_token does not exist; otherwise loads the file's
# content into GITEA_NPM_TOKEN and exports it.
restore_gitea_token() {
  local token_file="${INSTALL_DIR}/.gitea_token"
  if [ -n "${GITEA_NPM_TOKEN:-}" ]; then
    return 0
  fi
  if [ ! -f "$token_file" ]; then
    return 0
  fi
  GITEA_NPM_TOKEN=$(< "$token_file")
  export GITEA_NPM_TOKEN
  log "Restored GITEA_NPM_TOKEN from saved state."
}
# ═══════════════════════════════════════════════════════════════════════
# PHASE 1: System Dependencies
# ═══════════════════════════════════════════════════════════════════════
# Phase 1 — provision the host with everything later phases need:
# base apt packages, Docker CE (with the compose and buildx plugins),
# Node.js ${NODE_VERSION}, pnpm, and Ollama plus its default model.
# Reads:  NODE_VERSION, PNPM_VERSION, OLLAMA_MODEL, INSTALL_DIR.
# Writes: apt keyrings/source lists for Docker and NodeSource,
#         /etc/docker/daemon.json, /var/log/ollama.log (fallback start only),
#         and creates INSTALL_DIR.
# Docker, pnpm and Ollama are installed only when their command is not
# already on PATH; Node.js is (re)installed when missing or when `node -v`
# does not contain "v${NODE_VERSION}".
phase1_system() {
log "Phase 1: Installing system dependencies..."
# Keep apt from prompting during unattended installs.
export DEBIAN_FRONTEND=noninteractive
# Update package index
apt-get update -qq
# Install essentials
apt-get install -y -qq \
ca-certificates curl gnupg lsb-release git jq unzip iproute2 \
build-essential python3
# ── Docker ─────────────────────────────────────────────────────────
if ! command -v docker &>/dev/null; then
log "Installing Docker..."
# Register Docker's apt repository: store the de-armored signing key in
# /etc/apt/keyrings and reference it via signed-by in the source entry.
install -m 0755 -d /etc/apt/keyrings
curl -fsSL https://download.docker.com/linux/ubuntu/gpg \
| gpg --batch --yes --dearmor -o /etc/apt/keyrings/docker.gpg
chmod a+r /etc/apt/keyrings/docker.gpg
echo "deb [arch=$(dpkg --print-architecture) signed-by=/etc/apt/keyrings/docker.gpg] https://download.docker.com/linux/ubuntu $(lsb_release -cs) stable" \
> /etc/apt/sources.list.d/docker.list
apt-get update -qq
apt-get install -y -qq docker-ce docker-ce-cli containerd.io docker-compose-plugin docker-buildx-plugin
systemctl enable --now docker
ok "Docker installed: $(docker --version)"
else
ok "Docker already installed: $(docker --version)"
fi
# Enable BuildKit globally
# NOTE: daemon.json is overwritten and the Docker daemon restarted on every
# run of this phase, including re-runs where Docker was already installed.
mkdir -p /etc/docker
cat > /etc/docker/daemon.json <<'DJSON'
{
"features": { "buildkit": true },
"log-driver": "json-file",
"log-opts": { "max-size": "50m", "max-file": "3" }
}
DJSON
systemctl restart docker
# ── Node.js ────────────────────────────────────────────────────────
# The version test is a substring match of "v${NODE_VERSION}" against `node -v`.
if ! command -v node &>/dev/null || ! node -v | grep -q "v${NODE_VERSION}"; then
log "Installing Node.js ${NODE_VERSION}..."
# Modern NodeSource method (GPG key + apt source — the curl|bash setup_XX.x scripts are deprecated)
install -m 0755 -d /etc/apt/keyrings
curl -fsSL https://deb.nodesource.com/gpgkey/nodesource-repo.gpg.key \
| gpg --batch --yes --dearmor -o /etc/apt/keyrings/nodesource.gpg
echo "deb [signed-by=/etc/apt/keyrings/nodesource.gpg] https://deb.nodesource.com/node_${NODE_VERSION}.x nodistro main" \
> /etc/apt/sources.list.d/nodesource.list
apt-get update -qq
apt-get install -y -qq nodejs
ok "Node.js installed: $(node -v)"
else
ok "Node.js already installed: $(node -v)"
fi
# ── pnpm ───────────────────────────────────────────────────────────
# Only presence is checked: an existing pnpm of a different version than
# PNPM_VERSION is left untouched.
if ! command -v pnpm &>/dev/null; then
log "Installing pnpm ${PNPM_VERSION}..."
npm install -g "pnpm@${PNPM_VERSION}"
ok "pnpm installed: $(pnpm -v)"
else
ok "pnpm already installed: $(pnpm -v)"
fi
# ── Ollama (local LLM inference) ──────────────────────────────────
if ! command -v ollama &>/dev/null; then
log "Installing Ollama..."
# Runs the upstream installer script straight from the network.
curl -fsSL https://ollama.com/install.sh | sh
ok "Ollama installed: $(ollama --version 2>&1 || echo 'ok')"
else
ok "Ollama already installed: $(ollama --version 2>&1 || echo 'ok')"
fi
# Start Ollama service (systemd)
if ! systemctl is-active --quiet ollama 2>/dev/null; then
systemctl enable --now ollama 2>/dev/null || true
# Fallback: start manually if not using systemd
if ! curl -sf http://localhost:11434/api/version > /dev/null 2>&1; then
nohup ollama serve > /var/log/ollama.log 2>&1 &
# Brief pause before the readiness poll below.
sleep 3
fi
fi
# Wait for Ollama API
log "Waiting for Ollama API..."
wait_for_url "http://localhost:11434/api/version" 30
# Pull default model (non-fatal — LocalMemGPT needs it but other products don't)
# Only the last 3 lines of pull output are shown; the `if` sees the pull's
# own status because pipefail is enabled at the top of the script.
log "Pulling Ollama model: ${OLLAMA_MODEL} (this may take a few minutes)..."
if ollama pull "$OLLAMA_MODEL" 2>&1 | tail -3; then
ok "Ollama ready with model: ${OLLAMA_MODEL}"
else
warn "Ollama model pull failed (network issue?). LocalMemGPT may not work."
warn " Retry later: ollama pull ${OLLAMA_MODEL}"
fi
# ── Create install directory ───────────────────────────────────────
mkdir -p "$INSTALL_DIR"
ok "Phase 1 complete."
}
# ═══════════════════════════════════════════════════════════════════════
# PHASE 2: Gitea (npm Registry)
# ═══════════════════════════════════════════════════════════════════════
# Phase 2 — run Gitea in Docker as the private npm registry and prepare it:
# admin user, "bytelyst" organization, and a fresh "vm-deploy" API token.
# Reads:   GITEA_PORT, GITEA_ADMIN, GITEA_PASS, INSTALL_DIR.
# Writes:  exports GITEA_NPM_TOKEN and stores it in ${INSTALL_DIR}/.gitea_token
#          (mode 600).
# Exits via fail() when Gitea never becomes ready or no token can be created.
phase2_gitea() {
  log "Phase 2: Setting up Gitea npm registry on port ${GITEA_PORT}..."
  local GITEA_CONTAINER="gitea-npm-registry"

  # Check if already running
  if docker ps --format '{{.Names}}' | grep -q "^${GITEA_CONTAINER}$"; then
    ok "Gitea already running."
  else
    # Remove stopped container if exists
    docker rm -f "$GITEA_CONTAINER" 2>/dev/null || true
    docker run -d \
      --name "$GITEA_CONTAINER" \
      --restart unless-stopped \
      -p "${GITEA_PORT}:3000" \
      -v gitea-data:/data \
      -e GITEA__server__ROOT_URL="http://localhost:${GITEA_PORT}/" \
      -e GITEA__server__HTTP_PORT=3000 \
      -e GITEA__packages__ENABLED=true \
      -e INSTALL_LOCK=true \
      -e GITEA__security__INSTALL_LOCK=true \
      gitea/gitea:1.22
    ok "Gitea container started."
  fi

  # Wait for Gitea to become ready
  log "Waiting for Gitea to start..."
  wait_for_url "http://localhost:${GITEA_PORT}/api/v1/version" 90

  # ── Create admin user (idempotent) ─────────────────────────────────
  # A failure here (for example: user already exists) is deliberately ignored.
  log "Creating Gitea admin user..."
  docker exec "$GITEA_CONTAINER" gitea admin user create \
    --username "$GITEA_ADMIN" \
    --password "$GITEA_PASS" \
    --email "admin@bytelyst.local" \
    --admin \
    --must-change-password=false 2>/dev/null || true

  # ── Create "bytelyst" organization ─────────────────────────────────
  local GITEA_URL="http://localhost:${GITEA_PORT}"
  local basic_credentials AUTH_HEADER
  # Strip newlines: base64 wraps long input, which would corrupt the header.
  basic_credentials=$(printf '%s' "${GITEA_ADMIN}:${GITEA_PASS}" | base64 | tr -d '\n')
  AUTH_HEADER="Authorization: Basic ${basic_credentials}"

  # Check if org exists.
  # No `-f` here: on a fresh install the expected answer is 404, and with `-f`
  # curl would exit 22, which under `set -e` aborts the script on this
  # assignment. We only want the status code.
  local org_status
  org_status=$(curl -s -o /dev/null -w "%{http_code}" \
    -H "$AUTH_HEADER" "${GITEA_URL}/api/v1/orgs/bytelyst" || true)
  if [ "$org_status" != "200" ]; then
    curl -sf -X POST "${GITEA_URL}/api/v1/orgs" \
      -H "$AUTH_HEADER" \
      -H "Content-Type: application/json" \
      -d '{"username":"bytelyst","visibility":"public"}' > /dev/null
    ok "Created 'bytelyst' organization."
  else
    ok "'bytelyst' organization already exists."
  fi

  # ── Create API token ───────────────────────────────────────────────
  # Delete old token if exists, then create fresh
  curl -sf -X DELETE "${GITEA_URL}/api/v1/users/${GITEA_ADMIN}/tokens/vm-deploy" \
    -H "$AUTH_HEADER" > /dev/null 2>&1 || true

  # No `-f` and `|| true`: keep the server's error body so the diagnostic
  # below can show it instead of the script dying silently under `set -e`.
  local token_response
  token_response=$(curl -s -X POST "${GITEA_URL}/api/v1/users/${GITEA_ADMIN}/tokens" \
    -H "$AUTH_HEADER" \
    -H "Content-Type: application/json" \
    -d '{"name":"vm-deploy","scopes":["write:package","read:package","write:organization","read:organization"]}' || true)

  GITEA_NPM_TOKEN=$(jq -r '.sha1 // .token' <<<"$token_response" 2>/dev/null || echo "")
  if [ -z "$GITEA_NPM_TOKEN" ] || [ "$GITEA_NPM_TOKEN" = "null" ]; then
    fail "Failed to create Gitea API token. Response: $token_response"
  fi

  # Export for later phases
  export GITEA_NPM_TOKEN
  # Create the file with restrictive permissions from the start; the chmod
  # also tightens a pre-existing file left by an earlier run.
  (
    umask 077
    printf '%s\n' "$GITEA_NPM_TOKEN" > "${INSTALL_DIR}/.gitea_token"
  )
  chmod 600 "${INSTALL_DIR}/.gitea_token"

  ok "Phase 2 complete. Gitea running at http://localhost:${GITEA_PORT}"
}
# ═══════════════════════════════════════════════════════════════════════
# PHASE 3: Clone Repositories
# ═══════════════════════════════════════════════════════════════════════
# Phase 3 — make every repository in REPOS available under INSTALL_DIR.
# Existing checkouts are fast-forwarded (best effort); missing ones are
# shallow-cloned from https://github.com/${GITHUB_USER}/<repo>.git.
# Reads: SKIP_CLONE (set to 1 to skip the phase), GITHUB_USER, GITHUB_TOKEN
#        (optional; embedded in the clone URL for HTTPS auth), REPOS, INSTALL_DIR.
# NOTE(review): when GITHUB_TOKEN is used it becomes part of the remote URL
# git records for the clone — confirm this is acceptable for the VM.
phase3_clone() {
  if [ "${SKIP_CLONE:-0}" = "1" ]; then
    warn "Skipping clone (SKIP_CLONE=1)."
    return
  fi

  log "Phase 3: Cloning ${#REPOS[@]} repositories..."

  local clone_base="https://github.com/${GITHUB_USER}"
  if [ -n "${GITHUB_TOKEN:-}" ]; then
    clone_base="https://${GITHUB_TOKEN}@github.com/${GITHUB_USER}"
  fi

  local repo target
  for repo in "${REPOS[@]}"; do
    target="${INSTALL_DIR}/${repo}"
    if [ -d "$target/.git" ]; then
      log " Pulling latest: $repo"
      # Best effort: a failed fast-forward must not abort the deployment, but
      # the operator should know the checkout may be stale.
      if ! git -C "$target" pull --ff-only 2>/dev/null; then
        warn " Could not fast-forward ${repo}; continuing with the existing checkout."
      fi
    else
      log " Cloning: $repo"
      git clone --depth 1 "$clone_base/${repo}.git" "$target"
    fi
  done

  # NOTE: Corporate proxy was removed from all Dockerfiles at source
  # (commit fix(docker) across all 10 product repos + dashboards).
  # No runtime sed stripping needed.
  ok "Phase 3 complete. All repos in ${INSTALL_DIR}/"
}
# ═══════════════════════════════════════════════════════════════════════
# PHASE 4: Build @bytelyst/* Packages
# ═══════════════════════════════════════════════════════════════════════
# Phase 4 — install workspace dependencies and build every package of the
# learning_ai_common_plat workspace.
# Reads:  SKIP_BUILD (set to 1 to skip), INSTALL_DIR, GITEA_PORT, GITEA_NPM_TOKEN.
# Writes: ${plat_dir}/.npmrc pointing the @bytelyst scope at the local Gitea
#         registry; it contains the auth token and is therefore kept at mode 600.
# Side effect: changes the current directory to the workspace.
# Exits via fail() when `pnpm install` or the recursive build fails.
phase4_build() {
  if [ "${SKIP_BUILD:-0}" = "1" ]; then
    warn "Skipping build (SKIP_BUILD=1)."
    return
  fi

  log "Phase 4: Building @bytelyst/* packages..."

  local plat_dir="${INSTALL_DIR}/learning_ai_common_plat"
  local npmrc="${plat_dir}/.npmrc"

  # Configure .npmrc for the common-plat workspace (publish target).
  # Truncate and restrict the file BEFORE the secret is written so the token
  # is never readable by other users (matches how .gitea_token is protected).
  : > "$npmrc"
  chmod 600 "$npmrc"
  cat > "$npmrc" <<NPMRC
@bytelyst:registry=http://localhost:${GITEA_PORT}/api/packages/bytelyst/npm/
//localhost:${GITEA_PORT}/api/packages/bytelyst/npm/:_authToken=${GITEA_NPM_TOKEN}
strict-ssl=false
NPMRC

  cd "$plat_dir"

  # Install workspace deps (no --frozen-lockfile: shallow clones may have drift)
  # Only the tail of the output is shown; pipefail (enabled at the top of the
  # script) makes the pipeline reflect pnpm's own exit status.
  log " Installing workspace dependencies..."
  if ! pnpm install 2>&1 | tail -3; then
    fail "pnpm install failed. Check ${INSTALL_DIR}/setup.log for full output."
  fi

  # Build all packages
  log " Building all packages..."
  if ! pnpm -r --if-present build 2>&1 | tail -5; then
    fail "pnpm build failed. Check ${INSTALL_DIR}/setup.log for full output."
  fi

  ok "Phase 4 complete. All packages built."
}
# ═══════════════════════════════════════════════════════════════════════
# PHASE 5: Publish Packages to Gitea npm Registry
# ═══════════════════════════════════════════════════════════════════════
# Phase 5 — publish every public @bytelyst/* package that has a dist/ build
# output to the local Gitea npm registry.
# Reads: SKIP_BUILD (set to 1 to skip), INSTALL_DIR, GITEA_PORT.
# Side effect: changes the current directory to the workspace.
# A package whose publish is rejected because the version already exists
# counts as published; any other publish failure is reported with warn()
# and counted as skipped. Packages without dist/ are counted as skipped;
# private or non-@bytelyst packages are not counted at all.
phase5_publish() {
  if [ "${SKIP_BUILD:-0}" = "1" ]; then
    warn "Skipping publish (SKIP_BUILD=1)."
    return
  fi

  log "Phase 5: Publishing @bytelyst/* packages to Gitea..."

  local plat_dir="${INSTALL_DIR}/learning_ai_common_plat"
  cd "$plat_dir"

  local published=0 skipped=0
  local registry_url="http://localhost:${GITEA_PORT}/api/packages/bytelyst/npm/"
  local pkg_dir pkg_json pkg_name pub_output

  for pkg_dir in packages/*/; do
    pkg_json="${pkg_dir}package.json"
    [ -f "$pkg_json" ] || continue

    pkg_name=$(jq -r '.name // ""' "$pkg_json")

    # Skip non-@bytelyst packages and private packages
    [[ "$pkg_name" == @bytelyst/* ]] || continue
    if [ "$(jq -r '.private // false' "$pkg_json")" = "true" ]; then
      continue
    fi

    # Skip packages without a build output
    if [ ! -d "${pkg_dir}dist" ]; then
      skipped=$((skipped + 1))
      continue
    fi

    # Single publish attempt — "already exists" (409) errors are expected and OK.
    # The output is matched through a here-string rather than `echo | grep -q`:
    # with pipefail, grep exiting on its first match can make the writer fail
    # on a closed pipe for large outputs, turning a match into a false failure.
    if pub_output=$(cd "$pkg_dir" && pnpm publish --registry "$registry_url" --no-git-checks 2>&1); then
      published=$((published + 1))
    elif grep -qiE 'already exists|409|conflict' <<<"$pub_output"; then
      published=$((published + 1))
    else
      warn " Failed to publish ${pkg_name}: $(tail -1 <<<"$pub_output")"
      skipped=$((skipped + 1))
    fi
  done

  ok "Phase 5 complete. Published: ${published}, Skipped: ${skipped}"
}
# ═══════════════════════════════════════════════════════════════════════
# PHASE 6: Generate .env.ecosystem
# ═══════════════════════════════════════════════════════════════════════
# Phase 6 — (re)write ${INSTALL_DIR}/learning_ai_common_plat/.env.ecosystem.
# The file is regenerated from scratch on every call, with a freshly
# generated JWT secret and the emulator keys from the configuration section.
# Reads: INSTALL_DIR, COSMOS_EMULATOR_KEY, AZURITE_KEY.
phase6_env() {
  log "Phase 6: Generating .env.ecosystem..."

  local env_file="${INSTALL_DIR}/learning_ai_common_plat/.env.ecosystem"

  # Generate a random JWT secret (32 random bytes, base64-encoded)
  local jwt_secret
  jwt_secret=$(openssl rand -base64 32)

  # Unquoted delimiter: ${...} and $(...) below are expanded while writing.
  cat <<ENV > "$env_file"
# ── Auto-generated by setup.sh on $(date -Iseconds) ──────────────────
# Cosmos DB Emulator
COSMOS_ENDPOINT=http://cosmos-emulator:8081
COSMOS_KEY=${COSMOS_EMULATOR_KEY}
COSMOS_DATABASE=bytelyst
# Auth
JWT_SECRET=${jwt_secret}
RATE_LIMIT_STORE_MODE=datastore
# Azure Blob Storage (Azurite)
STORAGE_PROVIDER=azure
AZURE_BLOB_CONNECTION_STRING=DefaultEndpointsProtocol=http;AccountName=devstoreaccount1;AccountKey=${AZURITE_KEY};BlobEndpoint=http://azurite:10000/devstoreaccount1;
AZURE_BLOB_ACCOUNT_NAME=devstoreaccount1
AZURE_BLOB_ACCOUNT_KEY=${AZURITE_KEY}
AZURE_BLOB_PUBLIC_ENDPOINT=http://localhost:10000/devstoreaccount1
# Email (Mailpit)
EMAIL_PROVIDER=smtp
EMAIL_FROM_ADDRESS=noreply@bytelyst.local
EMAIL_FROM_NAME=ByteLyst
SMTP_HOST=mailpit
SMTP_PORT=1025
SMTP_SECURE=false
SMTP_USER=
SMTP_PASSWORD=
# Stripe (test placeholders)
STRIPE_SECRET_KEY=sk_test_placeholder
STRIPE_WEBHOOK_SECRET=whsec_placeholder
STRIPE_PRICE_PRO=price_placeholder
STRIPE_PRICE_ENTERPRISE=price_placeholder
# Extraction Service
PYTHON_SIDECAR_URL=http://localhost:4006
DEFAULT_MODEL_ID=gemini-2.5-flash
GEMINI_API_KEY=placeholder
EXTRACTION_QUEUE_BACKEND=file
EXTRACTION_QUEUE_FILE=.data/extraction-jobs.json
# Cross-service URLs
PLATFORM_SERVICE_URL=http://platform-service:4003
EXTRACTION_SERVICE_URL=http://extraction-service:4005
MCP_SERVER_URL=http://mcp-server:4007
# Datastore
DB_PROVIDER=cosmos
# Telemetry
TELEMETRY_ENABLED=true
# Event Bus
EVENT_BUS_BACKEND=file
EVENT_BUS_FILE=.data/platform-events.json
# Field Encryption
FIELD_ENCRYPT_KEY_PROVIDER=memory
# Product Identity
DEFAULT_PRODUCT_ID=lysnrai
# Webhooks (disabled)
WEBHOOK_INVITATION_REDEEMED_URL=
WEBHOOK_REFERRAL_STATUS_URL=
WEBHOOK_WAITLIST_JOINED_URL=
# Notifications (disabled)
TELEGRAM_BOT_TOKEN=
TELEGRAM_DEFAULT_CHAT_ID=
SLACK_WEBHOOK_URL=
SLACK_DEFAULT_CHANNEL=
ENV

  ok "Phase 6 complete. Generated ${env_file}"
}
# ═══════════════════════════════════════════════════════════════════════
# PHASE 7: Deploy Ecosystem via Docker Compose
# ═══════════════════════════════════════════════════════════════════════
# All 30 compose services, grouped for ordered build + reporting.
# Phase 7 processes the groups in the order they are declared here.
INFRA_SERVICES=(
  cosmos-emulator
  azurite
  mailpit
  loki
  grafana
  gateway
)
PLATFORM_SERVICES=(
  platform-service
  extraction-service
  mcp-server
)
DASHBOARD_SERVICES=(
  admin-web
  tracker-web
)
BACKEND_SERVICES=(
  peakpulse-backend
  chronomind-backend
  jarvisjr-backend
  nomgap-backend
  mindlyst-backend
  lysnrai-backend
  notelett-backend
  flowmonk-backend
  actiontrail-backend
  localmemgpt-backend
)
WEB_SERVICES=(
  lysnrai-dashboard
  chronomind-web
  jarvisjr-web
  flowmonk-web
  notelett-web
  mindlyst-web
  nomgap-web
  actiontrail-web
  localmemgpt-web
)
# Prepare the shell environment for `docker compose` builds:
# switch into the common-plat workspace, restore the Gitea token if needed,
# and export the variables the compose file and Docker CLI consume
# (GITEA_NPM_TOKEN, GITEA_NPM_HOST, DOCKER_BUILDKIT, COMPOSE_DOCKER_CLI_BUILD).
setup_compose_env() {
  cd "${INSTALL_DIR}/learning_ai_common_plat"
  restore_gitea_token

  # Address under which build containers reach Gitea running on the host.
  local host_ip
  host_ip=$(detect_docker_host_ip)
  log " Docker host IP for Gitea access: ${host_ip}"

  GITEA_NPM_HOST="${host_ip}"
  DOCKER_BUILDKIT=1
  COMPOSE_DOCKER_CLI_BUILD=1
  export GITEA_NPM_TOKEN GITEA_NPM_HOST DOCKER_BUILDKIT COMPOSE_DOCKER_CLI_BUILD
}
# Build the image of one compose service.
# Arguments: $1 - compose service name
# Outputs:   the last 3 lines of the build log on success, the last 5 on failure
# Returns:   0 when the build succeeded, 1 otherwise
# The complete build output is written to STATE_DIR/builds/<svc>.log for
# debugging; that directory must already exist.
build_one_service() {
  local svc="$1"
  local plat_dir="${INSTALL_DIR}/learning_ai_common_plat"
  local log_file="${STATE_DIR}/builds/${svc}.log"
  local status=0

  docker compose -f "${plat_dir}/${COMPOSE_FILE}" --env-file "${plat_dir}/.env.ecosystem" \
    build "$svc" > "$log_file" 2>&1 || status=1

  case "$status" in
    0) tail -3 "$log_file" ;;
    *) tail -5 "$log_file" ;;
  esac
  return "$status"
}
# Phase 7 — build every compose service image one by one, then start all
# services whose build succeeded together with the pre-built ones.
# Reads:  INSTALL_DIR, COMPOSE_FILE, STATE_DIR and the *_SERVICES arrays.
# Writes: per-service logs and .ok/.fail markers in STATE_DIR/builds/, the
#         `compose up` output in STATE_DIR/builds/compose-up.log, and the
#         global PHASE7_HAD_FAILURES (1 when any build or the `up` step failed,
#         else 0) which run_phase() uses to decide whether to mark the phase done.
# Exits via fail() when .env.ecosystem is missing or nothing can be started.
phase7_deploy() {
  log "Phase 7: Deploying ecosystem (per-service build + fallback)..."

  # Free RAM: stop Ollama during Docker builds (it is restarted at the end)
  if systemctl is-active --quiet ollama 2>/dev/null; then
    log " Stopping Ollama to free RAM for Docker builds..."
    systemctl stop ollama 2>/dev/null || true
  fi

  setup_compose_env

  local plat_dir="${INSTALL_DIR}/learning_ai_common_plat"
  local compose_path="${plat_dir}/${COMPOSE_FILE}"
  local env_path="${plat_dir}/.env.ecosystem"

  # Guard: .env.ecosystem must exist (created by phase 6)
  if [ ! -f "$env_path" ]; then
    fail "Missing ${plat_dir}/.env.ecosystem — run phase 6 first: sudo ./setup.sh --phase=6"
  fi

  local build_ok=() build_fail=() build_skip=()
  mkdir -p "${STATE_DIR}/builds"

  # ── 7a: Build each service individually ──────────────────────────
  local all_services=("${INFRA_SERVICES[@]}" "${PLATFORM_SERVICES[@]}" "${DASHBOARD_SERVICES[@]}" "${BACKEND_SERVICES[@]}" "${WEB_SERVICES[@]}")
  local total=${#all_services[@]}
  local idx=0 svc has_build
  log " Building ${total} service images individually..."
  echo ""

  # Cache compose config JSON once (avoid calling config 30 times)
  local compose_json
  compose_json=$(docker compose -f "$compose_path" \
    --env-file "$env_path" config --format json 2>/dev/null || true)
  if [ -z "$compose_json" ]; then
    warn " Could not parse compose config (docker compose config --format json failed)."
    warn " Per-service build detection disabled — all services will attempt to build."
  fi

  for svc in "${all_services[@]}"; do
    idx=$((idx + 1))

    # Infrastructure services use pre-built images (no build step).
    # Without a parsed config we cannot tell, so every service is treated as
    # buildable — this is what the warning above promises.
    if [ -n "$compose_json" ]; then
      has_build=$(jq -r --arg svc "$svc" '.services[$svc].build // empty' \
        <<<"$compose_json" 2>/dev/null || true)
    else
      has_build="unknown"
    fi

    if [ -z "$has_build" ] || [ "$has_build" = "null" ]; then
      build_skip+=("$svc")
      ok " [${idx}/${total}] ${svc} — pre-built image (skip build)"
      continue
    fi

    log " [${idx}/${total}] Building ${svc}..."
    if build_one_service "$svc"; then
      build_ok+=("$svc")
      # Drop a stale failure marker from an earlier attempt.
      rm -f "${STATE_DIR}/builds/${svc}.fail"
      date -Iseconds > "${STATE_DIR}/builds/${svc}.ok"
      ok " [${idx}/${total}] ${svc} — build OK"
    else
      build_fail+=("$svc")
      rm -f "${STATE_DIR}/builds/${svc}.ok"
      date -Iseconds > "${STATE_DIR}/builds/${svc}.fail"
      warn " [${idx}/${total}] ${svc} — BUILD FAILED (will skip)"
    fi
  done

  # ── Build summary ────────────────────────────────────────────────
  echo ""
  log " Build results: ${#build_ok[@]} OK, ${#build_fail[@]} FAILED, ${#build_skip[@]} pre-built"
  if [ ${#build_fail[@]} -gt 0 ]; then
    warn " Failed services: ${build_fail[*]}"
  fi

  # ── 7b: Start services (skip failed builds) ─────────────────────
  # Compose up only the services that built successfully + pre-built infra
  local start_services=("${build_skip[@]}" "${build_ok[@]}")

  if [ ${#start_services[@]} -eq 0 ]; then
    fail "No services to start — all builds failed."
  fi

  log " Starting ${#start_services[@]} services..."
  # Capture the exit status instead of discarding it: a failed `up` must not
  # abort before the summary, but it must also keep the phase from being
  # recorded as done. Full output goes to a log; the tail is shown.
  local up_log="${STATE_DIR}/builds/compose-up.log"
  local up_rc=0
  docker compose \
    -f "$compose_path" \
    --env-file "$env_path" \
    up -d "${start_services[@]}" > "$up_log" 2>&1 || up_rc=$?
  tail -10 "$up_log" || true

  # Restart Ollama (stopped at start of phase 7 to free RAM).
  # The whole "systemd, else manual serve" attempt runs in the background.
  if command -v ollama &>/dev/null; then
    log " Restarting Ollama..."
    { systemctl start ollama 2>/dev/null || nohup ollama serve > /var/log/ollama.log 2>&1; } &
  fi

  # Signal to run_phase() whether phase 7 may be marked done.
  PHASE7_HAD_FAILURES=0
  if [ "$up_rc" -ne 0 ]; then
    PHASE7_HAD_FAILURES=1
    warn "docker compose up failed (exit ${up_rc}). Full output: ${up_log}"
  fi

  if [ ${#build_fail[@]} -gt 0 ]; then
    PHASE7_HAD_FAILURES=1
    warn "Phase 7 complete with ${#build_fail[@]} failed builds: ${build_fail[*]}"
    warn " Fix and re-run: sudo ./setup.sh --phase=7"
  elif [ "$up_rc" -ne 0 ]; then
    warn " Fix and re-run: sudo ./setup.sh --phase=7"
  else
    ok "Phase 7 complete. All ${#start_services[@]} services started."
  fi
}
# ═══════════════════════════════════════════════════════════════════════
# PHASE 8: Health Check
# ═══════════════════════════════════════════════════════════════════════
# Phase 8 — wait for platform-service, (re)generate the reusable
# ${INSTALL_DIR}/check-health.sh script, then run it once after a 30s pause.
# The generated script only prints a ✓/✗ line per endpoint; it does not fail
# on unhealthy services, so this phase completes either way once
# platform-service has answered.
phase8_verify() {
  log "Phase 8: Verifying service health..."

  local health_script="${INSTALL_DIR}/check-health.sh"

  # Wait for platform-service (everything else depends on it)
  log " Waiting for platform-service..."
  wait_for_url "http://localhost:4003/health" 120

  # Create a reusable health-check script.
  # Quoted delimiter: the body is written verbatim, nothing is expanded here.
  cat <<'HEALTH' > "$health_script"
#!/usr/bin/env bash
RED='\033[0;31m'; GREEN='\033[0;32m'; NC='\033[0m'
check() {
local name="$1" url="$2"
if curl -sf "$url" > /dev/null 2>&1; then
echo -e "${GREEN} ✓ ${name}${NC} ${url}"
else
echo -e "${RED} ✗ ${name}${NC} ${url}"
fi
}
echo ""
echo "═══ Infrastructure ═══"
check "Gitea (npm)" "http://localhost:3300/api/v1/version"
check "Ollama (LLM)" "http://localhost:11434/api/version"
check "Cosmos Explorer" "http://localhost:1234"
check "Azurite (Blob)" "http://localhost:10000/devstoreaccount1?comp=list"
check "Mailpit" "http://localhost:8025"
check "Loki" "http://localhost:3100/ready"
check "Grafana" "http://localhost:3000/api/health"
check "Traefik" "http://localhost:8080/api/overview"
echo ""
echo "═══ Platform Services ═══"
check "platform-service" "http://localhost:4003/health"
check "extraction-service" "http://localhost:4005/health"
check "mcp-server" "http://localhost:4007/health"
echo ""
echo "═══ Dashboards ═══"
check "admin-web" "http://localhost:3001"
check "tracker-web" "http://localhost:3003"
echo ""
echo "═══ Product Backends ═══"
check "peakpulse" "http://localhost:4010/health"
check "chronomind" "http://localhost:4011/health"
check "jarvisjr" "http://localhost:4012/health"
check "nomgap" "http://localhost:4013/health"
check "mindlyst" "http://localhost:4014/health"
check "lysnrai" "http://localhost:4015/health"
check "notelett" "http://localhost:4016/health"
check "flowmonk" "http://localhost:4017/health"
check "actiontrail" "http://localhost:4018/health"
check "localmemgpt" "http://localhost:4019/health"
echo ""
echo "═══ Product Web Apps ═══"
check "lysnrai-dashboard" "http://localhost:3002"
check "chronomind-web" "http://localhost:3030"
check "jarvisjr-web" "http://localhost:3035"
check "flowmonk-web" "http://localhost:3040"
check "notelett-web" "http://localhost:3045"
check "mindlyst-web" "http://localhost:3050"
check "nomgap-web" "http://localhost:3055"
check "actiontrail-web" "http://localhost:3060"
check "localmemgpt-web" "http://localhost:3070"
echo ""
HEALTH
  chmod +x "$health_script"

  # Give services a moment to start, then run health check
  log " Waiting 30s for services to stabilize..."
  sleep 30

  # Run the health check
  bash "$health_script"

  ok "Phase 8 complete."
}
# ═══════════════════════════════════════════════════════════════════════
# MAIN
# ═══════════════════════════════════════════════════════════════════════
# Set to 1 by phase7_deploy when the phase finished with failures.
PHASE7_HAD_FAILURES=0

# Run one phase by number and record it as completed.
# Arguments: $1 - phase number (1-8); anything else ends the script via fail()
# Phase 7 is NOT recorded as completed when it reported failures, so that
# --resume picks it up again.
run_phase() {
  local phase_num="$1"
  local handler=""

  case "$phase_num" in
    1) handler=phase1_system ;;
    2) handler=phase2_gitea ;;
    3) handler=phase3_clone ;;
    4) handler=phase4_build ;;
    5) handler=phase5_publish ;;
    6) handler=phase6_env ;;
    7) handler=phase7_deploy ;;
    8) handler=phase8_verify ;;
  esac
  if [ -z "$handler" ]; then
    fail "Unknown phase: $phase_num"
  fi

  "$handler"

  # Don't mark phase 7 done if there were build failures (--resume should retry it)
  if [ "$phase_num" -ne 7 ] || [ "$PHASE7_HAD_FAILURES" -ne 1 ]; then
    mark_phase_done "$phase_num"
  else
    warn "Phase 7 NOT marked done (build failures). --resume will retry it."
  fi
}
# Print the command-line help (options and phase list) to stdout.
usage() {
  # Quoted delimiter: the text is emitted exactly as written.
  cat <<'USAGE'
Usage: sudo ./setup.sh [OPTIONS]

Options:
 --resume Auto-resume from last completed phase
 --resume-from=N Resume starting at phase N (1-8)
 --phase=N Run ONLY phase N
 --reset Clear phase markers and start fresh
 --status Show completed phases and exit
 -h, --help Show this help

Phases:
 1 System dependencies (Docker, Node, pnpm, Ollama)
 2 Gitea npm registry
 3 Clone repositories
 4 Build @bytelyst/* packages
 5 Publish packages to Gitea
 6 Generate .env.ecosystem
 7 Build + deploy Docker services (per-service, with fallback)
 8 Health check
USAGE
}
# Script entry point: parse options, run pre-flight checks, then execute the
# requested phases.
# Arguments: the script's command-line options (see usage()).
# Modes:     full (default, phases 1-8), --resume (continue after the last
#            completed phase), --resume-from=N (phases N-8), --phase=N (only N).
#            --reset, --status and --help act immediately and exit.
# Exit code: 0 on success; 1 on invalid options, when not run as root, or when
#            a single-phase run of phase 7 reported failures.
main() {
  # Parse CLI arguments
  local mode="full" start_phase=1 only_phase=0
  local arg i
  for arg in "$@"; do
    case "$arg" in
      --resume)
        mode="resume" ;;
      --resume-from=*)
        mode="resume-from"
        start_phase="${arg#*=}" ;;
      --phase=*)
        mode="single"
        only_phase="${arg#*=}" ;;
      --reset)
        mkdir -p "$INSTALL_DIR"
        reset_phase_markers
        exit 0 ;;
      --status)
        mkdir -p "$INSTALL_DIR"
        echo "Phase completion status:"
        for i in 1 2 3 4 5 6 7 8; do
          if is_phase_done "$i"; then
            echo " Phase $i: DONE ($(cat "${STATE_DIR}/phase${i}.done"))"
          else
            echo " Phase $i: pending"
          fi
        done
        exit 0 ;;
      -h|--help)
        usage; exit 0 ;;
      *)
        warn "Unknown option: $arg"
        usage; exit 1 ;;
    esac
  done

  # Reject bad phase numbers up front. Without this, a value such as "abc" or
  # "9" would make every phase be skipped and the run would still end with
  # the "Deployment complete" banner.
  if [ "$mode" = "resume-from" ] && ! [[ "$start_phase" =~ ^[1-8]$ ]]; then
    fail "Invalid phase for --resume-from: '${start_phase}' (expected 1-8)."
  fi
  if [ "$mode" = "single" ] && ! [[ "$only_phase" =~ ^[1-8]$ ]]; then
    fail "Invalid phase for --phase: '${only_phase}' (expected 1-8)."
  fi

  # Check privileges before touching INSTALL_DIR, so a non-root user gets this
  # message instead of a permission error from mkdir/tee.
  [ "$(id -u)" -eq 0 ] || fail "This script must be run as root (sudo)."

  # Tee all output to a log file so SSH disconnection doesn't lose context
  mkdir -p "$INSTALL_DIR"
  exec > >(tee -a "${INSTALL_DIR}/setup.log") 2>&1

  echo ""
  echo "╔═══════════════════════════════════════════════════════════════╗"
  echo "║ ByteLyst Single-VM Deployment (raw Ubuntu) ║"
  echo "║ 30 services · 10 products · Ollama · Gitea · 1 VM ║"
  echo "╚═══════════════════════════════════════════════════════════════╝"
  echo ""
  log "Log file: ${INSTALL_DIR}/setup.log"

  # ── Pre-flight checks ───────────────────────────────────────────
  # Advisory only: low disk or memory produces a warning, never an abort.
  local disk_gb mem_gb
  disk_gb=$(df -BG / | awk 'NR==2 {gsub(/G/,"",$4); print $4}')
  mem_gb=$(free -g | awk '/^Mem:/ {print $2}')
  log "Available disk: ${disk_gb} GB, Total RAM: ${mem_gb} GB"
  [ "${disk_gb:-0}" -ge 40 ] || warn "Low disk space (${disk_gb} GB). Recommend 128 GB+."
  [ "${mem_gb:-0}" -ge 16 ] || warn "Low memory (${mem_gb} GB). Recommend 32 GB+."

  local start_time
  start_time=$(date +%s)

  log "Target OS: $(lsb_release -ds 2>/dev/null || grep PRETTY_NAME /etc/os-release | cut -d= -f2 | tr -d '"')"
  log "Target arch: $(uname -m)"

  # ── Single-phase mode ────────────────────────────────────────────
  if [ "$mode" = "single" ]; then
    log "Running ONLY phase ${only_phase}..."
    restore_gitea_token
    run_phase "$only_phase"
    if [ "$only_phase" -eq 7 ] && [ "$PHASE7_HAD_FAILURES" -eq 1 ]; then
      warn "Phase 7 finished with failures. Fix and re-run: sudo ./setup.sh --phase=7"
      warn "Build logs: ${STATE_DIR}/builds/"
      exit 1
    fi
    ok "Phase ${only_phase} complete."
    exit 0
  fi

  # ── Auto-resume mode ─────────────────────────────────────────────
  if [ "$mode" = "resume" ]; then
    local last
    last=$(last_completed_phase)
    if [ "$last" -eq 0 ]; then
      log "No completed phases found. Starting from phase 1."
      start_phase=1
    elif [ "$last" -ge 8 ]; then
      ok "All phases already completed. Use --reset to start over."
      exit 0
    else
      start_phase=$((last + 1))
      log "Resuming from phase ${start_phase} (phases 1-${last} already done)."
    fi
  elif [ "$mode" = "resume-from" ]; then
    log "Resuming from phase ${start_phase} (as requested)."
  fi

  # Restore token if resuming past phase 2
  if [ "$start_phase" -gt 2 ]; then
    restore_gitea_token
  fi

  echo ""

  # ── Run phases ───────────────────────────────────────────────────
  local phase_num
  for phase_num in 1 2 3 4 5 6 7 8; do
    [ "$phase_num" -ge "$start_phase" ] || continue
    run_phase "$phase_num"
  done

  local elapsed=$(( $(date +%s) - start_time ))
  local minutes=$(( elapsed / 60 ))
  local seconds=$(( elapsed % 60 ))

  echo ""
  echo "╔═══════════════════════════════════════════════════════════════╗"
  echo "║ Deployment complete in ${minutes}m ${seconds}s ║"
  echo "║ ║"
  echo "║ Health check: /opt/bytelyst/check-health.sh ║"
  echo "║ Compose logs: docker compose -f ${COMPOSE_FILE} logs -f ║"
  echo "║ Retry failed: sudo ./setup.sh --phase=7 ║"
  echo "║ Resume: sudo ./setup.sh --resume ║"
  echo "║ Gitea UI: http://localhost:3300 ║"
  echo "║ Ollama API: http://localhost:11434 ║"
  echo "║ Grafana: http://localhost:3000 (admin / bytelyst) ║"
  echo "║ Mailpit: http://localhost:8025 ║"
  echo "╚═══════════════════════════════════════════════════════════════╝"
  echo ""
}
# Entry point: hand all command-line arguments to main() unchanged.
main "$@"