From 6d6ca217a5552d17fa4ff29f33fea3b004bdb250 Mon Sep 17 00:00:00 2001 From: Saravana Achu Mac Date: Thu, 5 Mar 2026 12:37:44 -0800 Subject: [PATCH] chore(devops): improve railway deploy script, add env sync and deployment runbook - Refactor railway-deploy.sh: add --sync-env, --dry-run, --detach flags and service selector - Add railway-sync-env.sh for pre-deploy environment variable synchronization - Add RAILWAY_DEPLOYMENT_RUNBOOK.md with step-by-step deployment guide Co-Authored-By: Oz --- docs/devops/RAILWAY_DEPLOYMENT_RUNBOOK.md | 129 +++++++++ scripts/railway-deploy.sh | 135 +++++++-- scripts/railway-sync-env.sh | 316 ++++++++++++++++++++++ 3 files changed, 550 insertions(+), 30 deletions(-) create mode 100644 docs/devops/RAILWAY_DEPLOYMENT_RUNBOOK.md mode change 100644 => 100755 scripts/railway-deploy.sh create mode 100755 scripts/railway-sync-env.sh diff --git a/docs/devops/RAILWAY_DEPLOYMENT_RUNBOOK.md b/docs/devops/RAILWAY_DEPLOYMENT_RUNBOOK.md new file mode 100644 index 00000000..2198f0d5 --- /dev/null +++ b/docs/devops/RAILWAY_DEPLOYMENT_RUNBOOK.md @@ -0,0 +1,129 @@ +# Railway Deployment Runbook (Common Platform) + +Last updated: 2026-02-16 + +This runbook covers repeatable deploys for shared services in `learning_ai_common_plat`. + +## Scope + +Deployable Railway services from this repo: + +1. `platform-service` +2. `extraction-service` + +There are currently **2 Railway backend services** in this repo. Monitoring (`loki`, `grafana`, `gateway`) is local Docker Compose infra, not deployed by Railway scripts. 
+ +## Deploy Scripts Inventory + +| Script | Purpose | Services Affected | +| --- | --- | --- | +| `scripts/railway-deploy.sh` | Deploy code to Railway via `railway up` | `platform-service`, `extraction-service`, or both | +| `scripts/railway-sync-env.sh` | Sync selected vars from local `.env` into Railway service variables | `platform-service`, `extraction-service`, or both | +| `scripts/docker-prep.sh` | Build shared packages for local Docker builds | No Railway deploy; local prep only | + +## One-Time Setup + +1. Install Railway CLI: +```bash +npm i -g @railway/cli +``` +2. Login: +```bash +railway login +``` +3. Optional shell defaults: +```bash +export RAILWAY_PROJECT_ID="a6bc4ea7-e89c-42da-819a-8879fb022a0d" +export RAILWAY_ENVIRONMENT="production" +``` + +## First Deploy (or Env Changes) + +1. Update local `.env` at repo root. +2. Sync env vars to Railway (no deploy per var): +```bash +scripts/railway-sync-env.sh all --env-file .env +``` +3. Deploy both services: +```bash +scripts/railway-deploy.sh all +``` + +## Repeat Deploy for New Code + +If only code changed: +```bash +scripts/railway-deploy.sh all +``` + +Single-service deploys: +```bash +scripts/railway-deploy.sh platform +scripts/railway-deploy.sh extraction +``` + +If env vars changed too: +```bash +scripts/railway-sync-env.sh all --env-file .env +scripts/railway-deploy.sh all +``` + +## Useful Flags + +### `scripts/railway-deploy.sh` + +```bash +# Sync env before deploy, then deploy extraction +scripts/railway-deploy.sh extraction --sync-env --env-file .env + +# Custom message + attach to logs +scripts/railway-deploy.sh platform --message "hotfix: auth token handling" --attach + +# Dry run +scripts/railway-deploy.sh all --sync-env --dry-run +``` + +### `scripts/railway-sync-env.sh` + +```bash +# Sync only platform-service +scripts/railway-sync-env.sh platform --env-file .env + +# Trigger deploys while setting vars (normally skipped) +scripts/railway-sync-env.sh extraction --trigger-deploys + +# Dry 
run +scripts/railway-sync-env.sh all --dry-run +``` + +## Verification + +After deploy: + +```bash +railway link --project "$RAILWAY_PROJECT_ID" --environment "$RAILWAY_ENVIRONMENT" +railway service status -a -e "$RAILWAY_ENVIRONMENT" +``` + +Service logs: + +```bash +railway logs --service platform-service --environment "$RAILWAY_ENVIRONMENT" --deployment --lines 200 +railway logs --service extraction-service --environment "$RAILWAY_ENVIRONMENT" --deployment --lines 200 +``` + +## Troubleshooting + +1. `Railway CLI is not authenticated`: +```bash +railway login +``` +2. `Unable to access Railway service ...`: + - Confirm project ID and environment. + - Confirm service names: `platform-service`, `extraction-service`. +3. Missing required vars during sync: + - Provide `COSMOS_ENDPOINT`, `COSMOS_KEY`, `JWT_SECRET` in `.env`, or + - Set `AZURE_KEYVAULT_URL` and ensure runtime identity can fetch secrets. +4. Deploy command succeeds but app is unhealthy: + - Check deployment logs using `railway logs --deployment`. + - Validate env vars synced for the affected service. diff --git a/scripts/railway-deploy.sh b/scripts/railway-deploy.sh old mode 100644 new mode 100755 index ad547599..c54f0c1e --- a/scripts/railway-deploy.sh +++ b/scripts/railway-deploy.sh @@ -6,42 +6,43 @@ cd "$ROOT" PROJECT_ID="${RAILWAY_PROJECT_ID:-a6bc4ea7-e89c-42da-819a-8879fb022a0d}" ENVIRONMENT="${RAILWAY_ENVIRONMENT:-production}" +ENV_FILE="${RAILWAY_ENV_FILE:-$ROOT/.env}" + +SERVICE_SELECTOR="all" +MESSAGE="" +SYNC_ENV=false +DRY_RUN=false +DETACH=true usage() { - cat <<'EOF' -Deploy services to Railway from this monorepo (Dockerfile builds on Railway). + cat <<'USAGE' +Deploy Railway services from this monorepo. 
Usage: - scripts/railway-deploy.sh [all|platform|extraction] [--project ] [--env ] [--message ] + scripts/railway-deploy.sh [all|platform|extraction] [options] -Env vars: - RAILWAY_PROJECT_ID Defaults to this repo's Railway project ID - RAILWAY_ENVIRONMENT Defaults to "production" +Options: + --project Railway project ID (default: $RAILWAY_PROJECT_ID or script default) + --env Railway environment name (default: $RAILWAY_ENVIRONMENT or production) + --message Deployment message + --sync-env Sync variables from .env before deploying (uses railway-sync-env.sh) + --env-file Env file path used when --sync-env is set (default: ./.env) + --attach Attach to deployment logs instead of detached mode + --dry-run Print what would run without deploying + -h, --help Show help Examples: scripts/railway-deploy.sh all - scripts/railway-deploy.sh platform --message "hotfix" - RAILWAY_ENVIRONMENT=staging scripts/railway-deploy.sh extraction -EOF + scripts/railway-deploy.sh extraction --message "hotfix: parser" + scripts/railway-deploy.sh platform --sync-env --env-file .env +USAGE } -SERVICE_ARG="${1:-all}" -shift || true +if [[ $# -gt 0 && "${1#-}" == "$1" ]]; then + SERVICE_SELECTOR="$1" + shift +fi -SERVICES=() -case "$SERVICE_ARG" in - all) SERVICES=("platform-service" "extraction-service") ;; - platform|platform-service) SERVICES=("platform-service") ;; - extraction|extraction-service) SERVICES=("extraction-service") ;; - -h|--help) usage; exit 0 ;; - *) - echo "Unknown service selector: $SERVICE_ARG" >&2 - usage - exit 2 - ;; -esac - -MESSAGE="" while [[ $# -gt 0 ]]; do case "$1" in --project) @@ -56,6 +57,22 @@ while [[ $# -gt 0 ]]; do MESSAGE="${2:-}" shift 2 ;; + --sync-env) + SYNC_ENV=true + shift + ;; + --env-file) + ENV_FILE="${2:-}" + shift 2 + ;; + --attach) + DETACH=false + shift + ;; + --dry-run) + DRY_RUN=true + shift + ;; -h|--help) usage exit 0 @@ -68,13 +85,31 @@ while [[ $# -gt 0 ]]; do esac done +SERVICES=() +case "$SERVICE_SELECTOR" in + all) + 
SERVICES=("platform-service" "extraction-service") + ;; + platform|platform-service) + SERVICES=("platform-service") + ;; + extraction|extraction-service) + SERVICES=("extraction-service") + ;; + *) + echo "Unknown service selector: $SERVICE_SELECTOR" >&2 + usage + exit 2 + ;; +esac + if [[ -z "$PROJECT_ID" ]]; then - echo "Missing --project (or RAILWAY_PROJECT_ID)." >&2 + echo "Missing Railway project ID. Use --project or set RAILWAY_PROJECT_ID." >&2 exit 2 fi if [[ -z "$ENVIRONMENT" ]]; then - echo "Missing --env (or RAILWAY_ENVIRONMENT)." >&2 + echo "Missing Railway environment. Use --env or set RAILWAY_ENVIRONMENT." >&2 exit 2 fi @@ -92,12 +127,52 @@ if [[ -z "$MESSAGE" ]]; then fi if ! command -v railway >/dev/null 2>&1; then - echo "railway CLI not found. Install: npm i -g @railway/cli" >&2 + echo "railway CLI not found. Install with: npm i -g @railway/cli" >&2 exit 1 fi +if [[ "$DRY_RUN" == false ]]; then + if ! railway whoami >/dev/null 2>&1; then + echo "Railway CLI is not authenticated. Run: railway login" >&2 + exit 1 + fi +fi + +if [[ "$SYNC_ENV" == true ]]; then + sync_cmd=( + "scripts/railway-sync-env.sh" + "$SERVICE_SELECTOR" + "--project" "$PROJECT_ID" + "--env" "$ENVIRONMENT" + "--env-file" "$ENV_FILE" + ) + + if [[ "$DRY_RUN" == true ]]; then + sync_cmd+=("--dry-run") + fi + + echo "Syncing variables before deployment..." + "${sync_cmd[@]}" +fi + for svc in "${SERVICES[@]}"; do - echo "Deploying $svc (project=$PROJECT_ID env=$ENVIRONMENT)…" - railway up -p "$PROJECT_ID" -e "$ENVIRONMENT" -s "$svc" -d -m "$MESSAGE" + cmd=(railway up -p "$PROJECT_ID" -e "$ENVIRONMENT" -s "$svc" -m "$MESSAGE") + if [[ "$DETACH" == true ]]; then + cmd+=(-d) + fi + + if [[ "$DRY_RUN" == true ]]; then + printf 'DRY RUN:' + printf ' %q' "${cmd[@]}" + printf '\n' + continue + fi + + echo "Deploying $svc (project=$PROJECT_ID, env=$ENVIRONMENT)..." + "${cmd[@]}" done +echo "Deployment command(s) submitted." 
+echo "To verify status quickly:"
+echo " railway link --project $PROJECT_ID --environment $ENVIRONMENT"
+echo " railway service status -a -e $ENVIRONMENT"
diff --git a/scripts/railway-sync-env.sh b/scripts/railway-sync-env.sh
new file mode 100755
index 00000000..18b28c8a
--- /dev/null
+++ b/scripts/railway-sync-env.sh
@@ -0,0 +1,316 @@
+#!/usr/bin/env bash
+#
+# Sync a fixed list of variables from a local env file to Railway services.
+#
+# Usage: scripts/railway-sync-env.sh [all|platform|extraction] [options]
+# Reads:  RAILWAY_PROJECT_ID, RAILWAY_ENVIRONMENT, RAILWAY_ENV_FILE (defaults
+#         for --project, --env and --env-file respectively).
+# Exit:   0 on success, 2 on usage/configuration errors, 1 when the Railway
+#         CLI is missing, unauthenticated, or cannot reach a service.
+set -euo pipefail
+
+# All relative paths below (including a relative --env-file) are resolved from
+# the repository root, which is the parent of this script's directory.
+ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
+cd "$ROOT"
+
+# Defaults; each one can be overridden by the matching flag parsed below.
+PROJECT_ID="${RAILWAY_PROJECT_ID:-a6bc4ea7-e89c-42da-819a-8879fb022a0d}"
+ENVIRONMENT="${RAILWAY_ENVIRONMENT:-production}"
+ENV_FILE="${RAILWAY_ENV_FILE:-$ROOT/.env}"
+
+SERVICE_SELECTOR="all"
+SKIP_DEPLOYS=true
+DRY_RUN=false
+
+# Print the help text to stdout.
+usage() {
+  cat <<'USAGE'
+Sync selected environment variables from a local .env file to Railway services.
+
+Usage:
+  scripts/railway-sync-env.sh [all|platform|extraction] [options]
+
+Options:
+  --project <id>      Railway project ID (default: $RAILWAY_PROJECT_ID or script default)
+  --env <name>        Railway environment name (default: $RAILWAY_ENVIRONMENT or production)
+  --env-file <path>   Local env file to read (default: ./.env)
+  --trigger-deploys   Trigger deploys while setting vars (default is --skip-deploys)
+  --dry-run           Print variables that would be synced (no changes)
+  -h, --help          Show help
+
+Notes:
+  - Missing optional vars are skipped.
+  - Required vars (COSMOS_ENDPOINT, COSMOS_KEY, JWT_SECRET) must exist unless AZURE_KEYVAULT_URL is set.
+USAGE
+}
+
+# require_value FLAG ARGC
+# Abort with a usage error when FLAG is the last argument. Without this guard
+# `shift 2` fails and `set -e` ends the script silently with status 1.
+require_value() {
+  local flag="$1"
+  local argc="$2"
+  if ((argc < 2)); then
+    echo "Missing value for $flag" >&2
+    usage
+    exit 2
+  fi
+}
+
+# An optional leading word that does not start with "-" selects the service(s).
+if [[ $# -gt 0 && "${1#-}" == "$1" ]]; then
+  SERVICE_SELECTOR="$1"
+  shift
+fi
+
+while [[ $# -gt 0 ]]; do
+  case "$1" in
+    --project)
+      require_value "$1" "$#"
+      PROJECT_ID="$2"
+      shift 2
+      ;;
+    --env)
+      require_value "$1" "$#"
+      ENVIRONMENT="$2"
+      shift 2
+      ;;
+    --env-file)
+      require_value "$1" "$#"
+      ENV_FILE="$2"
+      shift 2
+      ;;
+    --trigger-deploys)
+      SKIP_DEPLOYS=false
+      shift
+      ;;
+    --dry-run)
+      DRY_RUN=true
+      shift
+      ;;
+    -h|--help)
+      usage
+      exit 0
+      ;;
+    *)
+      echo "Unknown arg: $1" >&2
+      usage
+      exit 2
+      ;;
+  esac
+done
+
+# Map the selector (short or full service name) to Railway service names.
+SERVICES=()
+case "$SERVICE_SELECTOR" in
+  all)
+    SERVICES=("platform-service" "extraction-service")
+    ;;
+  platform|platform-service)
+    SERVICES=("platform-service")
+    ;;
+  extraction|extraction-service)
+    SERVICES=("extraction-service")
+    ;;
+  *)
+    echo "Unknown service selector: $SERVICE_SELECTOR" >&2
+    usage
+    exit 2
+    ;;
+esac
+
+# An explicitly empty value (e.g. --project "") is rejected here.
+if [[ -z "$PROJECT_ID" ]]; then
+  echo "Missing Railway project ID. Use --project or set RAILWAY_PROJECT_ID." >&2
+  exit 2
+fi
+
+if [[ -z "$ENVIRONMENT" ]]; then
+  echo "Missing Railway environment. Use --env or set RAILWAY_ENVIRONMENT." >&2
+  exit 2
+fi
+
+if [[ ! -f "$ENV_FILE" ]]; then
+  echo "Env file not found: $ENV_FILE" >&2
+  exit 2
+fi
+
+if ! command -v railway >/dev/null 2>&1; then
+  echo "railway CLI not found. Install with: npm i -g @railway/cli" >&2
+  exit 1
+fi
+
+# Dry runs make no CLI calls that need credentials, so skip the auth check.
+if [[ "$DRY_RUN" == false ]]; then
+  if ! railway whoami >/dev/null 2>&1; then
+    echo "Railway CLI is not authenticated. Run: railway login" >&2
+    exit 1
+  fi
+fi
+
+# Load .env values into current shell for lookup.
+# The env file is sourced as shell, so it can assign ANY name, including ones
+# this script has already resolved from flags (ENVIRONMENT and PROJECT_ID are
+# common .env keys). Snapshot those settings and restore them after the source
+# so the file can only supply values to sync, never retarget the sync itself.
+_saved_root="$ROOT"
+_saved_project_id="$PROJECT_ID"
+_saved_environment="$ENVIRONMENT"
+_saved_env_file="$ENV_FILE"
+_saved_service_selector="$SERVICE_SELECTOR"
+_saved_skip_deploys="$SKIP_DEPLOYS"
+_saved_dry_run="$DRY_RUN"
+_saved_services=("${SERVICES[@]}")
+
+# shellcheck disable=SC1090
+set -a
+source "$ENV_FILE"
+set +a
+
+ROOT="$_saved_root"
+PROJECT_ID="$_saved_project_id"
+ENVIRONMENT="$_saved_environment"
+ENV_FILE="$_saved_env_file"
+SERVICE_SELECTOR="$_saved_service_selector"
+SKIP_DEPLOYS="$_saved_skip_deploys"
+DRY_RUN="$_saved_dry_run"
+SERVICES=("${_saved_services[@]}")
+unset _saved_root _saved_project_id _saved_environment _saved_env_file \
+  _saved_service_selector _saved_skip_deploys _saved_dry_run _saved_services
+
+# NOTE(review): the lookups below read ordinary shell variables, so a key that
+# is absent from the env file but exported in the caller's environment is
+# treated as present and will be synced. Confirm whether that is intended.
+
+# Print the value of the variable named by $1 (prints nothing when unset).
+get_value() {
+  local key="$1"
+  printf '%s' "${!key-}"
+}
+
+# Succeed when the variable named by $1 holds a non-empty value.
+has_value() {
+  local key="$1"
+  [[ -n "${!key-}" ]]
+}
+
+# Keys synced to every service; the sync aborts if one is missing and
+# AZURE_KEYVAULT_URL is not set.
+COMMON_REQUIRED=(
+  COSMOS_ENDPOINT
+  COSMOS_KEY
+  JWT_SECRET
+)
+
+# Keys synced to every service when they have a value.
+COMMON_OPTIONAL=(
+  AZURE_KEYVAULT_URL
+  COSMOS_DATABASE
+  DEFAULT_PRODUCT_ID
+  CORS_ORIGIN
+  PRODUCT_ID
+  NODE_ENV
+  HOST
+)
+
+# Keys synced only to platform-service when they have a value.
+PLATFORM_OPTIONAL=(
+  AZURE_BLOB_CONNECTION_STRING
+  AZURE_BLOB_ACCOUNT_NAME
+  AZURE_BLOB_ACCOUNT_KEY
+  STRIPE_SECRET_KEY
+  STRIPE_WEBHOOK_SECRET
+  STRIPE_PRICE_PRO
+  STRIPE_PRICE_ENTERPRISE
+  BILLING_INTERNAL_KEY
+  WEBHOOK_INVITATION_REDEEMED_URL
+  WEBHOOK_REFERRAL_STATUS_URL
+  BACKEND_URL
+  PLAN_LIMITS_JSON
+  USAGE_WARN_THRESHOLD
+  RATE_LIMIT_CONFIG_JSON
+  LICENSE_ACTIVATE_LOCKOUT_WINDOW_MS
+  LICENSE_ACTIVATE_MAX_FAILED_ATTEMPTS
+  COSMOS_AUTO_INIT
+)
+
+# Keys synced only to extraction-service when they have a value.
+EXTRACTION_OPTIONAL=(
+  GEMINI_API_KEY
+  DEFAULT_MODEL_ID
+  PYTHON_SIDECAR_URL
+  EXTRACTION_CACHE_TTL_MS
+  EXTRACTION_CACHE_MAX
+  EXTRACTION_CACHE_TTL
+  EXTRACTION_CACHE_MAX_SIZE
+  USE_MOCK_EXTRACTOR
+  SIDECAR_PORT
+  SIDECAR_HOST
+  AZURE_OPENAI_KEY
+  AZURE_OPENAI_ENDPOINT
+)
+
+missing_required=()
+for key in "${COMMON_REQUIRED[@]}"; do
+  if ! has_value "$key"; then
+    missing_required+=("$key")
+  fi
+done
+
+# Missing required keys are fatal unless a Key Vault URL is configured.
+if [[ ${#missing_required[@]} -gt 0 && -z "$(get_value AZURE_KEYVAULT_URL)" ]]; then
+  echo "Missing required variables in $ENV_FILE: ${missing_required[*]}" >&2
+  echo "Either set them in the env file, or set AZURE_KEYVAULT_URL and ensure Railway can access Key Vault." >&2
+  exit 2
+fi
+
+# Diagnostics go to stderr so stdout carries only the sync report.
+if [[ ${#missing_required[@]} -gt 0 ]]; then
+  echo "Warning: missing ${missing_required[*]} in $ENV_FILE; relying on Key Vault resolution at runtime." >&2
+fi
+
+# Temporary directory the CLI is linked from; stays empty in dry-run mode.
+LINK_DIR=""
+
+# Remove the temporary link directory on every exit path.
+cleanup() {
+  if [[ -n "$LINK_DIR" && -d "$LINK_DIR" ]]; then
+    rm -rf -- "$LINK_DIR"
+  fi
+}
+trap cleanup EXIT
+
+# run_railway ARGS...
+# Run the railway CLI from the temporary link directory; no-op in dry-run mode.
+# `&&` matters: callers invoke this inside `if !`, where errexit is disabled,
+# and the CLI must never run from the repository directory if the cd fails.
+run_railway() {
+  if [[ "$DRY_RUN" == true ]]; then
+    return 0
+  fi
+  (
+    cd "$LINK_DIR" && railway "$@"
+  )
+}
+
+# Link the project/environment from a throwaway directory (presumably so the
+# link is not recorded against the repository checkout - confirm with the CLI).
+if [[ "$DRY_RUN" == false ]]; then
+  LINK_DIR="$(mktemp -d)"
+  (
+    cd "$LINK_DIR" && railway link --project "$PROJECT_ID" --environment "$ENVIRONMENT" >/dev/null
+  )
+fi
+
+# ensure_service_exists SERVICE
+# Exit 1 when the service's variables cannot be listed; skipped in dry-run mode.
+ensure_service_exists() {
+  local svc="$1"
+  if [[ "$DRY_RUN" == true ]]; then
+    return 0
+  fi
+
+  if ! run_railway variable list --service "$svc" --environment "$ENVIRONMENT" --json >/dev/null 2>&1; then
+    echo "Unable to access Railway service '$svc' in env '$ENVIRONMENT' (project '$PROJECT_ID')." >&2
+    echo "Check project/environment IDs and service names." >&2
+    exit 1
+  fi
+}
+
+# set_variable SERVICE KEY VALUE
+# Set one variable on a service, or only report the key in dry-run mode.
+# Values are never echoed. NOTE(review): the value is passed to the CLI on
+# its command line, so it is visible in the process list while the CLI runs.
+set_variable() {
+  local svc="$1"
+  local key="$2"
+  local value="$3"
+  local -a cmd
+
+  if [[ "$DRY_RUN" == true ]]; then
+    echo " - would set $key"
+    return 0
+  fi
+
+  cmd=(variable set --service "$svc" --environment "$ENVIRONMENT")
+  if [[ "$SKIP_DEPLOYS" == true ]]; then
+    cmd+=(--skip-deploys)
+  fi
+  cmd+=("${key}=${value}")
+
+  run_railway "${cmd[@]}" >/dev/null
+  echo " - set $key"
+}
+
+# sync_service SERVICE KEY...
+# Sync each listed key that has a non-empty value; repeated keys are handled
+# once, and keys without a value are counted as skipped.
+sync_service() {
+  local svc="$1"
+  shift
+  local -a keys=("$@")
+  local key value
+
+  # "|"-delimited list of keys already handled, used to drop duplicates.
+  local seen="|"
+  local synced=0
+  local skipped=0
+
+  echo "Syncing variables for $svc..."
+
+  for key in "${keys[@]}"; do
+    if [[ "$seen" == *"|$key|"* ]]; then
+      continue
+    fi
+    seen+="$key|"
+
+    value="$(get_value "$key")"
+    if [[ -z "$value" ]]; then
+      skipped=$((skipped + 1))
+      continue
+    fi
+
+    set_variable "$svc" "$key" "$value"
+    synced=$((synced + 1))
+  done
+
+  echo "Synced $synced variables to $svc (skipped $skipped unset keys)."
+}
+
+for svc in "${SERVICES[@]}"; do
+  ensure_service_exists "$svc"
+
+  if [[ "$svc" == "platform-service" ]]; then
+    sync_service "$svc" \
+      "${COMMON_REQUIRED[@]}" \
+      "${COMMON_OPTIONAL[@]}" \
+      "${PLATFORM_OPTIONAL[@]}"
+  else
+    sync_service "$svc" \
+      "${COMMON_REQUIRED[@]}" \
+      "${COMMON_OPTIONAL[@]}" \
+      "${EXTRACTION_OPTIONAL[@]}"
+  fi
+done
+
+if [[ "$SKIP_DEPLOYS" == true ]]; then
+  echo "Variable sync complete (deploys were skipped)."
+  echo "Run scripts/railway-deploy.sh to publish new code/config."
+else
+  echo "Variable sync complete (deploys were triggered while setting vars)."
+fi