From c0cf80d96b7d42bc995396a62574d72b63b91b92 Mon Sep 17 00:00:00 2001 From: root Date: Sun, 29 Mar 2026 23:57:03 +0000 Subject: [PATCH] docs(devops): add Track A handoff and prep gateway changes --- dashboards/admin-web/Dockerfile | 5 +- dashboards/tracker-web/Dockerfile | 5 +- docker-compose.ecosystem.yml | 66 ++---- .../docker/Caddyfile.bytelyst.example | 87 ++++++++ .../docker/DEPLOYMENT_STATUS_2026-03-29.md | 17 ++ docs/devops/single_azure_vm/docker/README.md | 20 ++ .../vercel/CODEX_PROMPTS_TRACK_A_AZURE_VM.md | 2 +- .../vercel/TRACK_A_HANDOFF_2026-03-29.md | 198 ++++++++++++++++++ 8 files changed, 352 insertions(+), 48 deletions(-) create mode 100644 docs/devops/single_azure_vm/docker/Caddyfile.bytelyst.example create mode 100644 docs/devops/vercel/TRACK_A_HANDOFF_2026-03-29.md diff --git a/dashboards/admin-web/Dockerfile b/dashboards/admin-web/Dockerfile index 22322fd2..10e7c478 100644 --- a/dashboards/admin-web/Dockerfile +++ b/dashboards/admin-web/Dockerfile @@ -42,11 +42,12 @@ ENV HUSKY=0 RUN addgroup --system --gid 1001 nodejs RUN adduser --system --uid 1001 nextjs -COPY --from=builder --chown=nextjs:nodejs /app/deploy ./ +COPY --from=builder --chown=nextjs:nodejs /app/dashboards/admin-web/.next/standalone ./ +COPY --from=builder --chown=nextjs:nodejs /app/dashboards/admin-web/.next/static ./.next/static COPY --from=builder --chown=nextjs:nodejs /app/dashboards/admin-web/public ./public USER nextjs EXPOSE 3001 -CMD ["npm", "start"] +CMD ["node", "server.js"] diff --git a/dashboards/tracker-web/Dockerfile b/dashboards/tracker-web/Dockerfile index b56190f6..f16ac5e5 100644 --- a/dashboards/tracker-web/Dockerfile +++ b/dashboards/tracker-web/Dockerfile @@ -38,11 +38,12 @@ ENV HUSKY=0 RUN addgroup --system --gid 1001 nodejs RUN adduser --system --uid 1001 nextjs -COPY --from=builder --chown=nextjs:nodejs /app/deploy ./ +COPY --from=builder --chown=nextjs:nodejs /app/dashboards/tracker-web/.next/standalone ./ +COPY --from=builder --chown=nextjs:nodejs 
/app/dashboards/tracker-web/.next/static ./.next/static COPY --from=builder --chown=nextjs:nodejs /app/dashboards/tracker-web/public ./public USER nextjs EXPOSE 3003 -CMD ["npm", "start"] +CMD ["node", "server.js"] diff --git a/docker-compose.ecosystem.yml b/docker-compose.ecosystem.yml index 53e6e7da..cb1a8f7a 100644 --- a/docker-compose.ecosystem.yml +++ b/docker-compose.ecosystem.yml @@ -126,6 +126,8 @@ services: gateway: image: traefik:v3.3 + profiles: + - legacy-gateway command: - '--api.insecure=true' - '--providers.docker=true' @@ -143,6 +145,25 @@ services: condition: service_started restart: unless-stopped + caddy: + image: caddy:2-alpine + container_name: caddy + ports: + - '80:80' + - '443:443' + volumes: + - ../Caddyfile:/etc/caddy/Caddyfile:ro + - caddy-data:/data + - caddy-config:/config + depends_on: + platform-service: + condition: service_healthy + extraction-service: + condition: service_healthy + mcp-server: + condition: service_healthy + restart: unless-stopped + # ═════════════════════════════════════════════════════════════════ # PLATFORM SERVICES (from this repo) # ═════════════════════════════════════════════════════════════════ @@ -151,8 +172,6 @@ services: build: context: . dockerfile: services/platform-service/Dockerfile - ports: - - '4003:4003' env_file: - .env.ecosystem environment: @@ -184,8 +203,6 @@ services: build: context: . dockerfile: services/extraction-service/Dockerfile - ports: - - '4005:4005' env_file: - .env.ecosystem environment: @@ -209,8 +226,6 @@ services: build: context: . 
dockerfile: services/mcp-server/Dockerfile - ports: - - '4007:4007' env_file: - .env.ecosystem environment: @@ -289,8 +304,6 @@ services: <<: *product-build context: ../learning_ai_peakpulse dockerfile: backend/Dockerfile - ports: - - '4010:4010' env_file: - .env.ecosystem environment: @@ -312,8 +325,6 @@ services: <<: *product-build context: ../learning_ai_clock dockerfile: backend/Dockerfile - ports: - - '4011:4011' env_file: - .env.ecosystem environment: @@ -335,8 +346,6 @@ services: <<: *product-build context: ../learning_ai_jarvis_jr dockerfile: backend/Dockerfile - ports: - - '4012:4012' env_file: - .env.ecosystem environment: @@ -358,8 +367,6 @@ services: <<: *product-build context: ../learning_ai_fastgap dockerfile: backend/Dockerfile - ports: - - '4013:4013' env_file: - .env.ecosystem environment: @@ -381,8 +388,6 @@ services: <<: *product-build context: ../learning_multimodal_memory_agents dockerfile: backend/Dockerfile - ports: - - '4014:4014' env_file: - .env.ecosystem environment: @@ -404,8 +409,6 @@ services: <<: *product-build context: ../learning_voice_ai_agent dockerfile: backend/Dockerfile - ports: - - '4015:4015' env_file: - .env.ecosystem environment: @@ -427,8 +430,6 @@ services: <<: *product-build context: ../learning_ai_notes dockerfile: backend/Dockerfile - ports: - - '4016:4016' env_file: - .env.ecosystem environment: @@ -451,8 +452,6 @@ services: <<: *product-build context: ../learning_ai_flowmonk dockerfile: backend/Dockerfile - ports: - - '4017:4017' env_file: - .env.ecosystem environment: @@ -474,8 +473,6 @@ services: <<: *product-build context: ../learning_ai_trails dockerfile: backend/Dockerfile - ports: - - '4018:4018' env_file: - .env.ecosystem environment: @@ -497,8 +494,6 @@ services: <<: *product-build context: ../learning_ai_local_memory_gpt dockerfile: backend/Dockerfile - ports: - - '4019:4019' extra_hosts: - 'host.docker.internal:host-gateway' env_file: @@ -822,23 +817,6 @@ services: retries: 3 restart: unless-stopped - 
efforise-web: - build: - <<: *product-build - context: ../learning_ai_efforise - dockerfile: client/Dockerfile - ports: - - '3080:3080' - depends_on: - efforise-backend: - condition: service_healthy - healthcheck: - test: ['CMD', 'wget', '-q', '--spider', 'http://127.0.0.1:3080'] - interval: 30s - timeout: 10s - retries: 3 - restart: unless-stopped - # ═════════════════════════════════════════════════════════════════ # VOLUMES # ═════════════════════════════════════════════════════════════════ @@ -848,3 +826,5 @@ volumes: loki-data: grafana-data: localmemgpt-data: + caddy-data: + caddy-config: diff --git a/docs/devops/single_azure_vm/docker/Caddyfile.bytelyst.example b/docs/devops/single_azure_vm/docker/Caddyfile.bytelyst.example new file mode 100644 index 00000000..09f191d9 --- /dev/null +++ b/docs/devops/single_azure_vm/docker/Caddyfile.bytelyst.example @@ -0,0 +1,87 @@ +{ + email admin@bytelyst.com +} + +api.bytelyst.com { + encode gzip + + @platform_root path /platform + redir @platform_root /platform/ 308 + handle_path /platform/* { + reverse_proxy platform-service:4003 + } + + @extraction_root path /extraction + redir @extraction_root /extraction/ 308 + handle_path /extraction/* { + reverse_proxy extraction-service:4005 + } + + @mcp_root path /mcp + redir @mcp_root /mcp/ 308 + handle_path /mcp/* { + reverse_proxy mcp-server:4007 + } + + @peakpulse_root path /peakpulse + redir @peakpulse_root /peakpulse/ 308 + handle_path /peakpulse/* { + reverse_proxy peakpulse-backend:4010 + } + + @chronomind_root path /chronomind + redir @chronomind_root /chronomind/ 308 + handle_path /chronomind/* { + reverse_proxy chronomind-backend:4011 + } + + @jarvisjr_root path /jarvisjr + redir @jarvisjr_root /jarvisjr/ 308 + handle_path /jarvisjr/* { + reverse_proxy jarvisjr-backend:4012 + } + + @nomgap_root path /nomgap + redir @nomgap_root /nomgap/ 308 + handle_path /nomgap/* { + reverse_proxy nomgap-backend:4013 + } + + @mindlyst_root path /mindlyst + redir @mindlyst_root 
/mindlyst/ 308 + handle_path /mindlyst/* { + reverse_proxy mindlyst-backend:4014 + } + + @lysnrai_root path /lysnrai + redir @lysnrai_root /lysnrai/ 308 + handle_path /lysnrai/* { + reverse_proxy lysnrai-backend:4015 + } + + @notelett_root path /notelett + redir @notelett_root /notelett/ 308 + handle_path /notelett/* { + reverse_proxy notelett-backend:4016 + } + + @flowmonk_root path /flowmonk + redir @flowmonk_root /flowmonk/ 308 + handle_path /flowmonk/* { + reverse_proxy flowmonk-backend:4017 + } + + @actiontrail_root path /actiontrail + redir @actiontrail_root /actiontrail/ 308 + handle_path /actiontrail/* { + reverse_proxy actiontrail-backend:4018 + } + + @localmemgpt_root path /localmemgpt + redir @localmemgpt_root /localmemgpt/ 308 + handle_path /localmemgpt/* { + reverse_proxy localmemgpt-backend:4019 + } + + respond 404 +} diff --git a/docs/devops/single_azure_vm/docker/DEPLOYMENT_STATUS_2026-03-29.md b/docs/devops/single_azure_vm/docker/DEPLOYMENT_STATUS_2026-03-29.md index 209422c7..2c8fdaa0 100644 --- a/docs/devops/single_azure_vm/docker/DEPLOYMENT_STATUS_2026-03-29.md +++ b/docs/devops/single_azure_vm/docker/DEPLOYMENT_STATUS_2026-03-29.md @@ -213,6 +213,23 @@ Impact: - dashboard image became buildable +### 6. 
Dashboard standalone runtime fix + +Problem: + +- `admin-web` and `tracker-web` were built with Next.js standalone output enabled +- their runtime images still attempted to start from a deployed package layout that did not contain the expected standalone entrypoint + +Fix: + +- changed both dashboard Dockerfiles to copy `.next/standalone` and `.next/static` +- switched the runtime command to `node server.js` + +Impact: + +- aligns both images with the Next.js standalone artifact layout +- removes the immediate startup failure seen in the container logs + --- ## Validation Results diff --git a/docs/devops/single_azure_vm/docker/README.md b/docs/devops/single_azure_vm/docker/README.md index 6d799f3d..d57d0651 100644 --- a/docs/devops/single_azure_vm/docker/README.md +++ b/docs/devops/single_azure_vm/docker/README.md @@ -194,6 +194,26 @@ All optional — defaults work for most setups: - **CORS errors in browser:** The generated `.env.ecosystem` sets `CORS_ORIGIN=*` for dev/test. If you restrict it, update the value to match your access URL. - **Services in development mode:** `.env.ecosystem` now sets `NODE_ENV=production` for all services. If you need debug logging, remove or change this value. +## HTTPS Gateway + +- Public backend access is intended to flow through Caddy on `https://api.bytelyst.com`, not direct backend port exposure. +- The gateway config lives at `/opt/bytelyst/Caddyfile` and is mounted into the `caddy` container. 
+- Backend routes are path-based and strip their prefixes before proxying: + - `/platform/*` → `platform-service:4003` + - `/extraction/*` → `extraction-service:4005` + - `/mcp/*` → `mcp-server:4007` + - `/peakpulse/*` → `peakpulse-backend:4010` + - `/chronomind/*` → `chronomind-backend:4011` + - `/jarvisjr/*` → `jarvisjr-backend:4012` + - `/nomgap/*` → `nomgap-backend:4013` + - `/mindlyst/*` → `mindlyst-backend:4014` + - `/lysnrai/*` → `lysnrai-backend:4015` + - `/notelett/*` → `notelett-backend:4016` + - `/flowmonk/*` → `flowmonk-backend:4017` + - `/actiontrail/*` → `actiontrail-backend:4018` + - `/localmemgpt/*` → `localmemgpt-backend:4019` +- Keep backend ports closed publicly once DNS and NSG rules are aligned. Docker-internal service discovery remains unchanged. + ## Known Limitations - **Remote browser access:** Product web apps use `http://localhost:<port>` for browser-side API calls (baked at Next.js build time via `NEXT_PUBLIC_*` args). This works when browsing from the VM itself but **not from a remote browser** (e.g., laptop accessing `http://<vm-ip>:3060`). 
For remote access, use SSH port-forwarding: diff --git a/docs/devops/vercel/CODEX_PROMPTS_TRACK_A_AZURE_VM.md b/docs/devops/vercel/CODEX_PROMPTS_TRACK_A_AZURE_VM.md index 3682f65e..293d3c25 100644 --- a/docs/devops/vercel/CODEX_PROMPTS_TRACK_A_AZURE_VM.md +++ b/docs/devops/vercel/CODEX_PROMPTS_TRACK_A_AZURE_VM.md @@ -15,7 +15,7 @@ | # | Prompt | Status | Commit SHA | Verified | | --- | ------------------------ | :------------: | :--------: | :------: | -| A1 | Caddy Gateway Setup | ⬜ Not started | — | ⬜ | +| A1 | Caddy Gateway Setup | 🟨 In progress | — | ⬜ | | A2 | Gitea HTTPS Exposure | ⬜ Not started | — | ⬜ | | A3 | Dashboard Containers Fix | ⬜ Not started | — | ⬜ | | A4 | NSG Lockdown | ⬜ Not started | — | ⬜ | diff --git a/docs/devops/vercel/TRACK_A_HANDOFF_2026-03-29.md b/docs/devops/vercel/TRACK_A_HANDOFF_2026-03-29.md new file mode 100644 index 00000000..84e56768 --- /dev/null +++ b/docs/devops/vercel/TRACK_A_HANDOFF_2026-03-29.md @@ -0,0 +1,198 @@ +# Track A Handoff — 2026-03-29 + +This handoff captures the current state of Track A on the Azure VM at `/opt/bytelyst/`. + +## What Was Completed + +- Fixed a blocking parse error in [`docker-compose.ecosystem.yml`](/opt/bytelyst/learning_ai_common_plat/docker-compose.ecosystem.yml) by removing a duplicate `efforise-web` service definition. +- Added [`/opt/bytelyst/Caddyfile`](/opt/bytelyst/Caddyfile) with path-based routing for the 13 backend services. +- Added a tracked template copy at [`docs/devops/single_azure_vm/docker/Caddyfile.bytelyst.example`](/opt/bytelyst/learning_ai_common_plat/docs/devops/single_azure_vm/docker/Caddyfile.bytelyst.example). 
+- Updated [`docker-compose.ecosystem.yml`](/opt/bytelyst/learning_ai_common_plat/docker-compose.ecosystem.yml) to: + - add a `caddy` service + - place Traefik `gateway` behind a `legacy-gateway` profile + - remove published backend ports for `4003`, `4005`, `4007`, `4010`-`4019` +- Updated dashboard Dockerfiles to use the Next.js standalone runtime: + - [`dashboards/admin-web/Dockerfile`](/opt/bytelyst/learning_ai_common_plat/dashboards/admin-web/Dockerfile) + - [`dashboards/tracker-web/Dockerfile`](/opt/bytelyst/learning_ai_common_plat/dashboards/tracker-web/Dockerfile) +- Updated deployment docs: + - [`docs/devops/single_azure_vm/docker/README.md`](/opt/bytelyst/learning_ai_common_plat/docs/devops/single_azure_vm/docker/README.md) + - [`docs/devops/single_azure_vm/docker/DEPLOYMENT_STATUS_2026-03-29.md`](/opt/bytelyst/learning_ai_common_plat/docs/devops/single_azure_vm/docker/DEPLOYMENT_STATUS_2026-03-29.md) +- Updated the Track A progress table to mark A1 in progress: + - [`docs/devops/vercel/CODEX_PROMPTS_TRACK_A_AZURE_VM.md`](/opt/bytelyst/learning_ai_common_plat/docs/devops/vercel/CODEX_PROMPTS_TRACK_A_AZURE_VM.md) + +## Blockers Found + +### 1. DNS is not ready + +From the VM, these records did not resolve: + +- `api.bytelyst.com` +- `gitea.bytelyst.com` +- `admin.bytelyst.com` +- `tracker.bytelyst.com` + +Without these, Caddy cannot obtain Let's Encrypt certificates and A1/A2/A3 HTTPS verification cannot pass. + +### 2. Azure CLI is missing + +`az account show` failed with: + +```text +az: command not found +``` + +That blocks A4 because the NSG cannot be snapshotted or edited from this VM. + +### 3. Linux build host is still using the wrong Gitea registry hostname + +Broad compose rebuilds hit repeated package fetch failures such as: + +```text +GET http://host.docker.internal:3300/api/packages/bytelyst/npm/... error (ENOTFOUND) +``` + +The VM is Linux, and many builds still default to `GITEA_NPM_HOST=host.docker.internal`. 
On this VM the Gitea registry is reachable at `localhost:3300` on the host and via the detected Docker host IP used by `setup.sh`. + +This blocks reliable rebuilds for A3 and potentially other services. + +## Required Preconditions Before Resuming + +1. Create DNS A records for: + - `api.bytelyst.com` + - `gitea.bytelyst.com` + - `admin.bytelyst.com` + - `tracker.bytelyst.com` +2. Confirm they resolve to the VM public IP from the VM: + +```bash +dig +short api.bytelyst.com +dig +short gitea.bytelyst.com +dig +short admin.bytelyst.com +dig +short tracker.bytelyst.com +curl -sf https://api.ipify.org && echo +``` + +3. Install Azure CLI and log in with permissions to manage the VM NSG. +4. Export a Linux-safe Gitea host before rebuilds, for example: + +```bash +export GITEA_NPM_HOST=172.17.0.1 +``` + +Use the actual Docker-reachable host IP if it differs on this VM. + +## Resume Steps + +Run from: + +```bash +cd /opt/bytelyst/learning_ai_common_plat +``` + +### A1 + +1. Confirm backend health: + +```bash +docker compose -f docker-compose.ecosystem.yml --env-file .env.ecosystem ps +docker compose -f docker-compose.ecosystem.yml --env-file .env.ecosystem ps | grep -c healthy +``` + +2. Start or refresh only the gateway-related services first: + +```bash +docker compose -f docker-compose.ecosystem.yml --env-file .env.ecosystem up -d caddy platform-service extraction-service mcp-server peakpulse-backend chronomind-backend jarvisjr-backend nomgap-backend mindlyst-backend lysnrai-backend notelett-backend flowmonk-backend actiontrail-backend localmemgpt-backend +docker logs caddy --tail 100 +``` + +3. 
Verify A1: + +```bash +curl -sI https://api.bytelyst.com/platform/health | head -5 +for svc in platform extraction mcp peakpulse chronomind jarvisjr nomgap mindlyst lysnrai notelett flowmonk actiontrail localmemgpt; do + echo -n "$svc: " + curl -sf https://api.bytelyst.com/$svc/health | jq -r '.status // "FAIL"' +done +curl -sf --max-time 3 http://<VM_PUBLIC_IP>:4003/health && echo "FAIL: port still open" || echo "PASS: port closed" +``` + +### A2 + +1. Reconfigure the standalone `gitea-npm-registry` container for HTTPS exposure. It is not managed by the ecosystem compose file. +2. Update its `ROOT_URL` to `https://gitea.bytelyst.com`. +3. Add a `gitea.bytelyst.com` block to `/opt/bytelyst/Caddyfile`. +4. Reload Caddy. +5. Verify: + +```bash +curl -sI https://gitea.bytelyst.com | head -3 +curl -sf https://gitea.bytelyst.com/api/packages/ByteLyst/npm/@bytelyst%2ferrors | jq '.name' +docker exec platform-service curl -sf http://gitea:3300/api/v1/version | jq '.version' +``` + +Note: +The current running container is: + +```bash +docker ps --format 'table {{.Names}}\t{{.Status}}\t{{.Ports}}' | grep -i gitea +``` + +Observed name during this session: + +```text +gitea-npm-registry +``` + +### A3 + +1. Keep `GITEA_NPM_HOST` exported to the Docker-reachable host IP before rebuilding. +2. Rebuild only the dashboard services first: + +```bash +export GITEA_NPM_HOST=172.17.0.1 +docker compose -f docker-compose.ecosystem.yml --env-file .env.ecosystem up -d --build admin-web tracker-web +docker compose -f docker-compose.ecosystem.yml --env-file .env.ecosystem logs admin-web --tail 100 +docker compose -f docker-compose.ecosystem.yml --env-file .env.ecosystem logs tracker-web --tail 100 +``` + +3. Add dashboard host blocks to `/opt/bytelyst/Caddyfile`: + +```caddy +admin.bytelyst.com { + reverse_proxy admin-web:3001 +} + +tracker.bytelyst.com { + reverse_proxy tracker-web:3003 +} +``` + +4. 
Reload Caddy and verify: + +```bash +curl -sf http://127.0.0.1:3001 | head -5 +curl -sf http://127.0.0.1:3003 | head -5 +curl -sI https://admin.bytelyst.com | head -3 +curl -sI https://tracker.bytelyst.com | head -3 +docker compose -f docker-compose.ecosystem.yml --env-file .env.ecosystem ps | grep -E "(unhealthy|Exit)" | wc -l +``` + +### A4 + +1. Install Azure CLI if still missing. +2. Identify the resource group and NSG name for this VM. +3. Snapshot rules, then reduce inbound access to only `22`, `80`, and `443`. +4. Run the full A4 verification from the Track A prompt file. + +## Suggested Commit Sequence + +After each prompt passes, update the progress table in: + +- [`docs/devops/vercel/CODEX_PROMPTS_TRACK_A_AZURE_VM.md`](/opt/bytelyst/learning_ai_common_plat/docs/devops/vercel/CODEX_PROMPTS_TRACK_A_AZURE_VM.md) + +Suggested commits: + +- `feat(gateway): replace Traefik with Caddy for HTTPS path routing` +- `feat(gateway): expose Gitea npm registry via HTTPS at gitea.bytelyst.com` +- `fix(deployment): resolve admin-web and tracker-web containers` +- `chore(security): lock down Azure NSG to 22/80/443 only`