From 4687351de013178f6163736b3857e80ccbd84c4a Mon Sep 17 00:00:00 2001 From: saravanakumardb1 Date: Mon, 23 Mar 2026 18:04:18 -0700 Subject: [PATCH] ci: update CI/CD configuration --- .../audit-repo-health.md | 268 +++++++++++++ .../docker-smoke-test.md | 69 ++++ .../learning_ai_common_plat/gitea-ci.md | 107 ++++++ .../learning_ai_common_plat/repos.txt | 1 + .../verify-all-backends.md | 109 ++++++ docs/devops/KUBERNETES_ROADMAP.md | 359 ++++++++++++++++++ scripts/prep-consumer.sh | 11 +- 7 files changed, 921 insertions(+), 3 deletions(-) create mode 100644 __LOCAL_LLMs/AI_IDE_CHAT_HISTORY/WINDSURF/repo_workflows/learning_ai_common_plat/audit-repo-health.md create mode 100644 __LOCAL_LLMs/AI_IDE_CHAT_HISTORY/WINDSURF/repo_workflows/learning_ai_common_plat/docker-smoke-test.md create mode 100644 __LOCAL_LLMs/AI_IDE_CHAT_HISTORY/WINDSURF/repo_workflows/learning_ai_common_plat/gitea-ci.md create mode 100644 __LOCAL_LLMs/AI_IDE_CHAT_HISTORY/WINDSURF/repo_workflows/learning_ai_common_plat/verify-all-backends.md create mode 100644 docs/devops/KUBERNETES_ROADMAP.md diff --git a/__LOCAL_LLMs/AI_IDE_CHAT_HISTORY/WINDSURF/repo_workflows/learning_ai_common_plat/audit-repo-health.md b/__LOCAL_LLMs/AI_IDE_CHAT_HISTORY/WINDSURF/repo_workflows/learning_ai_common_plat/audit-repo-health.md new file mode 100644 index 00000000..5625a79b --- /dev/null +++ b/__LOCAL_LLMs/AI_IDE_CHAT_HISTORY/WINDSURF/repo_workflows/learning_ai_common_plat/audit-repo-health.md @@ -0,0 +1,268 @@ +--- +description: Cross-repo health audit — verify pnpm config, Dockerfiles, next.config.ts, and workspace consistency +--- + +# Cross-Repo Health Audit + +Systematically verify consistency across all ByteLyst product repos. Catches drift in pnpm config, Dockerfiles, next.config.ts, and workspace setup. + +**Run this after:** pnpm migrations, Dockerfile changes, @bytelyst/\* package additions, or periodic maintenance. + +## 1. 
Check packageManager field in all root package.json files + +// turbo + +```bash +REPOS_DIR="/Users/sd9235/code/mygh" +echo "=== packageManager in root package.json ===" +for repo in \ + learning_ai_common_plat \ + learning_voice_ai_agent \ + learning_ai_clock \ + learning_ai_fastgap \ + learning_ai_jarvis_jr \ + learning_ai_peakpulse \ + learning_ai_notes \ + learning_ai_flowmonk \ + learning_ai_trails \ + learning_ai_local_memory_gpt; do + printf "%-40s " "$repo:" + grep '"packageManager"' "$REPOS_DIR/$repo/package.json" 2>/dev/null || echo "MISSING" +done +``` + +Expect: all repos show `"packageManager": "pnpm@10.6.5"`. Fix any MISSING entries. + +## 2. Check node_modules in .gitignore + +// turbo + +```bash +REPOS_DIR="/Users/sd9235/code/mygh" +echo "=== node_modules in .gitignore ===" +for repo in \ + learning_ai_common_plat \ + learning_voice_ai_agent \ + learning_ai_clock \ + learning_ai_fastgap \ + learning_ai_jarvis_jr \ + learning_ai_peakpulse \ + learning_ai_notes \ + learning_ai_flowmonk \ + learning_ai_trails \ + learning_ai_local_memory_gpt; do + printf "%-40s " "$repo:" + grep -c 'node_modules' "$REPOS_DIR/$repo/.gitignore" 2>/dev/null || echo "MISSING" +done +``` + +Expect: all repos have at least 1 match. Fix any with 0 or MISSING. + +## 3. Check .dockerignore exists and does NOT exclude .docker-deps + +// turbo + +```bash +REPOS_DIR="/Users/sd9235/code/mygh" +echo "=== .dockerignore health ===" +for repo in \ + learning_voice_ai_agent \ + learning_ai_clock \ + learning_ai_fastgap \ + learning_ai_jarvis_jr \ + learning_ai_peakpulse \ + learning_ai_notes \ + learning_ai_flowmonk \ + learning_ai_trails \ + learning_ai_local_memory_gpt; do + di="$REPOS_DIR/$repo/.dockerignore" + if [ ! -f "$di" ]; then + echo "$repo: MISSING .dockerignore" + elif grep -q 'docker-deps' "$di"; then + echo "$repo: BUG — .dockerignore excludes .docker-deps" + else + echo "$repo: OK" + fi +done +``` + +Expect: all OK. Any BUG entries will break Docker builds. + +## 4. 
Check stale package-lock.json files + +// turbo + +```bash +REPOS_DIR="/Users/sd9235/code/mygh" +echo "=== Stale package-lock.json ===" +for repo in \ + learning_voice_ai_agent \ + learning_ai_clock \ + learning_ai_fastgap \ + learning_ai_jarvis_jr \ + learning_ai_peakpulse \ + learning_ai_notes \ + learning_ai_flowmonk \ + learning_ai_trails \ + learning_ai_local_memory_gpt; do + found=$(find "$REPOS_DIR/$repo" -name "package-lock.json" -not -path "*/node_modules/*" 2>/dev/null) + if [ -n "$found" ]; then echo "STALE: $found"; fi +done +echo "(empty = all clean)" +``` + +Expect: no `STALE:` lines (only the header and the `(empty = all clean)` footer are printed). Remove any stale lockfiles found. + +## 5. Check Dockerfiles use node:22-slim and have NODE_TLS + +// turbo + +```bash +REPOS_DIR="/Users/sd9235/code/mygh" +echo "=== Dockerfile base image + NODE_TLS ===" +for repo in \ + learning_voice_ai_agent \ + learning_ai_clock \ + learning_ai_fastgap \ + learning_ai_jarvis_jr \ + learning_ai_peakpulse \ + learning_ai_notes \ + learning_ai_flowmonk \ + learning_ai_trails \ + learning_ai_local_memory_gpt; do + for df in $(git -C "$REPOS_DIR/$repo" ls-files '*/Dockerfile' 2>/dev/null); do + full="$REPOS_DIR/$repo/$df" + base=$(grep -m1 '^FROM' "$full" | awk '{print $2}') + tls=$(grep -c 'NODE_TLS_REJECT_UNAUTHORIZED' "$full" 2>/dev/null) + status="OK" + [[ "$base" == *alpine* ]] && status="WARN:alpine" + [[ "$tls" == "0" && "$df" != *python* ]] && status="$status WARN:no-NODE_TLS" + echo "$repo/$df: base=$base tls=$tls $status" + done +done +``` + +Expect: all use `node:22-slim`, all have `NODE_TLS` refs > 0. Fix any WARN entries. + +## 6. 
Check next.config.ts has transpilePackages + symlinks + +// turbo + +```bash +REPOS_DIR="/Users/sd9235/code/mygh" +echo "=== next.config.ts: transpilePackages + symlinks ===" +for repo in \ + learning_voice_ai_agent \ + learning_ai_clock \ + learning_ai_fastgap \ + learning_ai_jarvis_jr \ + learning_ai_notes \ + learning_ai_trails \ + learning_ai_local_memory_gpt; do + for cfg in $(find "$REPOS_DIR/$repo" -maxdepth 2 -name "next.config.ts" -not -path "*/node_modules/*" 2>/dev/null); do + relpath="${cfg#$REPOS_DIR/}" + tp=$(grep -c 'transpilePackages' "$cfg") + sl=$(grep -c 'symlinks' "$cfg") + status="OK" + [[ "$tp" == "0" ]] && status="MISSING:transpilePackages" + [[ "$sl" == "0" ]] && status="$status MISSING:symlinks" + echo "$relpath: transpile=$tp symlinks=$sl $status" + done +done +``` + +Expect: all show transpile>0 and symlinks>0. Fix any MISSING entries. + +## 7. Check pnpm-workspace.yaml includes common-plat packages + +// turbo + +```bash +REPOS_DIR="/Users/sd9235/code/mygh" +echo "=== pnpm-workspace.yaml includes common-plat ===" +for repo in \ + learning_voice_ai_agent \ + learning_ai_clock \ + learning_ai_fastgap \ + learning_ai_jarvis_jr \ + learning_ai_peakpulse \ + learning_ai_notes \ + learning_ai_flowmonk \ + learning_ai_trails \ + learning_ai_local_memory_gpt; do + ws="$REPOS_DIR/$repo/pnpm-workspace.yaml" + if [ ! -f "$ws" ]; then + echo "$repo: MISSING pnpm-workspace.yaml" + elif grep -q 'common_plat' "$ws"; then + echo "$repo: OK" + else + echo "$repo: MISSING common-plat in workspace" + fi +done +``` + +Expect: all OK. Fix any MISSING entries. + +## 8. 
Check docker-prep.sh uses shared prep-consumer + +// turbo + +```bash +REPOS_DIR="/Users/sd9235/code/mygh" +echo "=== docker-prep.sh uses shared prep-consumer ===" +for repo in \ + learning_voice_ai_agent \ + learning_ai_clock \ + learning_ai_fastgap \ + learning_ai_jarvis_jr \ + learning_ai_peakpulse \ + learning_ai_notes \ + learning_ai_flowmonk \ + learning_ai_trails \ + learning_ai_local_memory_gpt; do + script="$REPOS_DIR/$repo/scripts/docker-prep.sh" + if [ ! -f "$script" ]; then + echo "$repo: NO docker-prep.sh" + elif grep -q 'prep-consumer' "$script"; then + echo "$repo: OK (shared wrapper)" + else + echo "$repo: WARN — legacy docker-prep.sh" + fi +done +``` + +Expect: all OK. Legacy scripts should be replaced with the shared wrapper. + +## 9. Check verify scripts reference correct package filter names + +// turbo + +```bash +REPOS_DIR="/Users/sd9235/code/mygh" +echo "=== Root verify scripts ===" +for repo in \ + learning_voice_ai_agent \ + learning_ai_clock \ + learning_ai_fastgap \ + learning_ai_jarvis_jr \ + learning_ai_peakpulse \ + learning_ai_notes \ + learning_ai_flowmonk \ + learning_ai_trails \ + learning_ai_local_memory_gpt; do + printf "%-40s " "$repo:" + node -e "const p=require('$REPOS_DIR/$repo/package.json'); console.log(p.scripts?.verify || 'NONE')" 2>/dev/null +done +``` + +Review output manually — ensure `--filter` names match actual package names in sub-packages. + +## 10. Summarize findings and fix + +For each issue found: + +1. Fix the file in the affected repo +2. Commit with message: `fix(repo): description` +3. Push to origin + +Run `/gitea-ci` after all fixes to verify full CI passes. 
diff --git a/__LOCAL_LLMs/AI_IDE_CHAT_HISTORY/WINDSURF/repo_workflows/learning_ai_common_plat/docker-smoke-test.md b/__LOCAL_LLMs/AI_IDE_CHAT_HISTORY/WINDSURF/repo_workflows/learning_ai_common_plat/docker-smoke-test.md new file mode 100644 index 00000000..806a8605 --- /dev/null +++ b/__LOCAL_LLMs/AI_IDE_CHAT_HISTORY/WINDSURF/repo_workflows/learning_ai_common_plat/docker-smoke-test.md @@ -0,0 +1,69 @@ +--- +description: Docker smoke test — prep, build, and verify all Dockerfiles in the current repo +--- + +# Docker Smoke Test + +Build and verify all Dockerfiles in the current product repo. Runs docker-prep, builds each image, then restores package.json files. + +**Prerequisite:** Docker Desktop must be running. The current repo must have `scripts/docker-prep.sh`. + +## 1. Identify the repo and its Dockerfiles + +// turbo + +```bash +REPO_DIR="$(pwd)" +REPO_NAME="$(basename "$REPO_DIR")" +echo "Repo: $REPO_NAME" +echo "Dockerfiles:" +find "$REPO_DIR" -maxdepth 2 -name "Dockerfile" -not -path "*/node_modules/*" | sort +``` + +## 2. Run docker-prep to pack @bytelyst/\* tarballs + +```bash +bash scripts/docker-prep.sh +``` + +## 3. Build each Dockerfile + +Build each Dockerfile found in the repo. The build context is always the repo root (`.`). +Tag images as `:smoke-test` for easy cleanup. + +For each Dockerfile found in step 1, run: + +```bash +# Example for backend: +docker build -f backend/Dockerfile -t "$(basename $(pwd))-backend:smoke-test" . 2>&1 | tail -20 + +# Example for web: +docker build -f web/Dockerfile -t "$(basename $(pwd))-web:smoke-test" . 2>&1 | tail -20 +``` + +Adapt the `-f` path based on actual Dockerfile locations from step 1. +If a build fails, stop and investigate the error before continuing. + +## 4. Restore package.json files + +```bash +bash scripts/docker-prep.sh --restore +``` + +## 5. Report results + +Summarize which images built successfully and which failed. +If all passed, the repo's Docker setup is healthy. + +## 6. 
(Optional) Cleanup smoke-test images + +```bash +docker images --filter "reference=*:smoke-test" --format "{{.Repository}}:{{.Tag}}" | xargs -r docker rmi +``` + +## Common Failures + +- **`.docker-deps` not found:** Run step 2 first, and ensure `.dockerignore` does NOT exclude `.docker-deps` +- **Google Fonts / TLS error:** Ensure `ENV NODE_TLS_REJECT_UNAUTHORIZED=0` is in the builder stage +- **Native module build failure:** Add `python3 make g++` to `RUN apt-get install` in the builder stage +- **`public/` not found:** Remove the `COPY public` line if the web app has no `public/` directory diff --git a/__LOCAL_LLMs/AI_IDE_CHAT_HISTORY/WINDSURF/repo_workflows/learning_ai_common_plat/gitea-ci.md b/__LOCAL_LLMs/AI_IDE_CHAT_HISTORY/WINDSURF/repo_workflows/learning_ai_common_plat/gitea-ci.md new file mode 100644 index 00000000..d1859026 --- /dev/null +++ b/__LOCAL_LLMs/AI_IDE_CHAT_HISTORY/WINDSURF/repo_workflows/learning_ai_common_plat/gitea-ci.md @@ -0,0 +1,107 @@ +--- +description: Start Gitea local CI, check status, or push all repos to trigger CI runs +--- + +# Gitea Local CI Workflow + +Manages the self-hosted Gitea + act_runner CI infrastructure for all ByteLyst repos. +See `docs/devops/GITEA_LOCAL_CI.md` for full documentation. + +## 1. Ensure Gitea and act_runner are running + +// turbo + +```bash +brew services start gitea && brew services start act_runner && sleep 2 && brew services list | grep -E "gitea|act_runner" +``` + +## 2. Verify Gitea is responding + +// turbo + +```bash +curl -s http://localhost:3300/api/v1/version | python3 -c "import sys,json; print('Gitea', json.load(sys.stdin)['version'])" +``` + +## 3. Push all workspace repos to Gitea to trigger CI + +This pushes `main` to the `gitea` remote for every repo that has one configured. +Each push triggers the `.gitea/workflows/ci.yml` workflow on the local runner. 
+ +```bash +REPOS_DIR="/Users/sd9235/code/mygh" +for repo in \ + learning_ai_common_plat \ + learning_voice_ai_agent \ + learning_multimodal_memory_agents \ + learning_ai_clock \ + learning_ai_fastgap \ + learning_ai_jarvis_jr \ + learning_ai_peakpulse \ + learning_ai_notes \ + learning_ai_flowmonk \ + learning_ai_trails \ + learning_ai_local_memory_gpt; do + echo "=== $repo ===" + cd "$REPOS_DIR/$repo" + git push gitea main 2>&1 | tail -2 +done +``` + +## 4. Wait for jobs to process, then check results + +Wait ~2 minutes per repo for the runner (capacity=1) to process the queue, then check results. + +```bash +REPOS_DIR="/Users/sd9235/code/mygh" +for repo in \ + learning_ai_common_plat \ + learning_ai_clock \ + learning_ai_trails \ + learning_ai_flowmonk \ + learning_ai_notes \ + learning_ai_fastgap \ + learning_ai_jarvis_jr \ + learning_ai_peakpulse \ + learning_ai_local_memory_gpt \ + learning_voice_ai_agent \ + learning_multimodal_memory_agents; do + echo "=== $repo ===" + curl -s -u "bytelyst:bytelyst123" "http://localhost:3300/api/v1/repos/bytelyst/$repo/actions/jobs" | python3 -c " +import sys, json +jobs = json.load(sys.stdin).get('jobs', []) +if not jobs: + print(' (no jobs)') +else: + max_run = max(j['run_id'] for j in jobs) + for j in jobs: + if j['run_id'] == max_run: + c = j.get('conclusion','pending') + icon = '✅' if c == 'success' else '❌' if c == 'failure' else '⏳' + print(f' {icon} {c:12} {j[\"name\"]}') +" 2>/dev/null +done +``` + +## 5. (Optional) View logs for a failing job + +Replace REPO and JOB_ID with the repo name and job number from step 4. + +```bash +REPO="learning_ai_clock" +JOB_ID="76" +curl -s -u "bytelyst:bytelyst123" "http://localhost:3300/api/v1/repos/bytelyst/$REPO/actions/jobs/$JOB_ID/logs" | tail -30 +``` + +## 6. 
(Optional) Stop Gitea services + +```bash +brew services stop act_runner && brew services stop gitea +``` + +## Troubleshooting + +- **Runner not picking up jobs:** `brew services restart act_runner` +- **Stale .next/lock:** `rm -f /Users/sd9235/code/mygh/learning_ai_common_plat/dashboards/*-web/.next/lock` +- **Permission denied on tsc:** `chmod +x /Users/sd9235/code/mygh/learning_ai_common_plat/node_modules/.bin/*` +- **Check runner log:** `tail -30 /opt/homebrew/var/log/act_runner.err` diff --git a/__LOCAL_LLMs/AI_IDE_CHAT_HISTORY/WINDSURF/repo_workflows/learning_ai_common_plat/repos.txt b/__LOCAL_LLMs/AI_IDE_CHAT_HISTORY/WINDSURF/repo_workflows/learning_ai_common_plat/repos.txt index df66e8dc..a7546012 100644 --- a/__LOCAL_LLMs/AI_IDE_CHAT_HISTORY/WINDSURF/repo_workflows/learning_ai_common_plat/repos.txt +++ b/__LOCAL_LLMs/AI_IDE_CHAT_HISTORY/WINDSURF/repo_workflows/learning_ai_common_plat/repos.txt @@ -12,6 +12,7 @@ learning_ai_peakpulse learning_ai_notes learning_ai_flowmonk learning_ai_trails +learning_ai_local_memory_gpt learning_ai_smart_auth learning_ai_auth_app learning_ai_productivity_web diff --git a/__LOCAL_LLMs/AI_IDE_CHAT_HISTORY/WINDSURF/repo_workflows/learning_ai_common_plat/verify-all-backends.md b/__LOCAL_LLMs/AI_IDE_CHAT_HISTORY/WINDSURF/repo_workflows/learning_ai_common_plat/verify-all-backends.md new file mode 100644 index 00000000..30c43512 --- /dev/null +++ b/__LOCAL_LLMs/AI_IDE_CHAT_HISTORY/WINDSURF/repo_workflows/learning_ai_common_plat/verify-all-backends.md @@ -0,0 +1,109 @@ +--- +description: Quick local typecheck + test + build across all product backends (fast complement to /gitea-ci) +--- + +# Verify All Backends (Local) + +Run typecheck, test, and build across all product backends locally — without needing Gitea. +Use this for fast pre-push validation. For full CI verification, use `/gitea-ci` instead. 
+ +**When to use:** + +- After modifying a `@bytelyst/*` shared package +- Before a bulk push to all repos +- Quick health check without waiting for Gitea runner queue + +## 1. Build common-plat packages first (dependency for all backends) + +```bash +cd /Users/sd9235/code/mygh/learning_ai_common_plat && pnpm build +``` + +## 2. Verify all product backends + +Run typecheck + test + build for each backend. A backend is marked failed at its first failing step, but the loop continues through the remaining backends and prints a pass/fail summary at the end. Run `set -o pipefail` first: each step is piped through `tail`, so without it a failing step still reports success. + +```bash +REPOS_DIR="/Users/sd9235/code/mygh" +PASSED=0 +FAILED=0 +FAILURES="" + +for entry in \ + "learning_ai_notes:@notelett/backend" \ + "learning_ai_local_memory_gpt:@localmemgpt/backend" \ + "learning_ai_trails:@actiontrail/backend" \ + "learning_ai_fastgap:@nomgap/backend" \ + "learning_ai_clock:@chronomind/backend" \ + "learning_ai_jarvis_jr:@jarvisjr/backend" \ + "learning_ai_peakpulse:@peakpulse/backend" \ + "learning_voice_ai_agent:@lysnrai/backend" \ + "learning_ai_flowmonk:@flowmonk/backend"; do + + repo="${entry%%:*}" + filter="${entry##*:}" + echo "" + echo "━━━ $repo ($filter) ━━━" + + cd "$REPOS_DIR/$repo" + if pnpm --filter "$filter" run typecheck 2>&1 | tail -3 && \ + pnpm --filter "$filter" run test 2>&1 | tail -5 && \ + pnpm --filter "$filter" run build 2>&1 | tail -3; then + echo "✅ $repo PASSED" + PASSED=$((PASSED + 1)) + else + echo "❌ $repo FAILED" + FAILED=$((FAILED + 1)) + FAILURES="$FAILURES\n - $repo" + fi +done + +echo "" +echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━" +echo "✅ Passed: $PASSED" +echo "❌ Failed: $FAILED" +if [ -n "$FAILURES" ]; then + echo -e "Failures:$FAILURES" +fi +``` + +## 3. (Optional) Verify all web apps too + +Web builds take longer. Only run if you suspect web-side breakage. 
+ +```bash +REPOS_DIR="/Users/sd9235/code/mygh" + +for entry in \ + "learning_ai_notes:@notelett/web" \ + "learning_ai_local_memory_gpt:@localmemgpt/web" \ + "learning_ai_trails:@actiontrail/web" \ + "learning_ai_fastgap:@nomgap/web" \ + "learning_ai_clock:web" \ + "learning_ai_jarvis_jr:jarvisjr-web" \ + "learning_voice_ai_agent:user-dashboard-web" \ + "learning_ai_flowmonk:@flowmonk/web"; do + + repo="${entry%%:*}" + filter="${entry##*:}" + echo "" + echo "━━━ $repo web ($filter) ━━━" + + cd "$REPOS_DIR/$repo" + if pnpm --filter "$filter" run typecheck 2>&1 | tail -3 && \ + pnpm --filter "$filter" run build 2>&1 | tail -5; then + echo "✅ $repo web PASSED" + else + echo "❌ $repo web FAILED" + fi +done +``` + +## 4. After fixing issues + +If any backend/web failed: + +1. Fix the issue in the affected repo +2. Re-run the failing step only to confirm +3. Commit with: `fix(scope): description` +4. Run `/gitea-ci` for full CI verification diff --git a/docs/devops/KUBERNETES_ROADMAP.md b/docs/devops/KUBERNETES_ROADMAP.md new file mode 100644 index 00000000..e838f8cd --- /dev/null +++ b/docs/devops/KUBERNETES_ROADMAP.md @@ -0,0 +1,359 @@ +# ByteLyst Ecosystem — Kubernetes Roadmap + +This document is the standalone roadmap for moving the ByteLyst ecosystem from Docker Compose on a single VM to local Kubernetes practice and eventually production-grade Kubernetes deployment. + +## Scope + +Use this roadmap for: + +- Docker Compose → Docker Desktop Kubernetes / K3s transition planning +- local Kubernetes validation strategy +- Helm/chart planning +- Kubernetes best practices for deployments, security, probes, ingress, and scaling +- secrets progression from `.env.ecosystem` to Kubernetes `Secret` objects and later Azure Key Vault integration +- CI/CD expectations for image promotion and chart versioning + +This document does **not** replace `docs/devops/SINGLE_VM_DEPLOYMENT.md`. 
+ +`SINGLE_VM_DEPLOYMENT.md` remains the source of truth for: + +- single-VM deployment scope +- Docker Compose ecosystem architecture +- Dockerization and package-manager-aware deployment guidance +- current implementation status and audit findings + +## Current State + +### Completed foundation + +- Docker Compose ecosystem architecture is documented +- product repos have Dockerfiles and `docker-prep.sh` +- shared services have been built and validated in the ecosystem stack +- LocalMemGPT Linux-host Ollama access is addressed in Compose via `extra_hosts` +- deployment docs now separate Compose/source-of-truth concerns from Kubernetes roadmap concerns + +### Not yet completed + +- standalone local Kubernetes assets +- Helm charts / values structure in-repo +- Kubernetes manifests for the ecosystem +- local K8s deployment script implementation +- full K3s / Docker Desktop K8s validation + +## Phase Plan + +### Phase 1 — Docker Compose baseline + +Goal: keep Compose as the operational baseline while Docker/build/runtime contracts stabilize. + +Success criteria: + +- all ecosystem images build successfully +- all required services start in Docker Compose +- health endpoints are reachable for shared services and product backends +- major host/container networking assumptions are documented + +### Phase 2 — Local Kubernetes practice + +Goal: run the same ecosystem ideas on a single-node Kubernetes environment for production-readiness practice. 
+ +Two supported paths: + +#### Option A: Docker Desktop Kubernetes + +Best for: + +- macOS / Windows development +- quick iteration +- visual debugging + +Characteristics: + +- built-in `kind`-style cluster +- Docker-built images are immediately visible to the cluster +- easiest local path for validating manifests and Helm shape + +#### Option B: K3s + +Best for: + +- Linux VMs +- Hetzner or cloud-hosted single-node practice +- future multi-node growth + +Characteristics: + +- lightweight CNCF-certified Kubernetes distro +- built-in Traefik ingress +- built-in local-path storage class +- can evolve from single-node to multi-node more naturally than Docker Desktop + +### Phase 3 — Production-grade Kubernetes shape + +Goal: make local K8s patterns production-ready enough to port to AKS/EKS/GKE later without redesign. + +Key outcomes: + +- health probes standardized +- rolling update behavior standardized +- security context standardized +- ingress and SSE/WebSocket behavior standardized +- Helm values layering defined +- secret management progression defined + +### Phase 4 — Managed Kubernetes target + +Goal: preserve the same deployment model while moving to managed infrastructure. + +Expected direction: + +- managed ingress controller and TLS +- chart/image promotion flow +- Azure Key Vault CSI integration +- HPA and environment-specific overlays + +## Local Kubernetes Best Practices + +### 1. Deployment rollout safety + +Use zero-downtime defaults: + +```yaml +spec: + strategy: + type: RollingUpdate + rollingUpdate: + maxUnavailable: 0 + maxSurge: 1 + template: + spec: + terminationGracePeriodSeconds: 45 + containers: + - lifecycle: + preStop: + exec: + command: ['sleep', '5'] +``` + +Guidance: + +- never use aggressive `maxUnavailable` values for user-facing services +- match `terminationGracePeriodSeconds` to graceful shutdown behavior +- use `preStop` delay to give the load balancer time to drain + +### 2. 
Pod security context + +Default posture: + +```yaml +securityContext: + runAsNonRoot: true + runAsUser: 1000 + runAsGroup: 1000 + allowPrivilegeEscalation: false + readOnlyRootFilesystem: true +``` + +If writable paths are needed: + +```yaml +volumes: + - name: tmp + emptyDir: {} + - name: cache + emptyDir: {} +volumeMounts: + - name: tmp + mountPath: /tmp + - name: cache + mountPath: /home/node/.cache +``` + +Guidance: + +- Fastify backends should generally tolerate read-only root filesystems +- Next.js standalone servers may need writable `/tmp` + +### 3. Health probes + +Use dedicated `/health` endpoints: + +```yaml +livenessProbe: + httpGet: + path: /health + port: 4003 + initialDelaySeconds: 10 + periodSeconds: 10 + timeoutSeconds: 5 +readinessProbe: + httpGet: + path: /health + port: 4003 + initialDelaySeconds: 5 + periodSeconds: 5 + timeoutSeconds: 5 +``` + +Guidance: + +- do not use heavy endpoints like `/openapi.json` for liveness +- keep timeouts short enough to expose real failures quickly + +### 4. Ingress for SSE / WebSocket traffic + +For streaming or long-lived connections: + +```yaml +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + annotations: + nginx.ingress.kubernetes.io/proxy-read-timeout: '1800' + nginx.ingress.kubernetes.io/proxy-send-timeout: '1800' + nginx.ingress.kubernetes.io/proxy-buffering: 'off' + nginx.ingress.kubernetes.io/proxy-http-version: '1.1' + nginx.ingress.kubernetes.io/configuration-snippet: | + proxy_set_header Upgrade $http_upgrade; + proxy_set_header Connection "upgrade"; +``` + +Applies to: + +- FlowMonk SSE +- LocalMemGPT streaming +- future realtime features + +### 5. 
HPA API choice + +Use: + +```yaml +apiVersion: autoscaling/v2 +``` + +Avoid: + +```yaml +apiVersion: autoscaling/v1 +``` + +## Docker and Image Guidance for K8s Readiness + +| Practice | Do | Avoid | +| ------------------- | -------------------------------------------- | --------------------------------------------------------- | +| **ENTRYPOINT form** | `ENTRYPOINT ["node", "dist/server.js"]` | shell-form entrypoints | +| **COPY scope** | selective `COPY` steps | broad `COPY . .` | +| **Layer count** | combine related `RUN` steps | fragmented install layers | +| **Non-root** | run as `node` or non-root UID | root runtime | +| **Local variant** | allow local Dockerfile variants where needed | one Dockerfile that only works in one network environment | +| **Build args** | use `ARG`/`ENV` deliberately | hardcoded deployment assumptions | + +## Helm Values Layering + +Recommended structure: + +```text +values.yaml +├── env/local.yaml +├── env/dev.yaml +└── env/prod.yaml +``` + +Recommended usage: + +```bash +helm upgrade --install bytelyst ./helm/bytelyst-ecosystem -f helm/bytelyst-ecosystem/values.yaml -f helm/bytelyst-ecosystem/env/local.yaml +helm upgrade --install bytelyst ./helm/bytelyst-ecosystem -f helm/bytelyst-ecosystem/values.yaml -f helm/bytelyst-ecosystem/env/dev.yaml +helm upgrade --install bytelyst ./helm/bytelyst-ecosystem -f helm/bytelyst-ecosystem/values.yaml -f helm/bytelyst-ecosystem/env/prod.yaml +``` + +## Namespace Strategy + +Use helpers rather than hardcoded namespaces: + +```yaml +{{ include "myapp.namespace" . 
}} +``` + +Avoid: + +```yaml +{{ .Values.namespace }} +``` + +## Secrets Progression + +| Phase | Strategy | Complexity | +| ----------- | --------------------------------------------- | ---------- | +| **Phase 1** | `.env.ecosystem` file (gitignored) | Trivial | +| **Phase 2** | Native Kubernetes `Secret` objects | Low | +| **Phase 3** | Azure Key Vault via CSI `SecretProviderClass` | Medium | +| **Phase 4** | AKV + operator/CRD auto-sync model | High | + +## CI/CD Expectations + +| Practice | Expectation | +| -------------------- | --------------------------------------------------------------- | +| **Semantic release** | keep `feat:` / `fix:` conventions usable for release automation | +| **Image promotion** | build once, promote later; do not rebuild for prod | +| **Branch pipelines** | branch-specific quality and deploy stages | +| **Security gates** | SAST/SCA in pipeline | +| **Quality gates** | tests, coverage, type safety, build verification | +| **Chart versioning** | publish/version charts independently | + +## Local K8s Deployment Workflow Shape + +A future local K8s script should do the following: + +1. detect Docker Desktop K8s vs K3s +2. build required images +3. load/import images into the local cluster runtime when needed +4. create namespace +5. create secrets from `.env.ecosystem` +6. deploy Helm chart with local overlay +7. wait for rollout +8. 
print verification commands and port-forward hints + +## Recommended Next Items + +### Next now + +- run full Docker Compose ecosystem validation end-to-end +- capture blockers by service +- decide whether K8s phase starts with Docker Desktop K8s or K3s first + +### Next after Compose validation + +- define `helm/bytelyst-ecosystem/` layout +- define namespace and secret model +- draft minimal shared-service-first Kubernetes manifests or chart values +- create local K8s deploy helper script + +### Hold for later + +- full Helm/K3s implementation across the ecosystem +- managed cluster rollout details +- advanced autoscaling and production ingress hardening + +## Quick Reference + +| Practice | Compose | Local K8s | Prod K8s | +| ---------------------------- | --------------------------------------- | --------- | -------- | +| Zero-downtime rolling update | N/A | Apply | Apply | +| Pod security context | N/A | Apply | Apply | +| Health probes | use Docker `healthcheck` where relevant | Apply | Apply | +| SSE/WebSocket ingress tuning | N/A | If needed | Apply | +| HPA v2 | N/A | Optional | Apply | +| Exec-form entrypoint | Apply now | Apply | Apply | +| Selective COPY | Apply now | Apply | Apply | +| Non-root user | Apply now | Apply | Apply | +| Values layering | N/A | Apply | Apply | +| AKV CSI | N/A | N/A | Apply | +| Image promotion | N/A | N/A | Apply | + +## Status + +- standalone Kubernetes roadmap: **created** +- Compose source-of-truth split: **done** +- Helm/K3s implementation: **held pending validation** diff --git a/scripts/prep-consumer.sh b/scripts/prep-consumer.sh index 2a569579..07108930 100755 --- a/scripts/prep-consumer.sh +++ b/scripts/prep-consumer.sh @@ -149,6 +149,7 @@ fs.writeFileSync( NODE mkdir -p "$PACK_SRC_DIR" +TARGET_BASENAME="$(basename "$TARGET_DIR")" COUNT=0 while IFS= read -r scoped_name; do @@ -170,22 +171,26 @@ while IFS= read -r scoped_name; do cp -R "$PKG_DIR" "$TMP_PKG_DIR" rm -rf "$TMP_PKG_DIR/node_modules" - node - 
"$TMP_PKG_DIR/package.json" "$PLAN_FILE" <<'NODE' + node - "$TMP_PKG_DIR/package.json" "$PLAN_FILE" "$TARGET_BASENAME" <<'NODE' const fs = require('fs'); const packageJsonPath = process.argv[2]; const planPath = process.argv[3]; +const targetBasename = process.argv[4]; const pkg = JSON.parse(fs.readFileSync(packageJsonPath, 'utf8')); const plan = JSON.parse(fs.readFileSync(planPath, 'utf8')); +const tarballRefFor = (scopedName) => + `file:/app/${targetBasename}/.docker-deps/${scopedName.replace('@bytelyst/', 'bytelyst-')}-${plan.versions[scopedName]}.tgz`; + for (const depField of ['dependencies', 'optionalDependencies', 'peerDependencies']) { for (const [depName, depVersion] of Object.entries(pkg[depField] ?? {})) { if (!depName.startsWith('@bytelyst/')) { continue; } - if (typeof depVersion === 'string' && depVersion.startsWith('workspace:') && plan.versions[depName]) { - pkg[depField][depName] = plan.versions[depName]; + if (plan.versions[depName]) { + pkg[depField][depName] = tarballRefFor(depName); } } }