From 72fa2d297f0fd5fd0b4802885dda8fba6acfddc5 Mon Sep 17 00:00:00 2001 From: Saravana Kumar Date: Sat, 30 May 2026 09:59:05 +0000 Subject: [PATCH] fix(infra): switch cosmos-emulator from vnext-preview to stable :latest The vnext-preview (Postgres-backed) image returned PGCosmosError plaintext for cross-partition queryFeed calls, crashing @azure/cosmos at JSON.parse. :latest is HTTPS-only with a self-signed cert, so its consumers set NODE_TLS_REJECT_UNAUTHORIZED=0 to skip certificate verification (dev-prototype only). platform-service now points at the real Azure Cosmos account (per .env), so its dependency on the local emulator service is removed. --- docker-compose.yml | 35 ++++++++++++++++++++++++++++------- 1 file changed, 28 insertions(+), 7 deletions(-) diff --git a/docker-compose.yml b/docker-compose.yml index f0c4f497..1817618d 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -34,25 +34,36 @@ services: restart: unless-stopped # ── Azure Cosmos DB Emulator (prototype only) ───────────────── + # 2026-05-30: switched off `:vnext-preview` (Postgres-backed experimental + # image) because cross-partition `queryFeed` returned plain-text + # `PGCosmosError(3, "Database query failed: PostgresError(SqlState(EXX000))...")` + # for every query, which crashed the @azure/cosmos SDK at JSON.parse. This affected + # every collection — point reads worked, but anything filtering by non-PK + # fields failed (login, register, OAuth, feature-flag list, etc.). + # `:latest` is the stable Linux port of the Windows Cosmos emulator and + # returns proper JSON errors. It is HTTPS-only with a self-signed cert, + # so consumers in this compose file set NODE_TLS_REJECT_UNAUTHORIZED=0 to skip + # certificate verification (dev-prototype only — never set that in production). 
cosmos-emulator: - image: mcr.microsoft.com/cosmosdb/linux/azure-cosmos-emulator:vnext-preview + image: mcr.microsoft.com/cosmosdb/linux/azure-cosmos-emulator:latest ports: - '8081:8081' - '1234:1234' environment: - PROTOCOL=http + - AZURE_COSMOS_EMULATOR_PARTITION_COUNT=10 + - AZURE_COSMOS_EMULATOR_ENABLE_DATA_PERSISTENCE=false - ENABLE_EXPLORER=true - GATEWAY_PUBLIC_ENDPOINT=cosmos-emulator healthcheck: test: [ 'CMD-SHELL', - 'bash -lc ''exec 3<>/dev/tcp/127.0.0.1/8080; printf "GET /ready HTTP/1.1\r\nHost: localhost\r\nConnection: close\r\n\r\n" >&3; grep -q "200 OK" <&3''', + 'curl -fsk --max-time 3 https://127.0.0.1:8081/_explorer/emulator.pem > /dev/null', ] interval: 10s timeout: 5s - retries: 12 - start_period: 20s + retries: 30 + start_period: 60s restart: unless-stopped # ── Loki (Log Aggregation) ──────────────────────────────────── @@ -130,6 +141,12 @@ services: - PORT=4003 # Local/dev convenience: ensure Cosmos DB + containers exist. - COSMOS_AUTO_INIT=true + # 2026-05-30: platform-service no longer uses the local Cosmos emulator + # (Postgres-backed vnext-preview broke `queryFeed` with `PGCosmosError`; + # stable :latest crashed under load with a core dump). It now points at the + # real Azure Cosmos DB account (`cosmos-mywisprai`, db `bytelyst`); values + # come from `.env`. platform-service no longer depends on cosmos-emulator, + # but that service block stays: other services in this file still use it. 
- PLATFORM_SERVICE_URL=http://platform-service:4003 - EXTRACTION_SERVICE_URL=http://extraction-service:4005 - MCP_SERVER_URL=http://mcp-server:4007 @@ -139,8 +156,6 @@ services: condition: service_healthy azurite: condition: service_healthy - cosmos-emulator: - condition: service_healthy labels: - 'traefik.enable=true' - 'traefik.http.routers.platform.rule=PathPrefix(`/api`) || PathPrefix(`/public`) || PathPrefix(`/health`)' @@ -164,6 +179,10 @@ services: environment: - PORT=4005 - PYTHON_SIDECAR_URL=http://localhost:4006 + # The cosmos-emulator image is HTTPS-only with a self-signed cert, so Node.js + # TLS certificate verification is switched off below. Dev-prototype only. + - COSMOS_ENDPOINT=https://cosmos-emulator:8081 + - NODE_TLS_REJECT_UNAUTHORIZED=0 depends_on: cosmos-emulator: condition: service_healthy @@ -231,6 +250,8 @@ services: - NODE_ENV=development - PRODUCT_ID=clawcowork - COSMOS_ENDPOINT=https://cosmos-emulator:8081 + # Skip Node.js TLS verification for the emulator's self-signed cert — dev-prototype only. + - NODE_TLS_REJECT_UNAUTHORIZED=0 - PLATFORM_SERVICE_URL=http://platform-service:4003 - EXTRACTION_SERVICE_URL=http://extraction-service:4005 depends_on: