fix(infra): switch cosmos-emulator from vnext-preview to stable :latest

The vnext-preview (Postgres-backed) image returned PGCosmosError plaintext
for cross-partition queryFeed calls, crashing @azure/cosmos at JSON.parse.
:latest is HTTPS-only with a self-signed cert, so consumers that still use
the emulator set NODE_TLS_REJECT_UNAUTHORIZED=0 to skip certificate
verification (dev-prototype only). platform-service now points at the real
Azure Cosmos account (per .env), so its dependency on the local emulator
service is removed.
This commit is contained in:
Saravana Kumar 2026-05-30 09:59:05 +00:00
parent a8538db774
commit 72fa2d297f

View File

@ -34,25 +34,36 @@ services:
restart: unless-stopped restart: unless-stopped
# ── Azure Cosmos DB Emulator (prototype only) ───────────────── # ── Azure Cosmos DB Emulator (prototype only) ─────────────────
# 2026-05-30: switched off `:vnext-preview` (Postgres-backed experimental
# image) because cross-partition `queryFeed` returned plain-text
# `PGCosmosError(3, "Database query failed: PostgresError(SqlState(EXX000))...")`
# for every query, which crashes the @azure/cosmos SDK at JSON.parse. Affected
# every collection — point reads worked, but anything filtering by non-PK
# fields failed (login, register, OAuth, feature-flag list, etc.).
# `:latest` is the stable Linux port of the Windows Cosmos emulator and
# returns proper JSON errors. It is HTTPS-only with a self-signed cert,
# so consumers in this compose file set NODE_TLS_REJECT_UNAUTHORIZED=0,
# which disables TLS certificate verification in those Node.js processes
# (dev-prototype only — never set that in production).
cosmos-emulator: cosmos-emulator:
image: mcr.microsoft.com/cosmosdb/linux/azure-cosmos-emulator:vnext-preview image: mcr.microsoft.com/cosmosdb/linux/azure-cosmos-emulator:latest
ports: ports:
- '8081:8081' - '8081:8081'
- '1234:1234' - '1234:1234'
environment: environment:
- PROTOCOL=http - AZURE_COSMOS_EMULATOR_PARTITION_COUNT=10
- AZURE_COSMOS_EMULATOR_ENABLE_DATA_PERSISTENCE=false
- ENABLE_EXPLORER=true - ENABLE_EXPLORER=true
- GATEWAY_PUBLIC_ENDPOINT=cosmos-emulator - GATEWAY_PUBLIC_ENDPOINT=cosmos-emulator
healthcheck: healthcheck:
test: test:
[ [
'CMD-SHELL', 'CMD-SHELL',
'bash -lc ''exec 3<>/dev/tcp/127.0.0.1/8080; printf "GET /ready HTTP/1.1\r\nHost: localhost\r\nConnection: close\r\n\r\n" >&3; grep -q "200 OK" <&3''', 'curl -sk --max-time 3 https://127.0.0.1:8081/_explorer/emulator.pem > /dev/null',
] ]
interval: 10s interval: 10s
timeout: 5s timeout: 5s
retries: 12 retries: 30
start_period: 20s start_period: 60s
restart: unless-stopped restart: unless-stopped
# ── Loki (Log Aggregation) ──────────────────────────────────── # ── Loki (Log Aggregation) ────────────────────────────────────
@ -130,6 +141,12 @@ services:
- PORT=4003 - PORT=4003
# Local/dev convenience: ensure Cosmos DB + containers exist. # Local/dev convenience: ensure Cosmos DB + containers exist.
- COSMOS_AUTO_INIT=true - COSMOS_AUTO_INIT=true
# 2026-05-30: platform-service no longer uses the local Cosmos emulator
# (Postgres-backed vnext-preview broke `queryFeed` with `PGCosmosError`;
# stable :latest crashed under load with a core dump). It points at the
# real Azure Cosmos DB account (`cosmos-mywisprai`, db `bytelyst`) instead.
# Values come from `.env`. platform-service no longer depends on the
# cosmos-emulator service block; other services in this compose file
# still use it.
- PLATFORM_SERVICE_URL=http://platform-service:4003 - PLATFORM_SERVICE_URL=http://platform-service:4003
- EXTRACTION_SERVICE_URL=http://extraction-service:4005 - EXTRACTION_SERVICE_URL=http://extraction-service:4005
- MCP_SERVER_URL=http://mcp-server:4007 - MCP_SERVER_URL=http://mcp-server:4007
@ -139,8 +156,6 @@ services:
condition: service_healthy condition: service_healthy
azurite: azurite:
condition: service_healthy condition: service_healthy
cosmos-emulator:
condition: service_healthy
labels: labels:
- 'traefik.enable=true' - 'traefik.enable=true'
- 'traefik.http.routers.platform.rule=PathPrefix(`/api`) || PathPrefix(`/public`) || PathPrefix(`/health`)' - 'traefik.http.routers.platform.rule=PathPrefix(`/api`) || PathPrefix(`/public`) || PathPrefix(`/health`)'
@ -164,6 +179,10 @@ services:
environment: environment:
- PORT=4005 - PORT=4005
- PYTHON_SIDECAR_URL=http://localhost:4006 - PYTHON_SIDECAR_URL=http://localhost:4006
# See cosmos-emulator service block: stable image is HTTPS-only with
# a self-signed cert, so TLS certificate verification is disabled below.
# Dev-prototype only.
- COSMOS_ENDPOINT=https://cosmos-emulator:8081
- NODE_TLS_REJECT_UNAUTHORIZED=0
depends_on: depends_on:
cosmos-emulator: cosmos-emulator:
condition: service_healthy condition: service_healthy
@ -231,6 +250,8 @@ services:
- NODE_ENV=development - NODE_ENV=development
- PRODUCT_ID=clawcowork - PRODUCT_ID=clawcowork
- COSMOS_ENDPOINT=https://cosmos-emulator:8081 - COSMOS_ENDPOINT=https://cosmos-emulator:8081
# Accept the stable emulator's self-signed cert by disabling TLS
# certificate verification — dev-prototype only.
- NODE_TLS_REJECT_UNAUTHORIZED=0
- PLATFORM_SERVICE_URL=http://platform-service:4003 - PLATFORM_SERVICE_URL=http://platform-service:4003
- EXTRACTION_SERVICE_URL=http://extraction-service:4005 - EXTRACTION_SERVICE_URL=http://extraction-service:4005
depends_on: depends_on: