fix(infra): switch cosmos-emulator from vnext-preview to stable :latest

The vnext-preview (Postgres-backed) image returned PGCosmosError plaintext
for cross-partition queryFeed calls, crashing @azure/cosmos at JSON.parse.
:latest is HTTPS-only with a self-signed cert, so services that still use the
emulator set NODE_TLS_REJECT_UNAUTHORIZED=0 to accept it (dev-prototype only).
platform-service now points at the real Azure Cosmos account (per .env), so
its dependency on the local emulator service is removed.
This commit is contained in:
Saravana Kumar 2026-05-30 09:59:05 +00:00
parent a8538db774
commit 72fa2d297f

View File

@ -34,25 +34,36 @@ services:
restart: unless-stopped
# ── Azure Cosmos DB Emulator (prototype only) ─────────────────
# 2026-05-30: switched off `:vnext-preview` (Postgres-backed experimental
# image) because cross-partition `queryFeed` returned plain-text
# `PGCosmosError(3, "Database query failed: PostgresError(SqlState(EXX000))...")`
# for every query, which crashes the @azure/cosmos SDK at JSON.parse. Affected
# every collection — point reads worked, but anything filtering by non-PK
# fields failed (login, register, OAuth, feature-flag list, etc.).
# `:latest` is the stable Linux port of the Windows Cosmos emulator and
# returns proper JSON errors. It is HTTPS-only with a self-signed cert,
# so consumers in this compose file set NODE_TLS_REJECT_UNAUTHORIZED=0 to
# skip certificate verification (dev-prototype only — never set that in
# production).
cosmos-emulator:
# This diff view lists both tags: vnext-preview is the image being replaced,
# :latest is its replacement.
image: mcr.microsoft.com/cosmosdb/linux/azure-cosmos-emulator:vnext-preview
image: mcr.microsoft.com/cosmosdb/linux/azure-cosmos-emulator:latest
ports:
- '8081:8081'
- '1234:1234'
environment:
# NOTE(review): PROTOCOL=http sits next to an image described as HTTPS-only —
# confirm the :latest image still reads/needs this variable.
- PROTOCOL=http
- AZURE_COSMOS_EMULATOR_PARTITION_COUNT=10
- AZURE_COSMOS_EMULATOR_ENABLE_DATA_PERSISTENCE=false
- ENABLE_EXPLORER=true
- GATEWAY_PUBLIC_ENDPOINT=cosmos-emulator
healthcheck:
# Two probes are listed: a bash /dev/tcp request for GET /ready on port 8080
# (passes when the response contains "200 OK"), and a curl fetch of
# /_explorer/emulator.pem over HTTPS on port 8081 (-k skips certificate
# verification, --max-time caps it at 3 seconds).
# NOTE(review): presumably the first is the old probe and the second its
# replacement — confirm against the real file.
test:
[
'CMD-SHELL',
'bash -lc ''exec 3<>/dev/tcp/127.0.0.1/8080; printf "GET /ready HTTP/1.1\r\nHost: localhost\r\nConnection: close\r\n\r\n" >&3; grep -q "200 OK" <&3''',
'curl -sk --max-time 3 https://127.0.0.1:8081/_explorer/emulator.pem > /dev/null',
]
interval: 10s
timeout: 5s
# NOTE(review): retries and start_period each appear twice (12 / 20s, then
# 30 / 60s); presumably old and new values — only one of each can remain,
# since duplicate mapping keys are invalid.
retries: 12
start_period: 20s
retries: 30
start_period: 60s
restart: unless-stopped
# ── Loki (Log Aggregation) ────────────────────────────────────
@ -130,6 +141,12 @@ services:
- PORT=4003
# Local/dev convenience: ensure Cosmos DB + containers exist.
- COSMOS_AUTO_INIT=true
# 2026-05-30: platform-service switched off the local Cosmos emulator
# (Postgres-backed vnext-preview broke `queryFeed` with `PGCosmosError`;
# stable :latest crashed under load with a core dump). It points at the
# real Azure Cosmos DB account (`cosmos-mywisprai`, db `bytelyst`) instead.
# Values come from `.env`. platform-service no longer depends on the
# cosmos-emulator service; that service block stays in this compose file
# for the services that still set COSMOS_ENDPOINT=https://cosmos-emulator:8081.
- PLATFORM_SERVICE_URL=http://platform-service:4003
- EXTRACTION_SERVICE_URL=http://extraction-service:4005
- MCP_SERVER_URL=http://mcp-server:4007
@ -139,8 +156,6 @@ services:
condition: service_healthy
azurite:
condition: service_healthy
cosmos-emulator:
condition: service_healthy
labels:
- 'traefik.enable=true'
- 'traefik.http.routers.platform.rule=PathPrefix(`/api`) || PathPrefix(`/public`) || PathPrefix(`/health`)'
@ -164,6 +179,10 @@ services:
environment:
- PORT=4005
- PYTHON_SIDECAR_URL=http://localhost:4006
# See cosmos-emulator service block: stable image is HTTPS-only with
# a self-signed cert. Dev-prototype only.
- COSMOS_ENDPOINT=https://cosmos-emulator:8081
- NODE_TLS_REJECT_UNAUTHORIZED=0
depends_on:
cosmos-emulator:
condition: service_healthy
@ -231,6 +250,8 @@ services:
- NODE_ENV=development
- PRODUCT_ID=clawcowork
- COSMOS_ENDPOINT=https://cosmos-emulator:8081
# Stable emulator's self-signed cert — dev-prototype only.
- NODE_TLS_REJECT_UNAUTHORIZED=0
- PLATFORM_SERVICE_URL=http://platform-service:4003
- EXTRACTION_SERVICE_URL=http://extraction-service:4005
depends_on: