diff --git a/docker-compose.yml b/docker-compose.yml index 1817618d..4ebaacc2 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -33,38 +33,28 @@ services: retries: 6 restart: unless-stopped - # ── Azure Cosmos DB Emulator (prototype only) ───────────────── - # 2026-05-30: switched off `:vnext-preview` (Postgres-backed experimental - # image) because cross-partition `queryFeed` returned plain-text - # `PGCosmosError(3, "Database query failed: PostgresError(SqlState(EXX000))...")` - # for every query, which crashes the @azure/cosmos SDK at JSON.parse. Affected - # every collection — point reads worked, but anything filtering by non-PK - # fields failed (login, register, OAuth, feature-flag list, etc.). - # `:latest` is the stable Linux port of the Windows Cosmos emulator and - # returns proper JSON errors. It is HTTPS-only with a self-signed cert, - # so consumers in this compose file are gated by NODE_TLS_REJECT_UNAUTHORIZED=0 - # (dev-prototype only — never set that in production). 
- cosmos-emulator: - image: mcr.microsoft.com/cosmosdb/linux/azure-cosmos-emulator:latest - ports: - - '8081:8081' - - '1234:1234' - environment: - - AZURE_COSMOS_EMULATOR_PARTITION_COUNT=10 - - AZURE_COSMOS_EMULATOR_ENABLE_DATA_PERSISTENCE=false - - ENABLE_EXPLORER=true - - GATEWAY_PUBLIC_ENDPOINT=cosmos-emulator - healthcheck: - test: - [ - 'CMD-SHELL', - 'curl -sk --max-time 3 https://127.0.0.1:8081/_explorer/emulator.pem > /dev/null', - ] - interval: 10s - timeout: 5s - retries: 30 - start_period: 60s - restart: unless-stopped + # ── Azure Cosmos DB Emulator — REMOVED 2026-05-30 ───────────── + # + # Both image variants we tried were unfit for the prototype: + # - `:vnext-preview` (Postgres-backed experimental): cross-partition + # `queryFeed` returned plain-text PGCosmosError strings instead of + # JSON, crashing @azure/cosmos at JSON.parse on every login, + # register, OAuth, and feature-flag list call + # - `:latest` (stable Linux port of Windows emulator): HTTPS-only + # with a self-signed cert, and it core-dumped under modest load, + # leaving services hung waiting on never-resolving futures + # + # Replacement: real Azure Cosmos DB account `cosmos-mywisprai` in + # `rg-mywisprai` (West US 2), database `bytelyst`. All services pick + # up the connection from `.env` (`COSMOS_ENDPOINT`, `COSMOS_KEY`, + # `COSMOS_DATABASE`) via their `env_file: .env` entries below. + # + # If you need a local-only stack for offline development, prefer: + # 1. Mocked Cosmos in tests (already wired across the workspace), or + # 2. A scoped Cosmos account on a free Azure subscription with a + # throwaway database (isolated, but still needs network access) + # Do NOT resurrect the emulator service block without verifying that both + # emulator failure modes described above have been fixed upstream.
# ── Loki (Log Aggregation) ──────────────────────────────────── loki: @@ -179,13 +169,7 @@ services: environment: - PORT=4005 - PYTHON_SIDECAR_URL=http://localhost:4006 - # See cosmos-emulator service block: stable image is HTTPS-only with - # a self-signed cert. Dev-prototype only. - - COSMOS_ENDPOINT=https://cosmos-emulator:8081 - - NODE_TLS_REJECT_UNAUTHORIZED=0 - depends_on: - cosmos-emulator: - condition: service_healthy + # COSMOS_* come from `.env` (real Cosmos account; see the "Cosmos DB Emulator — REMOVED" note above the Loki service). labels: - 'traefik.enable=true' - 'traefik.http.routers.extraction.rule=PathPrefix(`/api/extract`) || PathPrefix(`/api/tasks`)' @@ -249,14 +233,10 @@ services: - PORT=4009 - NODE_ENV=development - PRODUCT_ID=clawcowork - - COSMOS_ENDPOINT=https://cosmos-emulator:8081 - # Stable emulator's self-signed cert — dev-prototype only. - - NODE_TLS_REJECT_UNAUTHORIZED=0 + # COSMOS_* come from `.env` (real Cosmos account; see the "Cosmos DB Emulator — REMOVED" note above the Loki service). - PLATFORM_SERVICE_URL=http://platform-service:4003 - EXTRACTION_SERVICE_URL=http://extraction-service:4005 depends_on: - cosmos-emulator: - condition: service_healthy platform-service: condition: service_healthy labels: diff --git a/services/cowork-service/src/lib/ipc-bridge.ts b/services/cowork-service/src/lib/ipc-bridge.ts index db494efd..cf0fa9b8 100644 --- a/services/cowork-service/src/lib/ipc-bridge.ts +++ b/services/cowork-service/src/lib/ipc-bridge.ts @@ -77,6 +77,19 @@ export class IpcBridge { this.log.error(`IPC child process error: ${err.message}`); }); + // Without these listeners, an EPIPE on stdin (child died mid-write) or + // a stdout/stderr error during teardown becomes an unhandled stream + // error and crashes the test runner / parent process.
+ this.child.stdin?.on('error', err => { + this.log.error(`IPC child stdin error: ${err.message}`); + }); + this.child.stdout?.on('error', err => { + this.log.error(`IPC child stdout error: ${err.message}`); + }); + this.child.stderr?.on('error', err => { + this.log.error(`IPC child stderr error: ${err.message}`); + }); + this.child.on('exit', (code, signal) => { this.log.info(`IPC child process exited: code=${code} signal=${signal}`); this.rejectAllPending(new Error(`IPC child process exited (code=${code})`));