diff --git a/docs/BACKEND_API_DEPRECATION.md b/docs/BACKEND_API_DEPRECATION.md new file mode 100644 index 0000000..855364f --- /dev/null +++ b/docs/BACKEND_API_DEPRECATION.md @@ -0,0 +1,163 @@ +# Backend API — Deprecation and Legacy Compatibility + +## Purpose + +This document tracks the lifecycle status of all backend API endpoints: +which endpoints are current, which are transitional, and which have been +superseded or are candidates for removal. + +--- + +## Current API Surface + +All endpoints require Bearer token authentication (`requireAuth`) unless marked Public. +Admin endpoints additionally require `requireAdmin`. + +### Health & Observability (Public) + +| Method | Path | Status | Notes | +|---|---|---|---| +| GET | `/health` | Current | Basic liveness — returns uptime + loop health | +| GET | `/health/live` | Current | Kubernetes-style liveness probe | +| GET | `/health/ready` | Current | Kubernetes-style readiness probe | +| GET | `/internal/health` | Current | Internal full health dump (not auth-gated; firewall-restrict in prod) | +| GET | `/metrics` | Current | Prometheus metrics via `prom-client` | + +### Trading State + +| Method | Path | Status | Notes | +|---|---|---|---| +| GET | `/api/state` | Current | Full BotState snapshot for authenticated user | +| GET | `/api/lifecycle/canonical` | Current | Canonical lifecycle event log for a trade | +| GET | `/api/alerts` | Current | Recent alert list | +| GET | `/api/symbol/:symbol` | Current | Per-symbol signal and indicator state | + +### Trading Controls (Admin) + +| Method | Path | Status | Notes | +|---|---|---|---| +| GET | `/internal/trading/status` | Current | Current trading control mode | +| POST | `/internal/trading/pause` | Current | Pause all new trade entries (admin only) | +| POST | `/internal/trading/resume` | Current | Resume trading (admin only) | +| POST | `/api/admin/revert-backfill-batch` | Current | Revert an exit backfill batch (admin only) | + +### Config & Feature Flags + +| 
Method | Path | Status | Notes | +|---|---|---|---| +| GET | `/api/config` | Current | Current runtime config subset (public fields only) | +| GET | `/api/feature-flags` | Current | Authoritative feature-flag contract (backtest, tabs) | +| GET | `/api/admin/config/dynamic` | Current | Full dynamic config (admin only) | +| PUT | `/api/admin/config/dynamic` | Current | Update dynamic config entries (admin only) | + +### User Profile & Identity + +| Method | Path | Status | Notes | +|---|---|---|---| +| GET | `/api/me/profile` | Current | Authenticated user's platform profile | +| PATCH | `/api/me/profile` | Current | Update user profile fields | + +### Trading Profiles (Strategies) + +| Method | Path | Status | Notes | +|---|---|---|---| +| GET | `/api/profiles` | Current | List trading profiles for authenticated user | +| POST | `/api/profiles` | Current | Create a new trading profile | +| PUT | `/api/profiles/:id` | Current | Replace a trading profile | +| PATCH | `/api/profiles/:id/active` | Current | Toggle profile active/inactive | +| DELETE | `/api/profiles/:id` | Current | Delete a trading profile | + +### Orders & Positions + +| Method | Path | Status | Notes | +|---|---|---|---| +| GET | `/api/positions/bootstrap` | Current | Initial position state for dashboard bootstrap | +| GET | `/api/trade-history` | Current | Closed trade history for authenticated user | +| GET | `/api/manual-entries` | Current | Manual trade entries for authenticated user | +| POST | `/api/manual-entries` | Current | Create a manual trade entry | +| PUT | `/api/manual-entries/:id` | Current | Update a manual trade entry | +| DELETE | `/api/manual-entries/:id` | Current | Delete a manual trade entry | +| POST | `/api/trade` | Current | Submit a live trade action (entry/exit signal) | +| POST | `/api/close` | Current | Close a position | + +### Marketplace + +| Method | Path | Status | Notes | +|---|---|---|---| +| GET | `/api/marketplace-presets` | Current | List publicly available 
strategy presets | +| POST | `/api/marketplace-presets` | Current | Create a marketplace preset (admin only) | + +### Backtest + +| Method | Path | Status | Notes | +|---|---|---|---| +| POST | `/api/backtest/run` | Current | Run a backtest (gated by `ENABLE_BACKTEST` flag) | + +### AI & Diagnostics + +| Method | Path | Status | Notes | +|---|---|---|---| +| GET | `/api/ai/health` | Current | AI provider health and fallback chain status | +| POST | `/api/chat` | Current | AI strategy assistant chat endpoint | + +### Events & Reconciliation (Admin) + +| Method | Path | Status | Notes | +|---|---|---|---| +| DELETE | `/api/events` | Current | Clear operational event log (admin only) | +| GET | `/api/admin/audit` | Current | Trade audit event log — requires `audit-events` Cosmos container (admin only) | +| GET | `/api/reconciliation/backfill/audit` | Current | Backfill audit records (admin only) | +| GET | `/api/reconciliation/backfill/batches` | Current | Backfill batch summary (admin only) | + +--- + +## Transitional / Internal-Only Endpoints + +| Path | Notes | +|---|---| +| `/internal/health` | Not auth-gated. Must be firewall-restricted in production. Not for external clients. | +| `/internal/trading/status` | `/internal/` prefix signals operator tooling only. Not part of the public client API contract. | +| `/internal/trading/pause` | Same as above. | +| `/internal/trading/resume` | Same as above. | + +--- + +## Deprecated Endpoints + +There are no formally deprecated endpoints in the current monorepo runtime. + +Legacy Supabase-backed API variants (direct browser-side table reads) existed in the old +`bytelyst-trading-dashboard-web` repo and were replaced with backend API calls during +Phase 3 migration. Those call paths no longer exist in this codebase. 
+ +--- + +## Planned and Recently Implemented Additions + +| Endpoint | Status | Notes | +|---|---|---| +| `GET /api/admin/audit` | Implemented | Queries Cosmos `audit-events` container — activate by creating the container (see docs/BACKEND_AUDIT_SCHEMA.md) | +| `GET /api/push/register` | Planned | Register mobile push token — blocked on push provider selection | +| `DELETE /api/push/register` | Planned | Deregister mobile push token — blocked on push provider selection | + +--- + +## Versioning Policy + +- No API version prefix (`/v1/`) is currently in use. All endpoints are implicitly v1. +- Breaking changes require a deprecation notice in this document and a migration period + of at least one full deploy cycle before removal. +- The `x-request-id` header is the primary correlation key for client/backend tracing. + +--- + +## WebSocket Namespaces + +| Namespace | Purpose | Auth | +|---|---|---| +| `/` (root) | Backward-compatible default; all authenticated users | Bearer token required | +| `/trading` | Named user-facing namespace; receives user-scoped BotState | Bearer token required | +| `/admin` | Admin-only namespace; receives full cross-user state | Bearer token + admin role required | + +Web and mobile clients connect to `/trading` by default. +The root namespace is retained for backward compatibility. diff --git a/docs/BACKEND_AUDIT_SCHEMA.md b/docs/BACKEND_AUDIT_SCHEMA.md new file mode 100644 index 0000000..ede03dc --- /dev/null +++ b/docs/BACKEND_AUDIT_SCHEMA.md @@ -0,0 +1,142 @@ +# Backend Admin Audit Schema + +## Purpose + +This document defines the `TradeAuditEvent` schema used by the trading backend to log all +admin-facing, operator-facing, and safety-critical state changes. Every audit event is +written to the structured logger via `logger.info('[AUDIT] {...}')` with a UTC timestamp +prepended. + +Audit events are also written to Cosmos on a best-effort basis (see Persistence below). 
Downstream log aggregation +(e.g., Azure Monitor, Log Analytics) is the durable store for audit history. + +--- + +## TradeAuditEvent Schema + +```typescript +interface TradeAuditEvent { + event: string; // Required. Identifies the audit event type (see catalogue below). + userId?: string; // Auth user ID performing or triggering the action. + profileId?: string; // Trading profile ID relevant to the action (if applicable). + symbol?: string; // Asset symbol relevant to the action (if applicable). + outcome?: 'accepted' // Action was applied. + | 'rejected' // Action was blocked by a rule or safety guard. + | 'error'; // Action failed due to a runtime error. + reason?: string; // Human-readable explanation for the outcome. + details?: Record<string, unknown>; // Structured metadata specific to the event type. +} +``` + +Log line format (written via `logger.info`): + +```json +{ + "ts": "2026-04-07T10:00:00.000Z", + "event": "manual_order_created", + "userId": "user-abc", + "profileId": "profile-xyz", + "symbol": "BTC/USDT", + "outcome": "accepted", + "reason": "within risk limits", + "details": { "side": "BUY", "qty": 0.01, "allocatedCapital": 1000 } +} +``` + +--- + +## Event Catalogue + +All events are emitted via `auditTradeEvent()` in `backend/src/services/apiServer.ts`. 
+ +| Event | Trigger | Key details fields | +|---|---|---| +| `manual_order_created` | POST `/api/orders/manual` — order submitted | side, qty, symbol, profileId | +| `manual_order_rejected` | Manual order blocked by risk/capital guard | reason, guard name | +| `manual_order_error` | Manual order failed at execution | error message | +| `profile_control_create` | Chat AI creates a new trading profile | profileName, allocatedCapital | +| `profile_control_update` | Chat AI updates an existing profile | profileId, updatedFields | +| `profile_control_error` | Chat AI profile action fails | error message | +| `chat_profile_control` | Generic chat-initiated profile action | action type, profileId | +| `trading_paused` | POST `/internal/trading/pause` — admin pauses trading | pausedBy, reason | +| `trading_resumed` | POST `/internal/trading/resume` — admin resumes trading | resumedBy | +| `backfill_reverted` | POST `/api/admin/revert-backfill-batch` — admin reverts exit backfill | symbol, profileId | +| `reconciliation_audit` | GET `/api/reconciliation/backfill/audit` — admin reads reconciliation audit | — | +| `position_closed_manual` | Admin/operator manually closes a position | symbol, profileId, tradeId | +| `order_failure` | Order execution failure recorded | side, qty, reason, tradeId | + +> Not all events use every field. Absent fields are omitted from the log payload. + +--- + +## Audit Middleware + +The `auditTradeEvent()` private method in `ApiServer` writes directly to the logger: + +```typescript +private auditTradeEvent(evt: TradeAuditEvent): void { + const payload = { ts: new Date().toISOString(), ...evt }; + logger.info(`[AUDIT] ${JSON.stringify(payload)}`); +} +``` + +All audit calls are synchronous and never throw — audit failure must not block the primary +operation. 
+ +--- + +## Admin-Scoped Endpoints + +Routes that require `requireAdmin` middleware are the primary sources of audit events: + +- `POST /internal/trading/pause` +- `POST /internal/trading/resume` +- `POST /api/admin/revert-backfill-batch` +- `GET /api/reconciliation/backfill/audit` +- `PUT /api/admin/config/dynamic` + +All admin actions must produce an audit event with `userId` set from `authUserId` on the +authenticated request. + +--- + +## Persistence + +Audit events are written to two sinks simultaneously: + +1. **Structured log** — always, via `logger.info('[AUDIT] ...')`. Zero runtime dependency. +2. **Cosmos `audit-events` container** — best-effort, via `persistAuditEvent()` in + `auditRepository.ts`. Silently skipped if Cosmos is not configured or the write fails. + +### Activating Cosmos Persistence + +Create the container in your Cosmos database: + +| Setting | Value | +|---|---| +| Container name | `audit-events` | +| Partition key | `/productId` | +| TTL (Time To Live) | `7776000` seconds (90 days) | +| Throughput | Shared or dedicated — start with 400 RU/s | + +Once the container exists and `COSMOS_ENDPOINT` / `COSMOS_KEY` are configured, all +`auditTradeEvent()` calls will persist records automatically. 
+ +## Admin Audit Endpoint + +`GET /api/admin/audit` (admin-only) queries the `audit-events` container: + +``` +GET /api/admin/audit?userId=user-123&event=manual_order_created&since=1712500000000&limit=50 +``` + +Query parameters (all optional): + +| Parameter | Type | Description | +|---|---|---| +| `userId` | string | Filter by user ID | +| `event` | string | Filter by event name (exact match) | +| `since` | number | Unix epoch ms — return events newer than this timestamp | +| `limit` | number | Max records to return (default 100, max 500) | + +Response: `{ records: AuditEventDocument[], count: number }` diff --git a/docs/CONVENTIONS.md b/docs/CONVENTIONS.md new file mode 100644 index 0000000..9158630 --- /dev/null +++ b/docs/CONVENTIONS.md @@ -0,0 +1,227 @@ +# Repository Conventions + +## Purpose + +This document defines naming conventions, directory structure rules, and import boundaries +for `learning_ai_invt_trdg`. It is the authoritative reference for where code lives and +what each package is allowed to import. 
+ +--- + +## Package Names + +| Package | Name in `package.json` | `pnpm --filter` alias | +|---|---|---| +| Backend | `@bytelyst/trading-backend` | `trading-backend` | +| Web | `@bytelyst/trading-web` | `trading-web` | +| Mobile | `@bytelyst/trading-mobile` | `trading-mobile` | +| Shared | `@bytelyst/trading-shared` (internal — not published) | — | + +--- + +## Directory Structure + +``` +learning_ai_invt_trdg/ +├── backend/ +│ ├── src/ +│ │ ├── backtest/ # Backtesting engine and guards +│ │ ├── config/ # Environment config loader (config/index.ts is the only place process.env is read) +│ │ ├── connectors/ # Exchange connectors (Alpaca, CCXT) + factory +│ │ ├── domain/ # Trading domain enums, value types, and pure domain logic +│ │ ├── repositories/ # Cosmos DB data-access layer (one file per entity) +│ │ ├── scripts/ # Internal one-time scripts and verification contracts (not imported by src/) +│ │ ├── services/ # Application services (apiServer, TradeExecutor, aiClient, etc.) +│ │ ├── strategies/ # 7-rule ProStrategyEngine and individual rule implementations +│ │ └── utils/ # Generic utilities (logger, symbolMapper, etc.) +│ ├── *.ts # Root-level verification / reconciliation scripts (not part of the runtime) +│ └── package.json +├── web/ +│ └── src/ +│ ├── backtest/ # Backtest feature: hook, flags, UI components +│ ├── components/ # Shared React components (AuthContext, Login, ChatControl, etc.) +│ ├── hooks/ # React hooks (useWebSocket, useTabFeatureFlags, etc.) +│ ├── lib/ # Utility modules: authSession, runtime, profileApi, etc. 
+│ ├── tabs/ # Top-level tab components (one file per tab) +│ ├── test/ # Vitest test setup and fixtures +│ ├── App.tsx # Root component: tab routing, auth gate, feature flags +│ └── main.tsx # Vite entry point +├── mobile/ +│ ├── app/ # Expo Router file-based routes +│ │ ├── (tabs)/ # Tab screens: index, positions, history, strategies, settings +│ │ ├── _layout.tsx # Root layout (auth gate, kill-switch) +│ │ ├── chat.tsx # AI chat surface +│ │ └── marketplace.tsx # Marketplace surface +│ ├── components/ # React Native shared components +│ ├── hooks/ # React Native hooks +│ ├── lib/ # runtime, telemetry, authSession equivalents +│ ├── providers/ # MobileAuthProvider, TradingDataProvider +│ └── utils/ # Utility functions +├── shared/ # Cross-surface constants and TypeScript interfaces +│ ├── feature-flags.ts # TradingFeatureFlagsResponse, TabFeatureFlags, BacktestFeatureFlags +│ ├── realtime.ts # SOCKET_NAMESPACES, buildTradingSocketOptions, isUnauthorizedSocketError +│ ├── request-id.ts # createRequestId() +│ ├── runtime.ts # getRuntimeEnvironment() +│ ├── product.ts # productConfig — canonical product identity +│ ├── control-plane.ts # Control-plane state types +│ ├── platform-web.ts # Web-specific platform helpers +│ └── platform-mobile.ts # Mobile-specific platform helpers +├── docs/ # All project documentation +├── scripts/ # Root-level build/verify scripts +└── vendor/ # Local vendored @bytelyst/* packages (symlinked or copied) +``` + +--- + +## Import Boundary Rules + +### Rule 1 — `shared/` is the only cross-surface code + +No surface may import directly from another surface's `src/`. 
+ +| Importer | May import from | +|---|---| +| `backend/src/` | `shared/`, `vendor/bytelyst/*`, npm packages | +| `web/src/` | `shared/`, `vendor/bytelyst/*`, npm packages | +| `mobile/` | `shared/`, `vendor/bytelyst/*`, npm packages | +| `shared/` | npm packages only — never from backend, web, or mobile | + +**Violation example (never do this):** +```ts +// In web/src — WRONG +import { TradeExecutor } from '../../../backend/src/services/TradeExecutor.js'; +``` + +### Rule 2 — `config/index.ts` is the only `process.env` reader in the backend + +All environment variable access in `backend/src/` goes through `config/index.ts`. +No other file calls `process.env` directly. This makes the config surface auditable +and testable in isolation. + +### Rule 3 — `domain/` contains no I/O + +`backend/src/domain/` is pure TypeScript: enums, value types, and functions with no +database calls, no HTTP, and no filesystem access. It may be imported by any other +backend module. + +### Rule 4 — `repositories/` only talk to Cosmos + +`backend/src/repositories/` (or `services/*Repository.ts`) are the only files that +import `@azure/cosmos` or call Cosmos client methods. Services and strategies call +repositories; they do not call Cosmos directly. + +### Rule 5 — `scripts/` are not imported at runtime + +`backend/src/scripts/` contains verification contracts (e.g., `verifyWebsocketContract.ts`) +that run as standalone processes. They are never `import`-ed by `backend/src/index.ts` +or any other runtime module. Root-level `backend/*.ts` scripts follow the same rule. + +### Rule 6 — `shared/` exports must be serialisable + +Everything exported from `shared/` must be importable in all three environments: +Node.js (backend), browser (web), and React Native (mobile). No Node.js built-ins, +no DOM APIs, no React Native-specific APIs. 
+ +### Rule 7 — Trading domain logic stays in the backend + +No trading strategy logic, risk rules, capital guard calculations, or execution logic +may be moved into `shared/`, `web/`, or `mobile/`. Backend is authoritative. + +--- + +## Naming Conventions + +### Files + +| Context | Convention | Example | +|---|---|---| +| TypeScript source | `camelCase.ts` | `apiServer.ts`, `healthTracker.ts` | +| React components | `PascalCase.tsx` | `OverviewTab.tsx`, `AuthContext.tsx` | +| React hooks | `use` prefix, `camelCase.ts` | `useWebSocket.ts`, `useTabFeatureFlags.ts` | +| Test files | `.test.ts` / `.test.tsx` | `App.dom.test.tsx` | +| Verification scripts | `verify*.ts` / `test*.ts` | `verifyApiContract.ts` | +| Docs | `SCREAMING_SNAKE.md` | `CONVENTIONS.md`, `OPERATIONS.md` | + +### Identifiers + +| Context | Convention | Example | +|---|---|---| +| Classes | `PascalCase` | `ApiServer`, `TradeExecutor` | +| Interfaces / types | `PascalCase` | `TradingFeatureFlagsResponse`, `BotState` | +| Constants (module-level) | `SCREAMING_SNAKE_CASE` | `SOCKET_NAMESPACES`, `BACKTEST_FLAG_KEYS` | +| Functions | `camelCase` | `buildTradingSocketOptions`, `createRequestId` | +| React components | `PascalCase` | `OverviewTab`, `LivePulseTicker` | +| Environment variables | `SCREAMING_SNAKE_CASE` | `COSMOS_ENDPOINT`, `TAB_MARKETPLACE_ENABLED` | + +### API Endpoints + +| Scope | Prefix | Auth | +|---|---|---| +| Public product API | `/api/` | `requireAuth` (platform JWT) | +| Internal operator tooling | `/internal/` | `requireAuth` — firewall-restrict in prod | +| Admin-only mutations | `/api/admin/` | `requireAuth` + `requireAdmin` | +| Health / observability | `/health`, `/metrics` | None — firewall-restrict in prod | + +### Socket.IO Events (backend → client) + +| Event | Payload type | Notes | +|---|---|---| +| `state` | `BotState` | Full state on connect | +| `symbol_update` | `{ symbol, data }` | Per-symbol signal/indicator update | +| `positions_update` | `Position[]` | Full 
position list replacement | +| `orders_update` | `Order[]` | Full order list replacement | +| `history_update` | `HistoryRow` | Single new closed trade | +| `new_alert` | `Alert` | Single new alert | +| `health_update` | `HealthSnapshot` | Loop health and trading control state | +| `account_snapshot` | `AccountSnapshot` | Exchange account balance snapshot | +| `order_failure` | `OrderFailureRecord` | Single order failure notification | +| `operational_event` | `OperationalEvent` | Structured operational log entry | +| `operational_event_cleared` | — | Signals log has been cleared | + +--- + +## Feature Flag Conventions + +- Feature flags are defined in `shared/feature-flags.ts` as TypeScript interfaces. +- Flag key constants (`BACKTEST_FLAG_KEYS`, `TAB_FLAG_KEYS`) are the single source of + truth for env var names — never hardcode the string elsewhere. +- Backend reads env vars via `config/index.ts`; web/mobile read via `GET /api/feature-flags`. +- Admin accounts bypass all feature flags — they always see everything. +- New flags default to **enabled** (opt-out model) unless gating a genuinely unreleased feature. + +--- + +## Request Tracing Convention + +Every HTTP request from web or mobile must include: + +``` +x-request-id: <request-id> +Authorization: Bearer <platform-jwt> +``` + +Backend echoes `x-request-id` in all responses. Use this header as the primary correlation +key when tracing a request across client logs and backend logs. + +--- + +## Test / Verification Script Convention + +Backend verification scripts follow this pattern: + +```ts +// verify*.ts or test*.ts at backend/ root or backend/src/scripts/ +import assert from 'node:assert/strict'; + +function main() { + // ... assertions ... + console.log('[PASS] Description of what passed.'); +} + +main(); +``` + +- Exit with a non-zero code on `AssertionError` (Node.js default behaviour). +- Print a single `[PASS] ...` line on success; do not log any other progress output in the happy path. 
+- Never connect to real infrastructure — use in-memory fixtures only. +- Wire into `package.json` scripts as `check:*` and into `npm run test` or `npm run lint`. diff --git a/docs/CUTOVER_MOBILE.md b/docs/CUTOVER_MOBILE.md new file mode 100644 index 0000000..a617815 --- /dev/null +++ b/docs/CUTOVER_MOBILE.md @@ -0,0 +1,251 @@ +# Mobile Internal Beta Checklist (Stage 3 Cutover) + +## Purpose + +This document is the step-by-step runbook for releasing the monorepo mobile app +(`mobile/`) to internal testers before broader rollout. + +**Prerequisite:** Stage 2 (web internal adoption) must be complete and stable through +at least one full backend deploy cycle before beginning Stage 3. +See `docs/CUTOVER_WEB.md`. + +--- + +## Pre-Flight Gate + +Do not begin mobile beta until all of the following are true. + +### Go / No-Go Checks + +```bash +# From monorepo root +pnpm verify # must be green +pnpm lint # must be green +pnpm smoke:release # must pass — validates mobile platform contract compilation +``` + +### Backend + Web Stability + +- [ ] Stage 2 (web internal adoption) signed off with no rollback triggers +- [ ] Backend has been stable through at least one full deploy cycle since web cutover +- [ ] `GET /health/live` returns 200 from the deployment that mobile will target +- [ ] WebSocket `/trading` namespace is reachable from an external network (not just localhost) +- [ ] Feature flags endpoint `GET /api/feature-flags` returns valid response + +### Mobile Build Readiness + +- [ ] `EXPO_PUBLIC_PRODUCT_ID`, `EXPO_PUBLIC_PLATFORM_URL`, `EXPO_PUBLIC_TRADING_API_URL` set correctly for the target environment +- [ ] App compiles cleanly: `pnpm --filter @bytelyst/trading-mobile typecheck` +- [ ] Internal distribution channel ready (TestFlight for iOS, Internal Testing for Android, or Expo Go for dev) +- [ ] Internal testers identified (typically the same operators who validated web) + +### Rollback Readiness + +- [ ] Web dashboard remains fully operational as the primary 
operator interface +- [ ] Mobile beta can be pulled from distribution without affecting backend or web +- [ ] You know who owns the rollback decision + +--- + +## Step 1 — Build and Distribute the Beta + +### Option A — Expo Go (fastest for internal testing) + +```bash +cd mobile +pnpm dev # starts Expo dev server; testers scan QR code with Expo Go app +``` + +### Option B — Native build via EAS (recommended for realistic testing) + +```bash +# Requires EAS CLI and Expo account +npx eas build --platform all --profile preview +npx eas submit --platform all # submit to TestFlight / Google Play Internal Testing +``` + +Verify distribution: + +- [ ] Each internal tester can install the app on their device +- [ ] App launches without crash on both iOS and Android (or whichever platforms are in scope) + +--- + +## Step 2 — Fresh Install Sign-In + +Each tester performs a fresh install (no prior session): + +- [ ] App launches to the sign-in screen (no blank screen, no crash) +- [ ] Sign in with platform credentials — same credentials as web +- [ ] Session established — app navigates to the Overview tab +- [ ] Confirm auth token is a platform JWT (check tester's device Expo logs) +- [ ] `GET /api/me/profile` returns the correct user profile + +--- + +## Step 3 — Session Restore + +- [ ] Close the app completely (background + remove from recents) +- [ ] Reopen the app +- [ ] App restores session without re-prompting for sign-in +- [ ] Overview tab loads with live state (not a loading spinner indefinitely) +- [ ] Confirm secure storage is working: token survives app restart + +--- + +## Step 4 — Kill-Switch and Degraded-State Handling + +### Kill-switch + +- [ ] Toggle maintenance mode on via platform-service +- [ ] Reopen (or refresh) the mobile app — it must block access with a maintenance/unavailable screen +- [ ] Toggle maintenance mode off +- [ ] App recovers without requiring a reinstall or manual sign-in + +### Degraded backend + +- [ ] Stop the backend service 
temporarily +- [ ] App shows a degraded/offline state clearly (not a crash or blank screen) +- [ ] Restart the backend — app reconnects automatically and restores live state + +--- + +## Step 5 — Core Feature Validation + +Each tester validates their own user scope across all five tabs: + +### Overview (index.tsx) +- [ ] Portfolio summary loads with current positions and account snapshot +- [ ] Live ticker or price updates are visible within 60 seconds +- [ ] Trading control badge shows correct mode (RUNNING / PAUSED) +- [ ] WebSocket connects to `/trading` namespace (check backend logs: `[API][/trading] Client connected`) + +### Positions (positions.tsx) +- [ ] Open positions list matches what the web dashboard shows +- [ ] Each position shows symbol, side, size, entry price, unrealised PnL +- [ ] Positions update live when the backend broadcasts `positions_update` + +### History (history.tsx) +- [ ] Closed trade history loads and matches web dashboard +- [ ] Scroll loads older history correctly (if pagination is implemented) + +### Strategies (strategies.tsx) +- [ ] Trading profiles list loads for the authenticated user +- [ ] Profile names, symbols, and active state are correct + +### Settings (settings.tsx) +- [ ] User email and account info display correctly +- [ ] Sign-out works — returns to the sign-in screen and clears the session +- [ ] Notification preference toggles are visible (even if push delivery is deferred) + +--- + +## Step 6 — Real-Time Update Verification + +Leave the app running on the Overview tab for at least 5 minutes: + +- [ ] Symbol prices update at least once during the observation window +- [ ] Health status reflects current backend loop health +- [ ] If a trade closes during the observation window, it appears in History without a manual refresh +- [ ] No unhandled exceptions appear in Expo logs during this period + +### Polling fallback + +- [ ] Disable WebSocket connectivity (e.g., block the Socket.IO port via network settings) +- [ ] 
App falls back to polling — data still refreshes (may be slower) +- [ ] Restore connectivity — app reconnects to WebSocket automatically + +--- + +## Step 7 — Offline / Poor-Connectivity Behaviour + +- [ ] Enable airplane mode while the app is running +- [ ] App shows a clear offline or disconnected indicator (not a crash) +- [ ] Restore network — app reconnects and reloads state without manual intervention +- [ ] No stale or misleading data is shown during the offline period + +--- + +## Step 8 — Safe Operator Controls + +Mobile v1 is intentionally monitor-first. Validate that the scope is correctly limited: + +- [ ] No destructive trading actions are available in the mobile UI (no "close position", no "pause trading" button) +- [ ] No strategy editing available in mobile (strategies tab is read-only) +- [ ] No admin panel or diagnostics UI visible to non-admin testers +- [ ] Confirm no API calls are made to `POST /api/trade`, `POST /api/close`, or `/internal/trading/*` from mobile + +--- + +## Step 9 — Parallel Run Period (Recommended: 3–7 days) + +Run the mobile beta in parallel with the web dashboard: + +- [ ] Testers use mobile for monitoring; web remains primary for operator actions +- [ ] Testers report any data discrepancy between mobile and web +- [ ] Collect feedback on performance, UX clarity, and any missing information +- [ ] Monitor backend logs for unusual error spikes correlated with mobile connections + +--- + +## Rollback Triggers + +Pull the beta build from distribution immediately if any of the following occur: + +| Condition | Action | +|---|---| +| Sign-in or session restore fails | Remove from distribution | +| Tenant data leak — tester sees another user's data | Remove immediately + page oncall | +| App crashes on launch for any tester | Remove and diagnose | +| App shows incorrect or stale positions/history vs. 
web | Investigate before proceeding | +| Backend error rate increases after mobile connections are established | Rollback and diagnose | + +### Rollback Steps + +1. Remove the beta build from TestFlight / Play Internal Testing +2. Notify testers to uninstall or stop using the app +3. File an incident report — include device, OS version, Expo logs, and `x-request-id` from any correlated backend errors +4. Do not modify Cosmos state as first-response rollback +5. Resolve the root cause before re-attempting the beta + +--- + +## Post-Beta Monitoring (Ongoing during beta period) + +### Watch during the beta + +- [ ] Platform auth failure rate is baseline — no spike correlated with mobile sessions +- [ ] Token refresh failures from mobile clients are zero +- [ ] WebSocket `/trading` namespace connection count is stable (no reconnect storm) +- [ ] Backend `401` / `403` errors are baseline — no increase from mobile auth +- [ ] Cosmos read latency is normal — mobile polling does not add unexpected load +- [ ] Mobile degraded/offline state frequency is low (expected: near zero on good networks) + +--- + +## Beta Sign-Off + +Complete after the parallel run period with no rollback triggers: + +- [ ] All internal testers confirm the app is working correctly on their devices +- [ ] No open P0/P1 bugs from the beta period +- [ ] Monitoring checks above are green for the duration of the beta +- [ ] Feedback collected and triaged (decide what goes into a follow-up release vs. 
pre-GA fix) +- [ ] ROADMAP.md: mark "Mobile internal beta" as `[x]` Done +- [ ] OPERATIONS.md: update Staged Cutover section to reflect Stage 3 complete + +--- + +## Next Stage + +After mobile internal beta is signed off: + +**Stage 4 — Controlled Operator Rollout** +- Expand to a small group of external operators (e.g., power users, design partners) +- Gate: mobile beta signed off + web adoption stable for ≥ 1 week +- Monitor: same checks as post-beta, extended for 24–48 hours after each rollout increment + +**Stage 5 — Production Cutover** +- Switch full operational ownership from legacy repos to this monorepo +- Archive legacy `bytelyst-trading-dashboard-web`, `bytelyst-trading-bot-service`, and `bytelyst-trading-dashboard-mob` +- Gate: Stage 4 stable + rollback owners confirmed + monitoring dashboards in place diff --git a/docs/CUTOVER_WEB.md b/docs/CUTOVER_WEB.md new file mode 100644 index 0000000..0761163 --- /dev/null +++ b/docs/CUTOVER_WEB.md @@ -0,0 +1,227 @@ +# Web Internal Adoption Checklist (Stage 2 Cutover) + +## Purpose + +This document is the step-by-step runbook for switching internal operators from the +legacy `bytelyst-trading-dashboard-web` to the new monorepo web dashboard (`web/`). + +It covers the pre-flight gate, deployment, validation, rollback triggers, and +post-adoption monitoring. Complete every step in order. + +--- + +## Pre-Flight Gate + +Do not begin cutover until all of the following are true. 
+ +### Go / No-Go Checks + +Run from the monorepo root: + +```bash +pnpm verify # typecheck + test + build — must be green +pnpm lint # backend contract + security guards + web/mobile lint — must be green +pnpm smoke:release # auth + kill-switch smoke tests — must pass +``` + +Backend-specific: + +```bash +cd backend +npm run check:api-contract # feature-flag shapes, audit events, namespace constants +npm run check:websocket-contract # BotState lifecycle consistency +npm run check:security-guards # tenant isolation — must be green +npm run check:tenant-isolation # row-level access — must be green +``` + +### Environment Checks + +- [ ] Backend is deployed and reachable (`GET /health/live` returns 200) +- [ ] Cosmos DB containers readable and writable (`dynamic_config`, `trading-profiles`, `trading-control`, `snapshots`, `capital-ledger`) +- [ ] Platform-service is reachable from the deployment environment +- [ ] `PLATFORM_AUTH_ENABLED=true` is set on the backend deployment +- [ ] `VITE_TRADING_API_URL` points to the deployed backend (not localhost) +- [ ] `VITE_PLATFORM_URL` points to the live platform-service +- [ ] `CORS_ALLOWED_ORIGINS` on the backend includes the new web dashboard origin +- [ ] Feature flags set correctly for the rollout population: + - `TAB_MARKETPLACE_ENABLED` — set per rollout plan + - `TAB_MEMBERSHIP_ENABLED` — set per rollout plan + - `ENABLE_BACKTEST` — set per rollout plan + +### Rollback Readiness + +- [ ] The legacy web dashboard URL is still live and working +- [ ] You know who owns the rollback decision and how to reach them +- [ ] Backend trade-halt control is reachable (`POST /internal/trading/pause`) + +--- + +## Step 1 — Deploy the Web Dashboard + +```bash +# From monorepo root — production build +pnpm build + +# Or using Docker +pnpm docker:up +``` + +Verify the deployment: + +- [ ] Web dashboard loads at the new URL without a blank screen +- [ ] Browser console shows no errors on load +- [ ] Network tab shows no 4xx/5xx on 
initial API calls + +--- + +## Step 2 — Internal Operator Sign-In + +Have each internal operator complete the sign-in sequence: + +- [ ] Navigate to the new web dashboard URL +- [ ] Sign in using platform credentials (same as the legacy dashboard) +- [ ] Session restores correctly after browser refresh (no re-login required) +- [ ] Auth token is a platform JWT (check via browser devtools: `Authorization: Bearer ...` on API calls) +- [ ] `GET /api/me/profile` returns the correct user profile and role + +--- + +## Step 3 — Core Feature Validation + +Each operator validates their own user scope: + +### Trading State +- [ ] Overview tab loads with live bot state (not stale/empty) +- [ ] WebSocket connection shows "Connected" in the header +- [ ] Socket connects to `/trading` namespace (check backend logs: `[API][/trading] Client connected`) +- [ ] Positions tab shows current open positions +- [ ] Trade History tab shows closed trade history +- [ ] My Strategies tab lists the operator's trading profiles + +### Real-Time Updates +- [ ] Leave the dashboard open for 60 seconds; confirm symbol prices update live +- [ ] Trigger a manual order or profile toggle; confirm the state updates without refresh + +### Admin Operators (role = admin only) +- [ ] Signals tab is visible and loads correctly +- [ ] Entries tab is visible and loads correctly +- [ ] Admin Panel tab is visible +- [ ] Strategy Clusters tab is visible +- [ ] Admin Panel → Trading Control: pause and resume work correctly +- [ ] Backend logs show `[AUDIT]` entries for pause/resume actions +- [ ] "Preview as Customer" toggle hides admin-only tabs correctly + +### Kill-Switch Behaviour +- [ ] If platform-service maintenance mode is toggled on, web blocks access with correct UI +- [ ] After maintenance mode is lifted, web recovers without a page reload + +--- + +## Step 4 — Config and Feature Flag Validation + +- [ ] `GET /api/feature-flags` returns the correct `backtest`, `tabs.marketplace`, and `tabs.membership` 
values +- [ ] Backtesting tab visibility matches `ENABLE_BACKTEST` and `BACKTEST_CUSTOMER_ENABLED` config +- [ ] Marketplace tab visibility matches `TAB_MARKETPLACE_ENABLED` config +- [ ] Membership tab visibility matches `TAB_MEMBERSHIP_ENABLED` config +- [ ] Dynamic config changes via Admin Panel → Config are persisted to Cosmos and visible after a page refresh + +--- + +## Step 5 — Request Tracing Spot Check + +Pick any operator action (e.g., load trade history): + +- [ ] Browser devtools shows `x-request-id` header on the request +- [ ] Backend response echoes the same `x-request-id` +- [ ] Search backend logs for that `x-request-id` — the full request trace appears + +--- + +## Step 6 — Parallel Run Period (Recommended: 1–3 days) + +Run the new and legacy dashboards in parallel before switching traffic fully: + +- [ ] Operators use the new dashboard as primary +- [ ] Legacy dashboard remains accessible as a fallback +- [ ] No trading state mutations go through the legacy dashboard during this period +- [ ] Monitor for discrepancies between what new and legacy dashboards show + +--- + +## Step 7 — Traffic Cutover + +Once parallel run is complete with no issues: + +- [ ] Update any bookmarks, internal links, or runbooks to point to the new URL +- [ ] Communicate to all internal users that the new dashboard is now primary +- [ ] Disable or redirect the legacy dashboard URL (do not delete it yet) + +--- + +## Rollback Triggers + +Stop cutover and revert to the legacy dashboard immediately if any of the following occur: + +| Condition | Action | +|---|---| +| Sign-in or session restore fails for any operator | Rollback | +| Tenant data leak — operator sees another user's positions or history | Rollback immediately + page oncall | +| Trading control (pause/resume) does not apply correctly | Rollback | +| Dynamic config writes fail silently | Rollback | +| WebSocket disconnects repeatedly with no recovery | Rollback | +| Missing data in positions or trade history vs. 
legacy dashboard | Investigate before proceeding | + +### Rollback Steps + +1. Restore the legacy dashboard URL as primary (flip DNS or update internal links) +2. Notify all operators to switch back immediately +3. Do **not** rewrite or delete Cosmos state during first-response rollback +4. File an incident report referencing the `x-request-id` values from affected requests +5. Resolve the root cause before re-attempting cutover + +--- + +## Post-Adoption Monitoring (First 24 Hours) + +Watch the following immediately after cutting over: + +### Immediate (first 30 minutes) +- [ ] Platform auth failure rate is zero +- [ ] Token refresh failures are zero +- [ ] Backend `401` / `403` error rate is baseline (no spike) +- [ ] WebSocket connection error rate is baseline + +### First Hour +- [ ] Cosmos reads and writes are completing successfully (check backend logs for Cosmos errors) +- [ ] Dynamic config refresh cycle completes without error (every `DYNAMIC_CONFIG_REFRESH_MS`) +- [ ] No tenant isolation anomalies in security guard logs + +### First 24 Hours +- [ ] Runtime control drift: Cosmos control-plane state matches in-memory trading control mode +- [ ] Kill-switch state matches platform-service state +- [ ] No stale session events (operators are not re-prompted to log in unexpectedly) +- [ ] No build or chunk-size regressions affecting web load time (check browser waterfall) + +--- + +## Post-Cutover Sign-Off + +Complete after the first 24 hours with no rollback triggers: + +- [ ] All operators confirm the new dashboard is working correctly +- [ ] Monitoring checks above are all green +- [ ] Incident response runbooks updated to reference the new dashboard URL +- [ ] Legacy web repo marked as archived (not deleted — kept as reference) +- [ ] ROADMAP.md: mark "Web internal adoption" as `[x]` Done +- [ ] OPERATIONS.md: update Staged Cutover section to reflect Stage 2 complete + +--- + +## Next Stage + +After web internal adoption is confirmed: + +**Stage 3 — Mobile 
// @vitest-environment jsdom
import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
import { renderHook, waitFor } from '@testing-library/react';
import { useTabFeatureFlags } from './useTabFeatureFlags';

const { getPlatformAccessTokenMock } = vi.hoisted(() => ({
  getPlatformAccessTokenMock: vi.fn<() => Promise<string>>(),
}));

vi.mock('../lib/authSession', () => ({
  getPlatformAccessToken: getPlatformAccessTokenMock,
}));

vi.mock('../lib/runtime', () => ({
  tradingRuntime: { tradingApiUrl: 'http://localhost:4018' },
}));

/** Flags the hook exposes before the backend answers and whenever it cannot be reached. */
const DEFAULT_FLAGS = { marketplace: true, membership: true };

// The hook module keeps fetched flags in a module-level cache with a 30 s TTL.
// Re-mocking the module with its own original implementation does NOT clear
// that state, so instead every test starts with `Date.now()` moved far past
// the TTL: whatever an earlier test cached is stale by the time the next runs.
const CLOCK_STEP_MS = 60 * 60 * 1000;
let fakeNow = Date.now();

describe('useTabFeatureFlags DOM behaviour', () => {
  beforeEach(() => {
    fakeNow += CLOCK_STEP_MS;
    vi.spyOn(Date, 'now').mockImplementation(() => fakeNow);
    getPlatformAccessTokenMock.mockReset();
    vi.stubGlobal('fetch', vi.fn());
  });

  afterEach(() => {
    // Do not leak the stubbed fetch or the frozen clock into other test files.
    vi.restoreAllMocks();
    vi.unstubAllGlobals();
  });

  it('returns defaults while loading when no auth token is available', async () => {
    getPlatformAccessTokenMock.mockRejectedValue(new Error('No session'));

    const { result } = renderHook(() => useTabFeatureFlags());

    // Initially loading
    expect(result.current.loading).toBe(true);
    expect(result.current.flags).toEqual(DEFAULT_FLAGS);

    await waitFor(() => expect(result.current.loading).toBe(false));

    // Falls back to defaults when unauthenticated
    expect(result.current.flags).toEqual(DEFAULT_FLAGS);
    expect(vi.mocked(fetch)).not.toHaveBeenCalled();
  });

  it('returns flags from the API when authenticated', async () => {
    getPlatformAccessTokenMock.mockResolvedValue('token-xyz');
    vi.mocked(fetch).mockResolvedValue({
      ok: true,
      json: async () => ({
        backtest: { enabled: false, customerEnabled: false },
        tabs: { marketplace: false, membership: true },
      }),
    } as Response);

    const { result } = renderHook(() => useTabFeatureFlags());

    await waitFor(() => expect(result.current.loading).toBe(false));

    expect(result.current.flags.marketplace).toBe(false);
    expect(result.current.flags.membership).toBe(true);
    expect(vi.mocked(fetch)).toHaveBeenCalledWith(
      'http://localhost:4018/api/feature-flags',
      expect.objectContaining({
        headers: expect.objectContaining({
          Authorization: 'Bearer token-xyz',
        }),
      }),
    );
  });

  it('reuses cached flags within the TTL instead of refetching', async () => {
    getPlatformAccessTokenMock.mockResolvedValue('token-xyz');
    vi.mocked(fetch).mockResolvedValue({
      ok: true,
      json: async () => ({
        backtest: { enabled: false, customerEnabled: false },
        tabs: { marketplace: false, membership: true },
      }),
    } as Response);

    const first = renderHook(() => useTabFeatureFlags());
    await waitFor(() => expect(first.result.current.loading).toBe(false));

    // The clock is frozen for the duration of the test, so the cache is still fresh.
    const second = renderHook(() => useTabFeatureFlags());
    await waitFor(() => expect(second.result.current.loading).toBe(false));

    expect(second.result.current.flags).toEqual({ marketplace: false, membership: true });
    expect(vi.mocked(fetch)).toHaveBeenCalledTimes(1);
  });

  it('falls back to defaults when the API returns a non-ok response', async () => {
    getPlatformAccessTokenMock.mockResolvedValue('token-xyz');
    vi.mocked(fetch).mockResolvedValue({
      ok: false,
      status: 503,
    } as Response);

    const { result } = renderHook(() => useTabFeatureFlags());

    await waitFor(() => expect(result.current.loading).toBe(false));

    expect(result.current.flags).toEqual(DEFAULT_FLAGS);
  });

  it('falls back to defaults when the API returns a malformed body', async () => {
    getPlatformAccessTokenMock.mockResolvedValue('token-xyz');
    vi.mocked(fetch).mockResolvedValue({
      ok: true,
      json: async () => { throw new Error('invalid json'); },
    } as unknown as Response);

    const { result } = renderHook(() => useTabFeatureFlags());

    await waitFor(() => expect(result.current.loading).toBe(false));

    expect(result.current.flags).toEqual(DEFAULT_FLAGS);
  });

  it('falls back to defaults when fetch itself throws', async () => {
    getPlatformAccessTokenMock.mockResolvedValue('token-xyz');
    vi.mocked(fetch).mockRejectedValue(new Error('network error'));

    const { result } = renderHook(() => useTabFeatureFlags());

    await waitFor(() => expect(result.current.loading).toBe(false));

    expect(result.current.flags).toEqual(DEFAULT_FLAGS);
  });
});
import { useEffect, useState } from 'react';
import { getPlatformAccessToken } from '../lib/authSession';
import { tradingRuntime } from '../lib/runtime';
import { createRequestId } from '../../../shared/request-id.js';
import type { TabFeatureFlags, TradingFeatureFlagsResponse } from '../../../shared/feature-flags.js';

/**
 * Flags used before the backend has answered and whenever it cannot be
 * consulted. Both tabs default to visible.
 */
const DEFAULT_FLAGS: TabFeatureFlags = {
  marketplace: true,
  membership: true,
};

/** How long a successfully fetched flag set is reused before refetching. */
const CACHE_TTL_MS = 30_000;

// Module-level cache, shared by every hook instance loaded from this module.
let cachedFlags: TabFeatureFlags | null = null;
let cachedAt = 0;

/** Returns `value` when it is a real boolean, otherwise `fallback`. */
function readFlag(value: unknown, fallback: boolean): boolean {
  return typeof value === 'boolean' ? value : fallback;
}

/**
 * Resolves the tab feature flags for the current session.
 *
 * Order of precedence:
 * 1. a cached answer younger than `CACHE_TTL_MS`;
 * 2. the `tabs` object returned by `GET /api/feature-flags`;
 * 3. `DEFAULT_FLAGS` when there is no access token, the response is not ok,
 *    or the body is unparsable / has no `tabs` object.
 *
 * Only answers that actually came from the backend are cached, so a transient
 * bad response cannot pin the defaults for a whole TTL window.
 *
 * @returns The resolved flags.
 * @throws Whatever `fetch` rejects with (e.g. a network error); callers are
 *   expected to handle it.
 */
async function loadTabFlags(): Promise<TabFeatureFlags> {
  if (cachedFlags && Date.now() - cachedAt < CACHE_TTL_MS) {
    return cachedFlags;
  }

  // A failed token lookup is treated the same as "no session".
  const accessToken = await getPlatformAccessToken().catch(() => null);
  if (!accessToken) return DEFAULT_FLAGS;

  const response = await fetch(`${tradingRuntime.tradingApiUrl}/api/feature-flags`, {
    headers: {
      Authorization: `Bearer ${accessToken}`,
      'x-request-id': createRequestId('web-tab-flags'),
    },
  });
  if (!response.ok) return DEFAULT_FLAGS;

  const body = (await response.json().catch(() => null)) as TradingFeatureFlagsResponse | null;
  const tabs: unknown = body?.tabs;
  // Unparsable JSON or a body without a `tabs` object: fall back, do not cache.
  if (typeof tabs !== 'object' || tabs === null) return DEFAULT_FLAGS;

  const { marketplace, membership } = tabs as Record<string, unknown>;
  const flags: TabFeatureFlags = {
    marketplace: readFlag(marketplace, DEFAULT_FLAGS.marketplace),
    membership: readFlag(membership, DEFAULT_FLAGS.membership),
  };

  cachedFlags = flags;
  cachedAt = Date.now();
  return flags;
}

/** Value returned by {@link useTabFeatureFlags}. */
export interface UseTabFeatureFlagsResult {
  /** Current flags; equal to the defaults until loading completes or if loading fails. */
  flags: TabFeatureFlags;
  /** `true` until the first load attempt has settled. */
  loading: boolean;
}

/**
 * React hook exposing the marketplace / membership tab flags.
 *
 * Starts with the defaults and `loading: true`, loads the flags once on mount,
 * and falls back to the defaults if loading rejects. State updates are skipped
 * after unmount.
 */
export function useTabFeatureFlags(): UseTabFeatureFlagsResult {
  const [flags, setFlags] = useState<TabFeatureFlags>(DEFAULT_FLAGS);
  const [loading, setLoading] = useState(true);

  useEffect(() => {
    // Guards against setting state on an unmounted component.
    let cancelled = false;

    void loadTabFlags()
      .then((loaded) => {
        if (!cancelled) setFlags(loaded);
      })
      .catch(() => {
        if (!cancelled) setFlags(DEFAULT_FLAGS);
      })
      .finally(() => {
        if (!cancelled) setLoading(false);
      });

    return () => {
      cancelled = true;
    };
  }, []);

  return { flags, loading };
}