diff --git a/backend/package.json b/backend/package.json
index 72eda2c..4e84e49 100644
--- a/backend/package.json
+++ b/backend/package.json
@@ -5,7 +5,7 @@
   "description": "ByteLyst Trading backend and execution control service",
   "main": "index.js",
   "scripts": {
-    "test": "npm run check:websocket-contract && npm run check:session-rule-normalization && npm run check:api-contract && npm run check:audit-repository && npm run check:market-data-endpoints && npm run check:chat-copilot-contract && npm run check:chat-copilot-fallbacks && npm run check:fmp-cache && npm run check:backtest-strategy-safety",
+    "test": "npm run check:websocket-contract && npm run check:session-rule-normalization && npm run check:api-contract && npm run check:audit-repository && npm run check:market-data-endpoints && npm run check:chat-copilot-contract && npm run check:chat-copilot-fallbacks && npm run check:fmp-cache && npm run check:backtest-strategy-safety && npm run check:backtest-engine",
     "dev": "node --import tsx src/bootstrap.ts",
     "build": "tsc",
     "typecheck": "tsc --noEmit",
@@ -51,7 +51,8 @@
     "check": "npm run build && npm run lint && npm run format",
     "pre-deploy": "npm run check",
     "cleanup-stale-orders": "node --import tsx src/scripts/cleanupStaleOrders.ts",
-    "revert-expired-orders": "node --import tsx src/scripts/revertExpiredOrders.ts"
+    "revert-expired-orders": "node --import tsx src/scripts/revertExpiredOrders.ts",
+    "check:backtest-engine": "node --import tsx testBacktestEngine.ts"
   },
   "keywords": [],
   "author": "",
diff --git a/backend/testBacktestEngine.ts b/backend/testBacktestEngine.ts
new file mode 100644
index 0000000..ca0d7dc
--- /dev/null
+++ b/backend/testBacktestEngine.ts
@@ -0,0 +1,322 @@
+/**
+ * Backtest engine regression tests.
+ *
+ * Stage B of docs/backtest/ENGINE_READINESS.md §4. Implements the minimum
+ * viable test suite called out in §3.1: codifies determinism, summary math,
+ * timeframe aggregation, log-level isolation, and edge-case handling. These
+ * tests are NOT exhaustive; they're the smallest set that prevents silent
+ * regression of behaviors verified during the readiness audit.
+ *
+ * Convention: matches sibling `testXxx.ts` scripts (no vitest in this repo).
+ * Run via `npm run check:backtest-engine`.
+ */
+import assert from 'node:assert/strict';
+import { config } from './src/config/index.js';
+import { runBacktest } from './src/backtest/index.js';
+import logger, { withLogLevel } from './src/utils/logger.js';
+import { aggregateCandles } from './src/backtest/data/normalize.js';
+import { computeSummary, computeSharpe } from './src/backtest/metrics/computeSummary.js';
+import type { BacktestRequest, EquityPoint } from './src/backtest/types.js';
+import type { Candle } from './src/connectors/types.js';
+
+// ---------------------------------------------------------------------------
+// Helpers
+
+const FIFTEEN_MINUTES_MS = 15 * 60 * 1000;
+
+/**
+ * Builds `n` consecutive 15-minute candles starting at `startTs` (epoch ms).
+ * `pricer(i)` supplies the OHLC values for the i-th candle; volume is fixed at 1.
+ */
+const buildSyntheticCandles = (
+  startTs: number,
+  n: number,
+  pricer: (i: number) => { open: number; high: number; low: number; close: number }
+): Candle[] => {
+  const out: Candle[] = [];
+  for (let i = 0; i < n; i++) {
+    const { open, high, low, close } = pricer(i);
+    out.push({ timestamp: startTs + i * FIFTEEN_MINUTES_MS, open, high, low, close, volume: 1 });
+  }
+  return out;
+};
+
+/**
+ * Wraps `candles` in a 15m BTC/USD request backed by an inline JSON data source.
+ * The date range runs from the first candle's timestamp to one bar past the last
+ * (a missing timestamp is treated as 0, so an empty array yields epoch 0 → +15m).
+ * `overrides` is spread last and replaces top-level fields wholesale (shallow).
+ */
+const buildBacktestRequest = (
+  candles: Candle[],
+  overrides: Partial<BacktestRequest> = {}
+): BacktestRequest => {
+  const fromTs = candles[0]?.timestamp ?? 0;
+  const toTs = (candles[candles.length - 1]?.timestamp ?? 0) + FIFTEEN_MINUTES_MS;
+  return {
+    mode: 'backtest',
+    symbols: ['BTC/USD'],
+    timeframe: '15m',
+    dateRange: {
+      from: new Date(fromTs).toISOString(),
+      to: new Date(toTs).toISOString(),
+    },
+    dataSource: {
+      type: 'json',
+      payload: {
+        candles: candles.map((c) => ({
+          symbol: 'BTC/USD',
+          timeframe: '15m',
+          timestamp: c.timestamp,
+          open: c.open,
+          high: c.high,
+          low: c.low,
+          close: c.close,
+          volume: c.volume,
+        })),
+      },
+    },
+    execution: {
+      initialCapitalUsd: 10000,
+      orderType: 'market',
+      slippageBps: 5,
+      feeBps: 10,
+      partialFillPct: 1,
+      fillOnNextBar: true,
+      intraCandlePolicy: 'ohlc_path',
+      triggerTimeframe: '1m',
+      forceCloseAtWindowEnd: false,
+    },
+    strategyConfig: { enabled: true, symbol: 'BTC/USD', riskPerTrade: 0.02, maxPositions: 1 },
+    ...overrides,
+  } as BacktestRequest;
+};
+
+// Names of the checks that passed, printed as a summary at the end of the script
+const passed: string[] = [];
+const fail = (name: string, error: unknown): never => {
+  console.error(`\n[backtest-engine] FAIL: ${name}`);
+  console.error(error instanceof Error ? error.stack || error.message : error);
+  process.exit(1);
+};
+const pass = (name: string): void => {
+  passed.push(name);
+};
+
+// ---------------------------------------------------------------------------
+// Test 1 — aggregateCandles produces correct OHLC across timeframes
+//
+// Critical invariant: the production data pipeline depends on this aggregator
+// to derive 1h and 4h from 15m. If aggregation breaks, the strategy engine
+// silently fails with "Insufficient data" warnings.
+
+try {
+  // 4 × 15m candles with monotonic prices to make the expected OHLC obvious
+  const start = Date.parse('2024-01-01T00:00:00Z');
+  const fifteens: Candle[] = [
+    { timestamp: start + 0 * FIFTEEN_MINUTES_MS, open: 100, high: 110, low: 95, close: 105, volume: 1 },
+    { timestamp: start + 1 * FIFTEEN_MINUTES_MS, open: 105, high: 115, low: 100, close: 112, volume: 2 },
+    { timestamp: start + 2 * FIFTEEN_MINUTES_MS, open: 112, high: 120, low: 108, close: 118, volume: 3 },
+    { timestamp: start + 3 * FIFTEEN_MINUTES_MS, open: 118, high: 125, low: 116, close: 122, volume: 4 },
+  ];
+  const oneHour = aggregateCandles(fifteens, '1h');
+  assert.equal(oneHour.length, 1, '4 × 15m candles → exactly 1 × 1h candle');
+  const h = oneHour[0];
+  assert.equal(h.timestamp, start, '1h candle timestamp = first 15m timestamp');
+  assert.equal(h.open, 100, '1h.open = first 15m.open');
+  assert.equal(h.close, 122, '1h.close = last 15m.close');
+  assert.equal(h.high, 125, '1h.high = max of 15m highs');
+  assert.equal(h.low, 95, '1h.low = min of 15m lows');
+  assert.equal(h.volume, 10, '1h.volume = sum of 15m volumes (1+2+3+4)');
+  pass('aggregateCandles 15m→1h preserves OHLC + sums volume');
+} catch (e) { fail('aggregateCandles 15m→1h', e); }
+
+try {
+  // 16 × 15m → 1 × 4h; highs run 105..120 and lows 95..110, so max/min are 120/95
+  const start = Date.parse('2024-01-01T00:00:00Z');
+  const fifteens: Candle[] = Array.from({ length: 16 }, (_, i) => ({
+    timestamp: start + i * FIFTEEN_MINUTES_MS,
+    open: 100 + i,
+    high: 100 + i + 5,
+    low: 100 + i - 5,
+    close: 100 + i + 1,
+    volume: 1,
+  }));
+  const fourHour = aggregateCandles(fifteens, '4h');
+  assert.equal(fourHour.length, 1, '16 × 15m → 1 × 4h');
+  assert.equal(fourHour[0].open, 100, '4h.open = first 15m.open');
+  assert.equal(fourHour[0].close, 116, '4h.close = last 15m.close');
+  assert.equal(fourHour[0].high, 120, '4h.high = max of 15m highs');
+  assert.equal(fourHour[0].low, 95, '4h.low = min of 15m lows');
+  assert.equal(fourHour[0].volume, 16, '4h.volume = sum');
+  pass('aggregateCandles 15m→4h preserves OHLC + sums volume');
+} catch (e) { fail('aggregateCandles 15m→4h', e); }
+
+// ---------------------------------------------------------------------------
+// Test 2 — computeSummary math: known inputs → known outputs
+//
+// The summary fields drive what users see (PnL, win rate, drawdown, sharpe).
+// Hand-computed expectations protect against accidental refactor regressions.
+
+try {
+  const trades = [
+    { pnlUsd: 100 }, // win
+    { pnlUsd: -50 }, // loss
+    { pnlUsd: 150 }, // win
+    { pnlUsd: -100 }, // loss
+    { pnlUsd: 200 }, // win
+  ];
+  const timeline: EquityPoint[] = [
+    { timestamp: 1, equityUsd: 10000, drawdownPct: 0, cashUsd: 10000, reservedUsd: 0 },
+    { timestamp: 2, equityUsd: 10100, drawdownPct: 0, cashUsd: 10100, reservedUsd: 0 },
+    { timestamp: 3, equityUsd: 10050, drawdownPct: 0.50, cashUsd: 10050, reservedUsd: 0 },
+    { timestamp: 4, equityUsd: 10200, drawdownPct: 0, cashUsd: 10200, reservedUsd: 0 },
+    { timestamp: 5, equityUsd: 10100, drawdownPct: 0.98, cashUsd: 10100, reservedUsd: 0 },
+    { timestamp: 6, equityUsd: 10300, drawdownPct: 0, cashUsd: 10300, reservedUsd: 0 },
+  ];
+  const summary = computeSummary(trades, timeline, '15m');
+  assert.equal(summary.totalTrades, 5, 'totalTrades = trades.length');
+  assert.equal(summary.netPnlUsd, 300, 'netPnl = 100 + (-50) + 150 + (-100) + 200');
+  assert.equal(summary.winRate, 60, 'winRate = 3/5 × 100');
+  assert.equal(summary.maxDrawdownPct, 0.98, 'maxDrawdown = max of timeline drawdownPct');
+  // `typeof NaN` is also 'number', so check finiteness to actually exclude NaN/Infinity
+  assert.ok(Number.isFinite(summary.sharpe), 'sharpe is a finite number (computed from returns)');
+  pass('computeSummary derives PnL/winRate/drawdown from trades + timeline');
+} catch (e) { fail('computeSummary math', e); }
+
+try {
+  // Edge: empty trades and timeline → all zeros, no NaN/Infinity
+  const summary = computeSummary([], [], '15m');
+  assert.equal(summary.totalTrades, 0);
+  assert.equal(summary.netPnlUsd, 0);
+  assert.equal(summary.winRate, 0);
+  assert.equal(summary.maxDrawdownPct, 0);
+  assert.equal(summary.sharpe, 0);
+  pass('computeSummary handles empty inputs (no NaN, all zeros)');
+} catch (e) { fail('computeSummary empty inputs', e); }
+
+try {
+  const single: EquityPoint[] = [{ timestamp: 1, equityUsd: 10000, drawdownPct: 0, cashUsd: 10000, reservedUsd: 0 }];
+  assert.equal(computeSharpe(single, '15m'), 0, 'sharpe = 0 with < 2 points (no returns)');
+  pass('computeSharpe handles single-point timeline (returns 0, no divide-by-zero)');
+} catch (e) { fail('computeSharpe single point', e); }
+
+// ---------------------------------------------------------------------------
+// Test 3 — withLogLevel isolation: temporary level swap is safe across throws
+//
+// Stage D guarantee: backtest runs lower the log level via withLogLevel()
+// during the run, but the level must be restored even if the wrapped fn
+// throws. Without restoration, a single failed backtest would silence the
+// rest of the process indefinitely.
+
+try {
+  const initial = logger.level;
+  const result = await withLogLevel('error', () => 42);
+  assert.equal(result, 42, 'withLogLevel returns inner value');
+  assert.equal(logger.level, initial, 'level restored after success');
+
+  let caught: unknown = null;
+  try {
+    await withLogLevel('error', () => { throw new Error('boom'); });
+  } catch (e) {
+    caught = e;
+  }
+  assert.equal((caught as Error)?.message, 'boom', 'inner throw propagates');
+  assert.equal(logger.level, initial, 'level restored after throw');
+  pass('withLogLevel restores logger.level after success and after throw');
+} catch (e) { fail('withLogLevel isolation', e); }
+
+// ---------------------------------------------------------------------------
+// Test 4 — runBacktest determinism: same input → identical output
+//
+// This is the headline guarantee. If the engine ever becomes non-deterministic
+// (e.g. someone adds Math.random() or Date.now() inside a hot path), this test
+// will catch it.
+
+// Flag is forced on for the runBacktest checks below and restored at the end of the script
+const originalFlag = config.ENABLE_BACKTEST;
+config.ENABLE_BACKTEST = true;
+
+try {
+  const tape = buildSyntheticCandles(Date.parse('2024-01-01T00:00:00Z'), 2000, (i) => {
+    const mid = 50000 + i * 0.5 + Math.sin(i / 50) * 800;
+    return { open: mid, high: mid + 50, low: mid - 50, close: mid + 10 };
+  });
+  const request = buildBacktestRequest(tape);
+  const first = await runBacktest(request);
+  const second = await runBacktest(request);
+  const [s1, s2] = [first.summary, second.summary];
+
+  assert.equal(first.trades.length, second.trades.length, 'same trade count');
+  assert.equal(s1.netPnlUsd, s2.netPnlUsd, 'same netPnl');
+  assert.equal(s1.maxDrawdownPct, s2.maxDrawdownPct, 'same drawdown');
+  assert.equal(s1.sharpe, s2.sharpe, 'same sharpe');
+  assert.equal(JSON.stringify(first), JSON.stringify(second), 'byte-identical JSON serialization');
+  pass('runBacktest is deterministic across runs (byte-identical)');
+} catch (e) { fail('determinism', e); }
+
+// ---------------------------------------------------------------------------
+// Test 5 — Flat-price tape produces zero trades
+//
+// Sanity check: with every candle pinned at one price there is nothing to react
+// to, so any trade that shows up means the engine is fabricating signals.
+
+try {
+  const pinned = () => ({ open: 50000, high: 50000, low: 50000, close: 50000 });
+  const flatTape = buildSyntheticCandles(Date.parse('2024-01-01T00:00:00Z'), 500, pinned);
+  const { trades, summary } = await runBacktest(buildBacktestRequest(flatTape));
+  assert.equal(trades.length, 0, 'flat tape → 0 trades');
+  assert.equal(summary.netPnlUsd, 0, 'flat tape → 0 PnL');
+  assert.equal(summary.maxDrawdownPct, 0, 'flat tape → 0 drawdown');
+  pass('flat-price tape produces zero trades and zero PnL');
+} catch (e) { fail('flat tape', e); }
+
+// ---------------------------------------------------------------------------
+// Test 6 — BacktestResult shape contract
+//
+// UI code consuming the result depends on these top-level keys; pin them so a
+// refactor that drops or renames one fails loudly here.
+
+try {
+  const tape = buildSyntheticCandles(Date.parse('2024-01-01T00:00:00Z'), 200, () => ({
+    open: 50000, high: 50100, low: 49900, close: 50050,
+  }));
+  const result = await runBacktest(buildBacktestRequest(tape));
+  const requiredKeys = ['mode', 'trades', 'summary', 'timeline', 'window', 'warmup', 'openPositionsAtEnd', 'assumptions'];
+  for (const key of requiredKeys) assert.ok(key in result, `result.${key} present`);
+  const { mode, trades, timeline, summary, window: resultWindow } = result;
+  assert.equal(mode, 'backtest', 'result.mode = "backtest"');
+  assert.ok(Array.isArray(trades), 'result.trades is array');
+  assert.ok(Array.isArray(timeline), 'result.timeline is array');
+  assert.equal(typeof summary.netPnlUsd, 'number', 'summary.netPnlUsd is number');
+  assert.equal(resultWindow.timezone, 'UTC', 'window.timezone is UTC');
+  pass('runBacktest returns the documented BacktestResult shape');
+} catch (e) { fail('result shape', e); }
+
+// ---------------------------------------------------------------------------
+// Test 7 — A window with no candle data must throw, not yield a silent zero result
+
+try {
+  const windowStart = Date.parse('2024-01-01T00:00:00Z');
+  const noCandles: Candle[] = [];
+  const request = buildBacktestRequest(noCandles, {
+    dateRange: {
+      from: new Date(windowStart).toISOString(),
+      to: new Date(windowStart + FIFTEEN_MINUTES_MS).toISOString(),
+    },
+  });
+  let threw = true;
+  try {
+    await runBacktest(request);
+    threw = false;
+  } catch { /* expected: the run rejected the empty dataset */ }
+  assert.equal(threw, true, 'empty candle data throws (does not silently return 0 trades)');
+  pass('empty candle dataset throws explicit error');
+} catch (e) { fail('empty data error', e); }
+
+// ---------------------------------------------------------------------------
+// Teardown: put the feature flag back, then list every check that passed
+config.ENABLE_BACKTEST = originalFlag;
+
+console.log('[backtest-engine] OK — passed:');
+passed.forEach((name) => console.log(' ✓', name));
+console.log(`[backtest-engine] ${passed.length}/${passed.length} regression checks passed`);