/**
 * Backtest engine regression tests.
 *
 * Stage B of docs/backtest/ENGINE_READINESS.md §4. Implements the minimum
 * viable test suite called out in §3.1: codifies determinism, summary math,
 * timeframe aggregation, log-level isolation, and edge-case handling. These
 * tests are NOT exhaustive; they're the smallest set that prevents silent
 * regression of behaviors verified during the readiness audit.
 *
 * Convention: matches sibling `testXxx.ts` scripts (no vitest in this repo).
 * Run via `npm run check:backtest-engine`.
 */

import assert from 'node:assert/strict';
import { config } from './src/config/index.js';
import { runBacktest } from './src/backtest/index.js';
import logger, { withLogLevel } from './src/utils/logger.js';
import { aggregateCandles } from './src/backtest/data/normalize.js';
import { computeSummary, computeSharpe } from './src/backtest/metrics/computeSummary.js';
import type { BacktestRequest, EquityPoint } from './src/backtest/types.js';
import type { Candle } from './src/connectors/types.js';

// ---------------------------------------------------------------------------
// Helpers

const FIFTEEN_MINUTES_MS = 15 * 60 * 1000;
const ONE_HOUR_MS = 60 * 60 * 1000;
const FOUR_HOURS_MS = 4 * ONE_HOUR_MS;

/**
 * Builds `n` synthetic candles spaced 15 minutes apart.
 *
 * @param startTs - Timestamp (ms) of the first candle.
 * @param n - Number of candles to generate.
 * @param pricer - Called with the candle index; supplies that candle's OHLC.
 * @returns Candles with timestamps `startTs + i * 15m` and a fixed volume of 1.
 */
const buildSyntheticCandles = (
  startTs: number,
  n: number,
  pricer: (i: number) => { open: number; high: number; low: number; close: number }
): Candle[] => {
  const out: Candle[] = [];
  for (let i = 0; i < n; i++) {
    const p = pricer(i);
    out.push({
      timestamp: startTs + i * FIFTEEN_MINUTES_MS,
      open: p.open,
      high: p.high,
      low: p.low,
      close: p.close,
      volume: 1,
    });
  }
  return out;
};

/**
 * Wraps candles in a BTC/USD 15m backtest request backed by an inline JSON
 * data source.
 *
 * The date range runs from the first candle's timestamp to one bar past the
 * last candle's timestamp; for an empty candle list both ends fall back to
 * timestamp 0 (plus one bar for the upper bound).
 *
 * @param candles - Candles to embed in the JSON payload.
 * @param overrides - Top-level request fields that replace the defaults
 *   (shallow spread, applied last).
 * @returns The assembled request.
 */
const buildBacktestRequest = (
  candles: Candle[],
  // `Partial` needs its type argument; a bare `Partial` does not compile.
  overrides: Partial<BacktestRequest> = {}
): BacktestRequest => {
  const fromTs = candles[0]?.timestamp ?? 0;
  const toTs = (candles[candles.length - 1]?.timestamp ?? 0) + FIFTEEN_MINUTES_MS;
  // NOTE(review): the trailing cast hides any mismatch between this literal
  // and BacktestRequest — confirm whether it can be dropped.
  return {
    mode: 'backtest',
    symbols: ['BTC/USD'],
    timeframe: '15m',
    dateRange: {
      from: new Date(fromTs).toISOString(),
      to: new Date(toTs).toISOString(),
    },
    dataSource: {
      type: 'json',
      payload: {
        candles: candles.map((c) => ({
          symbol: 'BTC/USD',
          timeframe: '15m',
          timestamp: c.timestamp,
          open: c.open,
          high: c.high,
          low: c.low,
          close: c.close,
          volume: c.volume,
        })),
      },
    },
    execution: {
      initialCapitalUsd: 10000,
      orderType: 'market',
      slippageBps: 5,
      feeBps: 10,
      partialFillPct: 1,
      fillOnNextBar: true,
      intraCandlePolicy: 'ohlc_path',
      triggerTimeframe: '1m',
      forceCloseAtWindowEnd: false,
    },
    strategyConfig: { enabled: true, symbol: 'BTC/USD', riskPerTrade: 0.02, maxPositions: 1 },
    ...overrides,
  } as BacktestRequest;
};

// Track which checks pass for a clean summary at the end
const passed: string[] = [];

/**
 * Reports a failed check on stderr (stack if available) and terminates the
 * process with exit code 1, so the first failure aborts the whole script.
 */
const fail = (name: string, error: unknown): never => {
  console.error(`\n[backtest-engine] FAIL: ${name}`);
  console.error(error instanceof Error ? error.stack || error.message : error);
  process.exit(1);
};

/** Records a passed check for the end-of-run summary. */
const pass = (name: string): void => {
  passed.push(name);
};

// ---------------------------------------------------------------------------
// Test 1 — aggregateCandles produces correct OHLC across timeframes
//
// Critical invariant: the production data pipeline depends on this aggregator
// to derive 1h and 4h from 15m. If aggregation breaks, the strategy engine
// silently fails with "Insufficient data" warnings.
try {
  // 4 × 15m candles with rising prices to make the expected OHLC obvious
  const start = Date.parse('2024-01-01T00:00:00Z');
  const fifteens: Candle[] = [
    { timestamp: start + 0 * FIFTEEN_MINUTES_MS, open: 100, high: 110, low: 95, close: 105, volume: 1 },
    { timestamp: start + 1 * FIFTEEN_MINUTES_MS, open: 105, high: 115, low: 100, close: 112, volume: 2 },
    { timestamp: start + 2 * FIFTEEN_MINUTES_MS, open: 112, high: 120, low: 108, close: 118, volume: 3 },
    { timestamp: start + 3 * FIFTEEN_MINUTES_MS, open: 118, high: 125, low: 116, close: 122, volume: 4 },
  ];

  const oneHour = aggregateCandles(fifteens, '1h');
  assert.equal(oneHour.length, 1, '4 × 15m candles → exactly 1 × 1h candle');

  const h = oneHour[0];
  // Narrow the indexed access explicitly instead of relying on the length check.
  assert.ok(h, '1h candle is present');
  assert.equal(h.timestamp, start, '1h candle timestamp = first 15m timestamp');
  assert.equal(h.open, 100, '1h.open = first 15m.open');
  assert.equal(h.close, 122, '1h.close = last 15m.close');
  assert.equal(h.high, 125, '1h.high = max of 15m highs');
  assert.equal(h.low, 95, '1h.low = min of 15m lows');
  assert.equal(h.volume, 10, '1h.volume = sum of 15m volumes (1+2+3+4)');

  pass('aggregateCandles 15m→1h preserves OHLC + sums volume');
} catch (e) {
  fail('aggregateCandles 15m→1h', e);
}

try {
  // 16 × 15m → 1 × 4h
  const start = Date.parse('2024-01-01T00:00:00Z');
  const fifteens: Candle[] = Array.from({ length: 16 }, (_, i) => ({
    timestamp: start + i * FIFTEEN_MINUTES_MS,
    open: 100 + i,
    high: 100 + i + 5,
    low: 100 + i - 5,
    close: 100 + i + 1,
    volume: 1,
  }));

  const fourHour = aggregateCandles(fifteens, '4h');
  assert.equal(fourHour.length, 1, '16 × 15m → 1 × 4h');

  const bar = fourHour[0];
  assert.ok(bar, '4h candle is present');
  assert.equal(bar.open, 100, '4h.open = first 15m.open');
  assert.equal(bar.close, 116, '4h.close = last 15m.close');
  // The pass message claims OHLC is preserved, so high/low are pinned too:
  // highs are 100+i+5 (max at i=15), lows are 100+i-5 (min at i=0).
  assert.equal(bar.high, 120, '4h.high = max of 15m highs');
  assert.equal(bar.low, 95, '4h.low = min of 15m lows');
  assert.equal(bar.volume, 16, '4h.volume = sum');

  pass('aggregateCandles 15m→4h preserves OHLC + sums volume');
} catch (e) {
  fail('aggregateCandles 15m→4h', e);
}

// ---------------------------------------------------------------------------
// Test 2 — computeSummary math: known inputs → known outputs
//
// The summary fields drive what users see (PnL, win rate, drawdown, sharpe).
// Hand-computed expectations protect against accidental refactor regressions.

try {
  const trades = [
    { pnlUsd: 100 }, // win
    { pnlUsd: -50 }, // loss
    { pnlUsd: 150 }, // win
    { pnlUsd: -100 }, // loss
    { pnlUsd: 200 }, // win
  ];
  const timeline: EquityPoint[] = [
    { timestamp: 1, equityUsd: 10000, drawdownPct: 0, cashUsd: 10000, reservedUsd: 0 },
    { timestamp: 2, equityUsd: 10100, drawdownPct: 0, cashUsd: 10100, reservedUsd: 0 },
    { timestamp: 3, equityUsd: 10050, drawdownPct: 0.50, cashUsd: 10050, reservedUsd: 0 },
    { timestamp: 4, equityUsd: 10200, drawdownPct: 0, cashUsd: 10200, reservedUsd: 0 },
    { timestamp: 5, equityUsd: 10100, drawdownPct: 0.98, cashUsd: 10100, reservedUsd: 0 },
    { timestamp: 6, equityUsd: 10300, drawdownPct: 0, cashUsd: 10300, reservedUsd: 0 },
  ];

  const summary = computeSummary(trades, timeline, '15m');
  assert.equal(summary.totalTrades, 5, 'totalTrades = trades.length');
  assert.equal(summary.netPnlUsd, 300, 'netPnl = 100 + (-50) + 150 + (-100) + 200');
  assert.equal(summary.winRate, 60, 'winRate = 3/5 × 100');
  assert.equal(summary.maxDrawdownPct, 0.98, 'maxDrawdown = max of timeline drawdownPct');
  // `typeof x === 'number'` is also true for NaN and ±Infinity, which is
  // exactly the regression this check should catch.
  assert.ok(Number.isFinite(summary.sharpe), 'sharpe is a finite number (computed from returns)');

  pass('computeSummary derives PnL/winRate/drawdown from trades + timeline');
} catch (e) {
  fail('computeSummary math', e);
}

try {
  // Edge: empty trades and timeline → all zeros, no NaN/Infinity
  const summary = computeSummary([], [], '15m');
  assert.equal(summary.totalTrades, 0);
  assert.equal(summary.netPnlUsd, 0);
  assert.equal(summary.winRate, 0);
  assert.equal(summary.maxDrawdownPct, 0);
  assert.equal(summary.sharpe, 0);

  pass('computeSummary handles empty inputs (no NaN, all zeros)');
} catch (e) {
  fail('computeSummary empty inputs', e);
}

try {
  // Edge: sharpe with single timeline point should not divide-by-zero
  const single: EquityPoint[] = [
    { timestamp: 1, equityUsd: 10000, drawdownPct: 0, cashUsd: 10000, reservedUsd: 0 },
  ];
  assert.equal(computeSharpe(single, '15m'), 0, 'sharpe = 0 with < 2 points (no returns)');

  pass('computeSharpe handles single-point timeline (returns 0, no divide-by-zero)');
} catch (e) {
  fail('computeSharpe single point', e);
}

// ---------------------------------------------------------------------------
// Test 3 — withLogLevel isolation: temporary level swap is safe across throws
//
// Stage D guarantee: backtest runs lower the log level via withLogLevel()
// during the run, but the level must be restored even if the wrapped fn
// throws. Without restoration, a single failed backtest would silence the
// rest of the process indefinitely.

try {
  const initial = logger.level;

  const result = await withLogLevel('error', () => 42);
  assert.equal(result, 42, 'withLogLevel returns inner value');
  assert.equal(logger.level, initial, 'level restored after success');

  let caught: unknown = null;
  try {
    await withLogLevel('error', () => {
      throw new Error('boom');
    });
  } catch (e) {
    caught = e;
  }
  // Narrow with instanceof rather than casting the unknown catch value.
  assert.ok(caught instanceof Error, 'inner throw propagates');
  assert.equal(caught.message, 'boom', 'inner throw propagates');
  assert.equal(logger.level, initial, 'level restored after throw');

  pass('withLogLevel restores logger.level after success and after throw');
} catch (e) {
  fail('withLogLevel isolation', e);
}

// ---------------------------------------------------------------------------
// Test 4 — runBacktest determinism: same input → identical output
//
// This is the headline guarantee. If the engine ever becomes non-deterministic
// (e.g. someone adds Math.random() or Date.now() inside a hot path), this test
// will catch it.
// Force the global feature flag on for the engine tests below; the original
// value is put back at the very end of the script.
const originalFlag = config.ENABLE_BACKTEST;
config.ENABLE_BACKTEST = true;

try {
  const start = Date.parse('2024-01-01T00:00:00Z');
  // Slow upward drift plus a sine wave, so the tape is non-trivial but fully
  // reproducible.
  const candles = buildSyntheticCandles(start, 2000, (i) => {
    const base = 50000 + i * 0.5 + Math.sin(i / 50) * 800;
    return { open: base, high: base + 50, low: base - 50, close: base + 10 };
  });
  const request = buildBacktestRequest(candles);

  const r1 = await runBacktest(request);
  const r2 = await runBacktest(request);

  assert.equal(r1.trades.length, r2.trades.length, 'same trade count');
  assert.equal(r1.summary.netPnlUsd, r2.summary.netPnlUsd, 'same netPnl');
  assert.equal(r1.summary.maxDrawdownPct, r2.summary.maxDrawdownPct, 'same drawdown');
  assert.equal(r1.summary.sharpe, r2.summary.sharpe, 'same sharpe');
  assert.equal(JSON.stringify(r1), JSON.stringify(r2), 'byte-identical JSON serialization');

  pass('runBacktest is deterministic across runs (byte-identical)');
} catch (e) {
  fail('determinism', e);
}

// ---------------------------------------------------------------------------
// Test 5 — Flat-price tape produces zero trades
//
// Sanity check: a strategy can't trade itself into a position when nothing is
// happening. If this ever fails, the engine is fabricating signals.

try {
  const start = Date.parse('2024-01-01T00:00:00Z');
  const flat = buildSyntheticCandles(start, 500, () => ({
    open: 50000,
    high: 50000,
    low: 50000,
    close: 50000,
  }));

  const result = await runBacktest(buildBacktestRequest(flat));
  assert.equal(result.trades.length, 0, 'flat tape → 0 trades');
  assert.equal(result.summary.netPnlUsd, 0, 'flat tape → 0 PnL');
  assert.equal(result.summary.maxDrawdownPct, 0, 'flat tape → 0 drawdown');

  pass('flat-price tape produces zero trades and zero PnL');
} catch (e) {
  fail('flat tape', e);
}

// ---------------------------------------------------------------------------
// Test 6 — BacktestResult shape contract
//
// Anyone wiring this into UI relies on the result shape. Lock down the
// top-level keys so a refactor that drops/renames them is loud.

try {
  const start = Date.parse('2024-01-01T00:00:00Z');
  const candles = buildSyntheticCandles(start, 200, () => ({
    open: 50000,
    high: 50100,
    low: 49900,
    close: 50050,
  }));

  const result = await runBacktest(buildBacktestRequest(candles));

  const requiredKeys = [
    'mode',
    'trades',
    'summary',
    'timeline',
    'window',
    'warmup',
    'openPositionsAtEnd',
    'assumptions',
  ];
  for (const key of requiredKeys) {
    assert.ok(key in result, `result.${key} present`);
  }

  assert.equal(result.mode, 'backtest', 'result.mode = "backtest"');
  assert.ok(Array.isArray(result.trades), 'result.trades is array');
  assert.ok(Array.isArray(result.timeline), 'result.timeline is array');
  // A bare typeof check would accept NaN/Infinity; require a finite number.
  assert.ok(Number.isFinite(result.summary.netPnlUsd), 'summary.netPnlUsd is a finite number');
  assert.equal(result.window.timezone, 'UTC', 'window.timezone is UTC');

  pass('runBacktest returns the documented BacktestResult shape');
} catch (e) {
  fail('result shape', e);
}

// ---------------------------------------------------------------------------
// Test 7 — Empty / invalid window throws sensibly (no silent zero result)

try {
  const start = Date.parse('2024-01-01T00:00:00Z');
  const empty: Candle[] = [];

  let threw = false;
  try {
    await runBacktest(
      buildBacktestRequest(empty, {
        dateRange: {
          from: new Date(start).toISOString(),
          to: new Date(start + FIFTEEN_MINUTES_MS).toISOString(),
        },
      })
    );
  } catch {
    threw = true;
  }
  assert.equal(threw, true, 'empty candle data throws (does not silently return 0 trades)');

  pass('empty candle dataset throws explicit error');
} catch (e) {
  fail('empty data error', e);
}

// ---------------------------------------------------------------------------
// Test 8a — skipGlobalFeatureFlagCheck honors per-user override
//
// When the route handler has already done a per-user check (Stage E2 in
// docs/backtest/ENGINE_READINESS.md), runBacktest should bypass the global
// ENABLE_BACKTEST guard so a per-user override of `backtestEnabled: true`
// can let a specific user run backtests even when global is off.

try {
  // Read a message off an unknown catch value without assuming it is an Error.
  const messageOf = (e: unknown): string => (e instanceof Error ? e.message : String(e));

  const previousGlobalFlag = config.ENABLE_BACKTEST;
  config.ENABLE_BACKTEST = false;
  try {
    const start = Date.parse('2024-01-01T00:00:00Z');
    const candles = buildSyntheticCandles(start, 200, () => ({
      open: 50000,
      high: 50100,
      low: 49900,
      close: 50050,
    }));
    const request = buildBacktestRequest(candles);

    // Without the override → the global guard should reject the run
    let blockedAsExpected = false;
    try {
      await runBacktest(request);
    } catch (e) {
      blockedAsExpected = messageOf(e).includes('disabled');
    }
    assert.ok(blockedAsExpected, 'global gate blocks when ENABLE_BACKTEST=false');

    // With override → bypasses the gate. Should at minimum not throw the
    // "feature is disabled" error (may still return 0 trades for synthetic data).
    let bypassWorked = false;
    try {
      const r = await runBacktest(request, { skipGlobalFeatureFlagCheck: true });
      bypassWorked = typeof r.summary?.netPnlUsd === 'number';
    } catch (e) {
      // Any error other than the feature-disabled one is acceptable here;
      // the point is the global guard didn't fire.
      bypassWorked = !messageOf(e).includes('disabled');
    }
    assert.ok(bypassWorked, 'skipGlobalFeatureFlagCheck bypasses ENABLE_BACKTEST guard');
  } finally {
    // Restore on every path, not only after both assertions succeed.
    config.ENABLE_BACKTEST = previousGlobalFlag;
  }

  pass('skipGlobalFeatureFlagCheck enables per-user override semantics');
} catch (e) {
  fail('skipGlobalFeatureFlagCheck', e);
}

// ---------------------------------------------------------------------------
// Test 8b — Alpaca data source plumbing (without hitting the network)
//
// Stage C added BacktestAlpacaSource. We can't call the real Alpaca API in a
// test, but we can verify the type discriminator + dispatcher routing by
// asserting that a malformed Alpaca request fails with the expected error.
try {
  // Blank out the Alpaca credentials for the duration of the call so the
  // request fails on the credential check; the saved values are put back
  // before any assertion runs.
  const savedKey = config.ALPACA_API_KEY;
  const savedSecret = config.ALPACA_API_SECRET;
  config.ALPACA_API_KEY = '';
  config.ALPACA_API_SECRET = '';

  const alpacaRequest = {
    mode: 'backtest',
    symbols: ['AAPL'],
    timeframe: '15m',
    dateRange: { from: '2024-01-01T00:00:00Z', to: '2024-01-31T00:00:00Z' },
    dataSource: { type: 'alpaca', payload: { feed: 'iex' } },
    execution: {
      initialCapitalUsd: 10000,
      orderType: 'market',
      slippageBps: 5,
      feeBps: 10,
      partialFillPct: 1,
      fillOnNextBar: true,
      intraCandlePolicy: 'ohlc_path',
      triggerTimeframe: '1m',
      forceCloseAtWindowEnd: false,
    },
    strategyConfig: { enabled: true, symbol: 'AAPL', riskPerTrade: 0.02, maxPositions: 1 },
  } as BacktestRequest;

  let thrown: unknown = null;
  try {
    await runBacktest(alpacaRequest);
  } catch (err) {
    thrown = err;
  } finally {
    config.ALPACA_API_KEY = savedKey;
    config.ALPACA_API_SECRET = savedSecret;
  }

  assert.ok(thrown instanceof Error, 'Alpaca source without credentials throws');
  assert.match(
    (thrown as Error).message,
    /ALPACA_API_KEY/i,
    'error message names the missing env var'
  );

  pass('alpaca data source dispatcher routes correctly + errors clearly');
} catch (err) {
  fail('alpaca dispatcher', err);
}

// ---------------------------------------------------------------------------
// Wrap-up: put the global feature flag back, then list every check that
// passed. Reaching this point means no check called fail().

config.ENABLE_BACKTEST = originalFlag;

console.log('[backtest-engine] OK — passed:');
passed.forEach((checkName) => {
  console.log(' ✓', checkName);
});
const total = passed.length;
console.log(`[backtest-engine] ${total}/${total} regression checks passed`);