learning_ai_invt_trdg/backend/testBacktestEngine.ts
Devin 35efa786bd test(backend): backtest engine regression suite (Stage B)
Adds testBacktestEngine.ts — the minimum viable test set called out in
docs/backtest/ENGINE_READINESS.md §3.1. Codifies behaviors verified
during the readiness audit so they don't silently regress.

Coverage (10 assertions, all passing):
  Unit (testable building blocks):
    - aggregateCandles 15m→1h: OHLC preserved, volume summed
    - aggregateCandles 15m→4h: OHLC preserved, volume summed
    - computeSummary: hand-computed PnL/winRate/drawdown match
    - computeSummary: empty inputs → all zeros (no NaN/Infinity)
    - computeSharpe: single-point timeline → 0 (no divide-by-zero)
    - withLogLevel: level restored after success AND after throw

  Integration (full runBacktest):
    - Determinism: same input → byte-identical JSON output
    - Flat-price tape → 0 trades, 0 PnL, 0 drawdown
    - Result shape contract: all documented top-level keys present
    - Empty candle dataset throws explicitly (no silent 0-trade result)

Conforms to the existing testXxx.ts convention used by the other
check:backtest-* scripts. Wired into package.json:
  - "check:backtest-engine": "node --import tsx testBacktestEngine.ts"
  - chained into the top-level "test" script

Generated with [Devin](https://cli.devin.ai/docs)

Co-Authored-By: Devin <158243242+devin-ai-integration[bot]@users.noreply.github.com>
2026-05-10 10:46:45 +00:00

323 lines
14 KiB
TypeScript
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

/**
* Backtest engine regression tests.
*
* Stage B of docs/backtest/ENGINE_READINESS.md §4. Implements the minimum
* viable test suite called out in §3.1: codifies determinism, summary math,
* timeframe aggregation, log-level isolation, and edge-case handling. These
* tests are NOT exhaustive; they're the smallest set that prevents silent
* regression of behaviors verified during the readiness audit.
*
* Convention: matches sibling `testXxx.ts` scripts (no vitest in this repo).
* Run via `npm run check:backtest-engine`.
*/
import assert from 'node:assert/strict';
import { config } from './src/config/index.js';
import { runBacktest } from './src/backtest/index.js';
import logger, { withLogLevel } from './src/utils/logger.js';
import { aggregateCandles } from './src/backtest/data/normalize.js';
import { computeSummary, computeSharpe } from './src/backtest/metrics/computeSummary.js';
import type { BacktestRequest, EquityPoint } from './src/backtest/types.js';
import type { Candle } from './src/connectors/types.js';
// ---------------------------------------------------------------------------
// Helpers
const FIFTEEN_MINUTES_MS = 15 * 60 * 1000;
const ONE_HOUR_MS = 60 * 60 * 1000;
const FOUR_HOURS_MS = 4 * ONE_HOUR_MS;
const buildSyntheticCandles = (
startTs: number,
n: number,
pricer: (i: number) => { open: number; high: number; low: number; close: number }
): Candle[] => {
const out: Candle[] = [];
for (let i = 0; i < n; i++) {
const p = pricer(i);
out.push({
timestamp: startTs + i * FIFTEEN_MINUTES_MS,
open: p.open,
high: p.high,
low: p.low,
close: p.close,
volume: 1,
});
}
return out;
};
const buildBacktestRequest = (
candles: Candle[],
overrides: Partial<BacktestRequest> = {}
): BacktestRequest => {
const fromTs = candles[0]?.timestamp ?? 0;
const toTs = (candles[candles.length - 1]?.timestamp ?? 0) + FIFTEEN_MINUTES_MS;
return {
mode: 'backtest',
symbols: ['BTC/USD'],
timeframe: '15m',
dateRange: {
from: new Date(fromTs).toISOString(),
to: new Date(toTs).toISOString(),
},
dataSource: {
type: 'json',
payload: {
candles: candles.map((c) => ({
symbol: 'BTC/USD',
timeframe: '15m',
timestamp: c.timestamp,
open: c.open,
high: c.high,
low: c.low,
close: c.close,
volume: c.volume,
})),
},
},
execution: {
initialCapitalUsd: 10000,
orderType: 'market',
slippageBps: 5,
feeBps: 10,
partialFillPct: 1,
fillOnNextBar: true,
intraCandlePolicy: 'ohlc_path',
triggerTimeframe: '1m',
forceCloseAtWindowEnd: false,
},
strategyConfig: { enabled: true, symbol: 'BTC/USD', riskPerTrade: 0.02, maxPositions: 1 },
...overrides,
} as BacktestRequest;
};
// Track which checks pass for a clean summary at the end
const passed: string[] = [];
const fail = (name: string, error: unknown): never => {
console.error(`\n[backtest-engine] FAIL: ${name}`);
console.error(error instanceof Error ? error.stack || error.message : error);
process.exit(1);
};
const pass = (name: string): void => {
passed.push(name);
};
// ---------------------------------------------------------------------------
// Test 1 — aggregateCandles produces correct OHLC across timeframes
//
// Critical invariant: the production data pipeline depends on this aggregator
// to derive 1h and 4h from 15m. If aggregation breaks, the strategy engine
// silently fails with "Insufficient data" warnings.
try {
// 4 × 15m candles with monotonic prices to make the expected OHLC obvious
const start = Date.parse('2024-01-01T00:00:00Z');
const fifteens: Candle[] = [
{ timestamp: start + 0 * FIFTEEN_MINUTES_MS, open: 100, high: 110, low: 95, close: 105, volume: 1 },
{ timestamp: start + 1 * FIFTEEN_MINUTES_MS, open: 105, high: 115, low: 100, close: 112, volume: 2 },
{ timestamp: start + 2 * FIFTEEN_MINUTES_MS, open: 112, high: 120, low: 108, close: 118, volume: 3 },
{ timestamp: start + 3 * FIFTEEN_MINUTES_MS, open: 118, high: 125, low: 116, close: 122, volume: 4 },
];
const oneHour = aggregateCandles(fifteens, '1h');
assert.equal(oneHour.length, 1, '4 × 15m candles → exactly 1 × 1h candle');
const h = oneHour[0];
assert.equal(h.timestamp, start, '1h candle timestamp = first 15m timestamp');
assert.equal(h.open, 100, '1h.open = first 15m.open');
assert.equal(h.close, 122, '1h.close = last 15m.close');
assert.equal(h.high, 125, '1h.high = max of 15m highs');
assert.equal(h.low, 95, '1h.low = min of 15m lows');
assert.equal(h.volume, 10, '1h.volume = sum of 15m volumes (1+2+3+4)');
pass('aggregateCandles 15m→1h preserves OHLC + sums volume');
} catch (e) { fail('aggregateCandles 15m→1h', e); }
try {
// 16 × 15m → 1 × 4h
const start = Date.parse('2024-01-01T00:00:00Z');
const fifteens: Candle[] = Array.from({ length: 16 }, (_, i) => ({
timestamp: start + i * FIFTEEN_MINUTES_MS,
open: 100 + i,
high: 100 + i + 5,
low: 100 + i - 5,
close: 100 + i + 1,
volume: 1,
}));
const fourHour = aggregateCandles(fifteens, '4h');
assert.equal(fourHour.length, 1, '16 × 15m → 1 × 4h');
assert.equal(fourHour[0].open, 100, '4h.open = first 15m.open');
assert.equal(fourHour[0].close, 116, '4h.close = last 15m.close');
assert.equal(fourHour[0].volume, 16, '4h.volume = sum');
pass('aggregateCandles 15m→4h preserves OHLC + sums volume');
} catch (e) { fail('aggregateCandles 15m→4h', e); }
// ---------------------------------------------------------------------------
// Test 2 — computeSummary math: known inputs → known outputs
//
// The summary fields drive what users see (PnL, win rate, drawdown, sharpe).
// Hand-computed expectations protect against accidental refactor regressions.
try {
const trades = [
{ pnlUsd: 100 }, // win
{ pnlUsd: -50 }, // loss
{ pnlUsd: 150 }, // win
{ pnlUsd: -100 }, // loss
{ pnlUsd: 200 }, // win
];
const timeline: EquityPoint[] = [
{ timestamp: 1, equityUsd: 10000, drawdownPct: 0, cashUsd: 10000, reservedUsd: 0 },
{ timestamp: 2, equityUsd: 10100, drawdownPct: 0, cashUsd: 10100, reservedUsd: 0 },
{ timestamp: 3, equityUsd: 10050, drawdownPct: 0.50, cashUsd: 10050, reservedUsd: 0 },
{ timestamp: 4, equityUsd: 10200, drawdownPct: 0, cashUsd: 10200, reservedUsd: 0 },
{ timestamp: 5, equityUsd: 10100, drawdownPct: 0.98, cashUsd: 10100, reservedUsd: 0 },
{ timestamp: 6, equityUsd: 10300, drawdownPct: 0, cashUsd: 10300, reservedUsd: 0 },
];
const summary = computeSummary(trades, timeline, '15m');
assert.equal(summary.totalTrades, 5, 'totalTrades = trades.length');
assert.equal(summary.netPnlUsd, 300, 'netPnl = 100 + (-50) + 150 + (-100) + 200');
assert.equal(summary.winRate, 60, 'winRate = 3/5 × 100');
assert.equal(summary.maxDrawdownPct, 0.98, 'maxDrawdown = max of timeline drawdownPct');
assert.equal(typeof summary.sharpe, 'number', 'sharpe is a number (computed from returns)');
pass('computeSummary derives PnL/winRate/drawdown from trades + timeline');
} catch (e) { fail('computeSummary math', e); }
try {
// Edge: empty trades and timeline → all zeros, no NaN/Infinity
const summary = computeSummary([], [], '15m');
assert.equal(summary.totalTrades, 0);
assert.equal(summary.netPnlUsd, 0);
assert.equal(summary.winRate, 0);
assert.equal(summary.maxDrawdownPct, 0);
assert.equal(summary.sharpe, 0);
pass('computeSummary handles empty inputs (no NaN, all zeros)');
} catch (e) { fail('computeSummary empty inputs', e); }
try {
// Edge: sharpe with single timeline point should not divide-by-zero
const single: EquityPoint[] = [
{ timestamp: 1, equityUsd: 10000, drawdownPct: 0, cashUsd: 10000, reservedUsd: 0 },
];
assert.equal(computeSharpe(single, '15m'), 0, 'sharpe = 0 with < 2 points (no returns)');
pass('computeSharpe handles single-point timeline (returns 0, no divide-by-zero)');
} catch (e) { fail('computeSharpe single point', e); }
// ---------------------------------------------------------------------------
// Test 3 — withLogLevel isolation: temporary level swap is safe across throws
//
// Stage D guarantee: backtest runs lower the log level via withLogLevel()
// during the run, but the level must be restored even if the wrapped fn
// throws. Without restoration, a single failed backtest would silence the
// rest of the process indefinitely.
try {
const initial = logger.level;
const result = await withLogLevel('error', () => 42);
assert.equal(result, 42, 'withLogLevel returns inner value');
assert.equal(logger.level, initial, 'level restored after success');
let caught: unknown = null;
try {
await withLogLevel('error', () => { throw new Error('boom'); });
} catch (e) {
caught = e;
}
assert.equal((caught as Error)?.message, 'boom', 'inner throw propagates');
assert.equal(logger.level, initial, 'level restored after throw');
pass('withLogLevel restores logger.level after success and after throw');
} catch (e) { fail('withLogLevel isolation', e); }
// ---------------------------------------------------------------------------
// Test 4 — runBacktest determinism: same input → identical output
//
// This is the headline guarantee. If the engine ever becomes non-deterministic
// (e.g. someone adds Math.random() or Date.now() inside a hot path), this test
// will catch it.
const originalFlag = config.ENABLE_BACKTEST;
config.ENABLE_BACKTEST = true;
try {
const start = Date.parse('2024-01-01T00:00:00Z');
const candles = buildSyntheticCandles(start, 2000, (i) => {
const base = 50000 + i * 0.5 + Math.sin(i / 50) * 800;
return { open: base, high: base + 50, low: base - 50, close: base + 10 };
});
const request = buildBacktestRequest(candles);
const r1 = await runBacktest(request);
const r2 = await runBacktest(request);
assert.equal(r1.trades.length, r2.trades.length, 'same trade count');
assert.equal(r1.summary.netPnlUsd, r2.summary.netPnlUsd, 'same netPnl');
assert.equal(r1.summary.maxDrawdownPct, r2.summary.maxDrawdownPct, 'same drawdown');
assert.equal(r1.summary.sharpe, r2.summary.sharpe, 'same sharpe');
assert.equal(JSON.stringify(r1), JSON.stringify(r2), 'byte-identical JSON serialization');
pass('runBacktest is deterministic across runs (byte-identical)');
} catch (e) { fail('determinism', e); }
// ---------------------------------------------------------------------------
// Test 5 — Flat-price tape produces zero trades
//
// Sanity check: a strategy can't trade itself into a position when nothing is
// happening. If this ever fails, the engine is fabricating signals.
try {
const start = Date.parse('2024-01-01T00:00:00Z');
const flat = buildSyntheticCandles(start, 500, () => ({ open: 50000, high: 50000, low: 50000, close: 50000 }));
const result = await runBacktest(buildBacktestRequest(flat));
assert.equal(result.trades.length, 0, 'flat tape → 0 trades');
assert.equal(result.summary.netPnlUsd, 0, 'flat tape → 0 PnL');
assert.equal(result.summary.maxDrawdownPct, 0, 'flat tape → 0 drawdown');
pass('flat-price tape produces zero trades and zero PnL');
} catch (e) { fail('flat tape', e); }
// ---------------------------------------------------------------------------
// Test 6 — BacktestResult shape contract
//
// Anyone wiring this into UI relies on the result shape. Lock down the
// top-level keys so a refactor that drops/renames them is loud.
try {
const start = Date.parse('2024-01-01T00:00:00Z');
const candles = buildSyntheticCandles(start, 200, () => ({ open: 50000, high: 50100, low: 49900, close: 50050 }));
const result = await runBacktest(buildBacktestRequest(candles));
const requiredKeys = ['mode', 'trades', 'summary', 'timeline', 'window', 'warmup', 'openPositionsAtEnd', 'assumptions'];
for (const key of requiredKeys) {
assert.ok(key in result, `result.${key} present`);
}
assert.equal(result.mode, 'backtest', 'result.mode = "backtest"');
assert.ok(Array.isArray(result.trades), 'result.trades is array');
assert.ok(Array.isArray(result.timeline), 'result.timeline is array');
assert.equal(typeof result.summary.netPnlUsd, 'number', 'summary.netPnlUsd is number');
assert.equal(result.window.timezone, 'UTC', 'window.timezone is UTC');
pass('runBacktest returns the documented BacktestResult shape');
} catch (e) { fail('result shape', e); }
// ---------------------------------------------------------------------------
// Test 7 — Empty / invalid window throws sensibly (no silent zero result)
try {
const start = Date.parse('2024-01-01T00:00:00Z');
const empty: Candle[] = [];
let threw = false;
try {
await runBacktest(buildBacktestRequest(empty, {
dateRange: {
from: new Date(start).toISOString(),
to: new Date(start + FIFTEEN_MINUTES_MS).toISOString(),
},
}));
} catch {
threw = true;
}
assert.equal(threw, true, 'empty candle data throws (does not silently return 0 trades)');
pass('empty candle dataset throws explicit error');
} catch (e) { fail('empty data error', e); }
// ---------------------------------------------------------------------------
config.ENABLE_BACKTEST = originalFlag;
console.log('[backtest-engine] OK — passed:');
for (const name of passed) console.log(' ✓', name);
console.log(`[backtest-engine] ${passed.length}/${passed.length} regression checks passed`);