test(backend): backtest engine regression suite (Stage B)
Adds testBacktestEngine.ts — the minimum viable test set called out in
docs/backtest/ENGINE_READINESS.md §3.1. Codifies behaviors verified
during the readiness audit so they don't silently regress.
Coverage (10 checks, all passing; each check makes several assertions):
Unit (testable building blocks):
- aggregateCandles 15m→1h: OHLC preserved, volume summed
- aggregateCandles 15m→4h: OHLC preserved, volume summed
- computeSummary: hand-computed PnL/winRate/drawdown match
- computeSummary: empty inputs → all zeros (no NaN/Infinity)
- computeSharpe: single-point timeline → 0 (no divide-by-zero)
- withLogLevel: level restored after success AND after throw
Integration (full runBacktest):
- Determinism: same input → byte-identical JSON output
- Flat-price tape → 0 trades, 0 PnL, 0 drawdown
- Result shape contract: all documented top-level keys present
- Empty candle dataset throws explicitly (no silent 0-trade result)
Conforms to the existing testXxx.ts convention used by the other
check:backtest-* scripts. Wired into package.json:
- "check:backtest-engine": "node --import tsx testBacktestEngine.ts"
- chained into the top-level "test" script
Generated with [Devin](https://cli.devin.ai/docs)
Co-Authored-By: Devin <158243242+devin-ai-integration[bot]@users.noreply.github.com>
This commit is contained in:
parent
3c3dce6b73
commit
35efa786bd
@ -5,7 +5,7 @@
|
|||||||
"description": "ByteLyst Trading backend and execution control service",
|
"description": "ByteLyst Trading backend and execution control service",
|
||||||
"main": "index.js",
|
"main": "index.js",
|
||||||
"scripts": {
|
"scripts": {
|
||||||
"test": "npm run check:websocket-contract && npm run check:session-rule-normalization && npm run check:api-contract && npm run check:audit-repository && npm run check:market-data-endpoints && npm run check:chat-copilot-contract && npm run check:chat-copilot-fallbacks && npm run check:fmp-cache && npm run check:backtest-strategy-safety",
|
"test": "npm run check:websocket-contract && npm run check:session-rule-normalization && npm run check:api-contract && npm run check:audit-repository && npm run check:market-data-endpoints && npm run check:chat-copilot-contract && npm run check:chat-copilot-fallbacks && npm run check:fmp-cache && npm run check:backtest-strategy-safety && npm run check:backtest-engine",
|
||||||
"dev": "node --import tsx src/bootstrap.ts",
|
"dev": "node --import tsx src/bootstrap.ts",
|
||||||
"build": "tsc",
|
"build": "tsc",
|
||||||
"typecheck": "tsc --noEmit",
|
"typecheck": "tsc --noEmit",
|
||||||
@ -51,7 +51,8 @@
|
|||||||
"check": "npm run build && npm run lint && npm run format",
|
"check": "npm run build && npm run lint && npm run format",
|
||||||
"pre-deploy": "npm run check",
|
"pre-deploy": "npm run check",
|
||||||
"cleanup-stale-orders": "node --import tsx src/scripts/cleanupStaleOrders.ts",
|
"cleanup-stale-orders": "node --import tsx src/scripts/cleanupStaleOrders.ts",
|
||||||
"revert-expired-orders": "node --import tsx src/scripts/revertExpiredOrders.ts"
|
"revert-expired-orders": "node --import tsx src/scripts/revertExpiredOrders.ts",
|
||||||
|
"check:backtest-engine": "node --import tsx testBacktestEngine.ts"
|
||||||
},
|
},
|
||||||
"keywords": [],
|
"keywords": [],
|
||||||
"author": "",
|
"author": "",
|
||||||
|
|||||||
322
backend/testBacktestEngine.ts
Normal file
322
backend/testBacktestEngine.ts
Normal file
@ -0,0 +1,322 @@
|
|||||||
|
/**
|
||||||
|
* Backtest engine regression tests.
|
||||||
|
*
|
||||||
|
* Stage B of docs/backtest/ENGINE_READINESS.md §4. Implements the minimum
|
||||||
|
* viable test suite called out in §3.1: codifies determinism, summary math,
|
||||||
|
* timeframe aggregation, log-level isolation, and edge-case handling. These
|
||||||
|
* tests are NOT exhaustive; they're the smallest set that prevents silent
|
||||||
|
* regression of behaviors verified during the readiness audit.
|
||||||
|
*
|
||||||
|
* Convention: matches sibling `testXxx.ts` scripts (no vitest in this repo).
|
||||||
|
* Run via `npm run check:backtest-engine`.
|
||||||
|
*/
|
||||||
|
import assert from 'node:assert/strict';
|
||||||
|
import { config } from './src/config/index.js';
|
||||||
|
import { runBacktest } from './src/backtest/index.js';
|
||||||
|
import logger, { withLogLevel } from './src/utils/logger.js';
|
||||||
|
import { aggregateCandles } from './src/backtest/data/normalize.js';
|
||||||
|
import { computeSummary, computeSharpe } from './src/backtest/metrics/computeSummary.js';
|
||||||
|
import type { BacktestRequest, EquityPoint } from './src/backtest/types.js';
|
||||||
|
import type { Candle } from './src/connectors/types.js';
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// Helpers
|
||||||
|
|
||||||
|
/** Width of one 15-minute candle in milliseconds; used as the spacing between synthetic candles. */
const FIFTEEN_MINUTES_MS = 15 * 60 * 1000;
/** One hour in milliseconds. */
const ONE_HOUR_MS = 60 * 60 * 1000;
/** Four hours in milliseconds, derived from ONE_HOUR_MS. */
const FOUR_HOURS_MS = 4 * ONE_HOUR_MS;
|
||||||
|
|
||||||
|
/**
 * Builds `n` consecutive synthetic candles spaced `FIFTEEN_MINUTES_MS` apart.
 *
 * @param startTs - Timestamp of the first candle; callers in this file pass a `Date.parse` result.
 * @param n - Number of candles to generate (`n <= 0` yields an empty array).
 * @param pricer - Called once per 0-based index to supply that candle's open/high/low/close.
 * @returns The generated candles in index order; every candle carries `volume: 1`.
 */
const buildSyntheticCandles = (
  startTs: number,
  n: number,
  pricer: (i: number) => { open: number; high: number; low: number; close: number }
): Candle[] => {
  const candles: Candle[] = [];
  for (let i = 0; i < n; i += 1) {
    // Pick the four price fields explicitly so nothing else the pricer returns leaks into the candle.
    const { open, high, low, close } = pricer(i);
    candles.push({
      timestamp: startTs + i * FIFTEEN_MINUTES_MS,
      open,
      high,
      low,
      close,
      volume: 1,
    });
  }
  return candles;
};
|
||||||
|
|
||||||
|
/**
 * Wraps a candle series in a `BacktestRequest` for the single symbol 'BTC/USD' on the '15m' timeframe,
 * supplying the candles inline through a `json` data source.
 *
 * The default date range runs from the first candle's timestamp to the last candle's timestamp plus
 * one 15-minute bar. For an empty series both timestamps fall back to 0, so callers testing the empty
 * case should pass an explicit `dateRange` override.
 *
 * @param candles - Candles to embed in the request payload.
 * @param overrides - Top-level request fields spread last, so each one replaces the default wholesale
 *   (shallow merge — nested objects such as `execution` are not merged field by field).
 * @returns The assembled request, asserted to `BacktestRequest`.
 */
const buildBacktestRequest = (
  candles: Candle[],
  overrides: Partial<BacktestRequest> = {}
): BacktestRequest => {
  const symbol = 'BTC/USD';
  const timeframe = '15m';

  const firstTs = candles[0]?.timestamp ?? 0;
  const lastTs = candles[candles.length - 1]?.timestamp ?? 0;
  // The window end is one bar past the last candle's timestamp.
  const endTs = lastTs + FIFTEEN_MINUTES_MS;

  const payloadCandles = candles.map((c) => ({
    symbol,
    timeframe,
    timestamp: c.timestamp,
    open: c.open,
    high: c.high,
    low: c.low,
    close: c.close,
    volume: c.volume,
  }));

  return {
    mode: 'backtest',
    symbols: [symbol],
    timeframe,
    dateRange: {
      from: new Date(firstTs).toISOString(),
      to: new Date(endTs).toISOString(),
    },
    dataSource: {
      type: 'json',
      payload: { candles: payloadCandles },
    },
    execution: {
      initialCapitalUsd: 10000,
      orderType: 'market',
      slippageBps: 5,
      feeBps: 10,
      partialFillPct: 1,
      fillOnNextBar: true,
      intraCandlePolicy: 'ohlc_path',
      triggerTimeframe: '1m',
      forceCloseAtWindowEnd: false,
    },
    strategyConfig: { enabled: true, symbol, riskPerTrade: 0.02, maxPositions: 1 },
    ...overrides,
  } as BacktestRequest;
};
|
||||||
|
|
||||||
|
// Names of the checks that have passed so far, in execution order; printed in the final summary.
const passed: string[] = [];

/**
 * Reports a failed check on stderr and terminates the process with exit code 1.
 *
 * @param name - Short label of the failing check.
 * @param error - Whatever was thrown; for `Error` instances the stack (or message) is printed,
 *   any other value is printed as-is.
 * @returns Never returns — the process exits.
 */
const fail = (name: string, error: unknown): never => {
  console.error(`\n[backtest-engine] FAIL: ${name}`);
  const detail = error instanceof Error ? error.stack || error.message : error;
  console.error(detail);
  process.exit(1);
};

/**
 * Records a check as passed.
 *
 * @param name - Human-readable description of the check, appended to `passed`.
 */
const pass = (name: string): void => {
  passed.push(name);
};
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
// Test 1 — aggregateCandles produces correct OHLC across timeframes
//
// Critical invariant: the production data pipeline depends on this aggregator
// to derive 1h and 4h from 15m. If aggregation breaks, the strategy engine
// silently fails with "Insufficient data" warnings.

try {
  // 4 × 15m candles with monotonic prices to make the expected OHLC obvious
  const start = Date.parse('2024-01-01T00:00:00Z');
  const fifteens: Candle[] = [
    { timestamp: start + 0 * FIFTEEN_MINUTES_MS, open: 100, high: 110, low: 95, close: 105, volume: 1 },
    { timestamp: start + 1 * FIFTEEN_MINUTES_MS, open: 105, high: 115, low: 100, close: 112, volume: 2 },
    { timestamp: start + 2 * FIFTEEN_MINUTES_MS, open: 112, high: 120, low: 108, close: 118, volume: 3 },
    { timestamp: start + 3 * FIFTEEN_MINUTES_MS, open: 118, high: 125, low: 116, close: 122, volume: 4 },
  ];

  const oneHour = aggregateCandles(fifteens, '1h');
  assert.equal(oneHour.length, 1, '4 × 15m candles → exactly 1 × 1h candle');

  const [h] = oneHour;
  // Guard the indexed access so the field checks below never read from undefined.
  assert.ok(h, '1h candle is defined');
  assert.equal(h.timestamp, start, '1h candle timestamp = first 15m timestamp');
  assert.equal(h.open, 100, '1h.open = first 15m.open');
  assert.equal(h.close, 122, '1h.close = last 15m.close');
  assert.equal(h.high, 125, '1h.high = max of 15m highs');
  assert.equal(h.low, 95, '1h.low = min of 15m lows');
  assert.equal(h.volume, 10, '1h.volume = sum of 15m volumes (1+2+3+4)');

  pass('aggregateCandles 15m→1h preserves OHLC + sums volume');
} catch (e) {
  fail('aggregateCandles 15m→1h', e);
}
|
||||||
|
|
||||||
|
// 15m→4h aggregation: sixteen 15-minute candles starting at 00:00 UTC collapse into one 4h candle.
try {
  // 16 × 15m → 1 × 4h
  const start = Date.parse('2024-01-01T00:00:00Z');
  const fifteens: Candle[] = Array.from({ length: 16 }, (_, i) => ({
    timestamp: start + i * FIFTEEN_MINUTES_MS,
    open: 100 + i,
    high: 100 + i + 5,
    low: 100 + i - 5,
    close: 100 + i + 1,
    volume: 1,
  }));

  const fourHour = aggregateCandles(fifteens, '4h');
  assert.equal(fourHour.length, 1, '16 × 15m → 1 × 4h');

  const [bar] = fourHour;
  assert.ok(bar, '4h candle is defined');
  assert.equal(bar.timestamp, start, '4h candle timestamp = first 15m timestamp');
  assert.equal(bar.open, 100, '4h.open = first 15m.open');
  assert.equal(bar.close, 116, '4h.close = last 15m.close');
  // High/low were previously unchecked even though the check claims OHLC is preserved:
  // the highest high is the last candle's (100 + 15 + 5), the lowest low the first's (100 + 0 - 5).
  assert.equal(bar.high, 120, '4h.high = max of 15m highs');
  assert.equal(bar.low, 95, '4h.low = min of 15m lows');
  assert.equal(bar.volume, 16, '4h.volume = sum');

  pass('aggregateCandles 15m→4h preserves OHLC + sums volume');
} catch (e) {
  fail('aggregateCandles 15m→4h', e);
}
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
// Test 2 — computeSummary math: known inputs → known outputs
//
// The summary fields drive what users see (PnL, win rate, drawdown, sharpe).
// Hand-computed expectations protect against accidental refactor regressions.

try {
  // Three wins and two losses, netting +300.
  const trades = [
    { pnlUsd: 100 }, // win
    { pnlUsd: -50 }, // loss
    { pnlUsd: 150 }, // win
    { pnlUsd: -100 }, // loss
    { pnlUsd: 200 }, // win
  ];
  const timeline: EquityPoint[] = [
    { timestamp: 1, equityUsd: 10000, drawdownPct: 0, cashUsd: 10000, reservedUsd: 0 },
    { timestamp: 2, equityUsd: 10100, drawdownPct: 0, cashUsd: 10100, reservedUsd: 0 },
    { timestamp: 3, equityUsd: 10050, drawdownPct: 0.50, cashUsd: 10050, reservedUsd: 0 },
    { timestamp: 4, equityUsd: 10200, drawdownPct: 0, cashUsd: 10200, reservedUsd: 0 },
    { timestamp: 5, equityUsd: 10100, drawdownPct: 0.98, cashUsd: 10100, reservedUsd: 0 },
    { timestamp: 6, equityUsd: 10300, drawdownPct: 0, cashUsd: 10300, reservedUsd: 0 },
  ];

  const summary = computeSummary(trades, timeline, '15m');

  assert.equal(summary.totalTrades, 5, 'totalTrades = trades.length');
  assert.equal(summary.netPnlUsd, 300, 'netPnl = 100 + (-50) + 150 + (-100) + 200');
  assert.equal(summary.winRate, 60, 'winRate = 3/5 × 100');
  assert.equal(summary.maxDrawdownPct, 0.98, 'maxDrawdown = max of timeline drawdownPct');
  assert.equal(typeof summary.sharpe, 'number', 'sharpe is a number (computed from returns)');
  // typeof alone also accepts NaN and ±Infinity, so require a finite value explicitly.
  assert.ok(Number.isFinite(summary.sharpe), 'sharpe is finite (not NaN/Infinity)');

  pass('computeSummary derives PnL/winRate/drawdown from trades + timeline');
} catch (e) {
  fail('computeSummary math', e);
}
|
||||||
|
|
||||||
|
// computeSummary edge case: with no trades and no equity points every headline metric must be 0.
try {
  // Edge: empty trades and timeline → all zeros, no NaN/Infinity
  const summary = computeSummary([], [], '15m');

  // Strict equality against 0 also rejects NaN and ±Infinity.
  assert.equal(summary.totalTrades, 0, 'empty inputs → totalTrades 0');
  assert.equal(summary.netPnlUsd, 0, 'empty inputs → netPnlUsd 0');
  assert.equal(summary.winRate, 0, 'empty inputs → winRate 0');
  assert.equal(summary.maxDrawdownPct, 0, 'empty inputs → maxDrawdownPct 0');
  assert.equal(summary.sharpe, 0, 'empty inputs → sharpe 0');

  pass('computeSummary handles empty inputs (no NaN, all zeros)');
} catch (e) {
  fail('computeSummary empty inputs', e);
}
|
||||||
|
|
||||||
|
// computeSharpe edge case: one equity point yields no returns, so the result must be exactly 0.
try {
  // Edge: sharpe with single timeline point should not divide-by-zero
  const single: EquityPoint[] = [
    { timestamp: 1, equityUsd: 10000, drawdownPct: 0, cashUsd: 10000, reservedUsd: 0 },
  ];

  const sharpe = computeSharpe(single, '15m');
  assert.equal(sharpe, 0, 'sharpe = 0 with < 2 points (no returns)');

  pass('computeSharpe handles single-point timeline (returns 0, no divide-by-zero)');
} catch (e) {
  fail('computeSharpe single point', e);
}
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
// Test 3 — withLogLevel isolation: temporary level swap is safe across throws
//
// Stage D guarantee: backtest runs lower the log level via withLogLevel()
// during the run, but the level must be restored even if the wrapped fn
// throws. Without restoration, a single failed backtest would silence the
// rest of the process indefinitely.

try {
  const initial = logger.level;

  // Success path: the inner value comes back and the level is restored.
  const result = await withLogLevel('error', () => 42);
  assert.equal(result, 42, 'withLogLevel returns inner value');
  assert.equal(logger.level, initial, 'level restored after success');

  // Failure path: the inner error must propagate, and the level must still be restored.
  let caught: unknown = null;
  try {
    await withLogLevel('error', () => {
      throw new Error('boom');
    });
  } catch (e) {
    caught = e;
  }
  // Narrow with instanceof instead of a type assertion so a non-Error throw fails loudly.
  assert.ok(caught instanceof Error, 'inner throw propagates as an Error');
  assert.equal(caught.message, 'boom', 'inner throw propagates');
  assert.equal(logger.level, initial, 'level restored after throw');

  pass('withLogLevel restores logger.level after success and after throw');
} catch (e) {
  fail('withLogLevel isolation', e);
}
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
// Test 4 — runBacktest determinism: same input → identical output
//
// This is the headline guarantee. If the engine ever becomes non-deterministic
// (e.g. someone adds Math.random() or Date.now() inside a hot path), this test
// will catch it.

// Force the backtest feature flag on for the runBacktest checks; the previous
// value is kept in `originalFlag` so it can be put back at the end of the script.
const originalFlag = config.ENABLE_BACKTEST;
config.ENABLE_BACKTEST = true;

try {
  const start = Date.parse('2024-01-01T00:00:00Z');
  // 2000 candles on a slow upward drift with a sine wave on top; the tape is a
  // pure function of the index, so both runs receive identical input.
  const candles = buildSyntheticCandles(start, 2000, (i) => {
    const base = 50000 + i * 0.5 + Math.sin(i / 50) * 800;
    return { open: base, high: base + 50, low: base - 50, close: base + 10 };
  });
  const request = buildBacktestRequest(candles);

  // Run sequentially with the very same request object.
  const r1 = await runBacktest(request);
  const r2 = await runBacktest(request);

  assert.equal(r1.trades.length, r2.trades.length, 'same trade count');
  assert.equal(r1.summary.netPnlUsd, r2.summary.netPnlUsd, 'same netPnl');
  assert.equal(r1.summary.maxDrawdownPct, r2.summary.maxDrawdownPct, 'same drawdown');
  assert.equal(r1.summary.sharpe, r2.summary.sharpe, 'same sharpe');
  // The strongest check: the entire serialized results must match character for character.
  assert.equal(JSON.stringify(r1), JSON.stringify(r2), 'byte-identical JSON serialization');

  pass('runBacktest is deterministic across runs (byte-identical)');
} catch (e) {
  fail('determinism', e);
}
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
// Test 5 — Flat-price tape produces zero trades
//
// Sanity check: a strategy can't trade itself into a position when nothing is
// happening. If this ever fails, the engine is fabricating signals.

try {
  const start = Date.parse('2024-01-01T00:00:00Z');
  // 500 candles where open = high = low = close, i.e. no price movement at all.
  const flatPrice = { open: 50000, high: 50000, low: 50000, close: 50000 };
  const flat = buildSyntheticCandles(start, 500, () => ({ ...flatPrice }));

  const result = await runBacktest(buildBacktestRequest(flat));

  assert.equal(result.trades.length, 0, 'flat tape → 0 trades');
  assert.equal(result.summary.netPnlUsd, 0, 'flat tape → 0 PnL');
  assert.equal(result.summary.maxDrawdownPct, 0, 'flat tape → 0 drawdown');

  pass('flat-price tape produces zero trades and zero PnL');
} catch (e) {
  fail('flat tape', e);
}
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
// Test 6 — BacktestResult shape contract
//
// Anyone wiring this into UI relies on the result shape. Lock down the
// top-level keys so a refactor that drops/renames them is loud.

try {
  const start = Date.parse('2024-01-01T00:00:00Z');
  const candles = buildSyntheticCandles(start, 200, () => ({
    open: 50000,
    high: 50100,
    low: 49900,
    close: 50050,
  }));

  const result = await runBacktest(buildBacktestRequest(candles));

  // Presence check only — the value types of a few keys are pinned separately below.
  const requiredKeys = [
    'mode',
    'trades',
    'summary',
    'timeline',
    'window',
    'warmup',
    'openPositionsAtEnd',
    'assumptions',
  ];
  for (const key of requiredKeys) {
    assert.ok(key in result, `result.${key} present`);
  }

  assert.equal(result.mode, 'backtest', 'result.mode = "backtest"');
  assert.ok(Array.isArray(result.trades), 'result.trades is array');
  assert.ok(Array.isArray(result.timeline), 'result.timeline is array');
  assert.equal(typeof result.summary.netPnlUsd, 'number', 'summary.netPnlUsd is number');
  assert.equal(result.window.timezone, 'UTC', 'window.timezone is UTC');

  pass('runBacktest returns the documented BacktestResult shape');
} catch (e) {
  fail('result shape', e);
}
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
// Test 7 — Empty / invalid window throws sensibly (no silent zero result)

try {
  const start = Date.parse('2024-01-01T00:00:00Z');
  const empty: Candle[] = [];

  // Build the request outside the inner try so that only a failure of
  // runBacktest itself can satisfy the "throws" expectation. The explicit
  // dateRange replaces the helper's epoch-0 fallback for an empty series.
  const request = buildBacktestRequest(empty, {
    dateRange: {
      from: new Date(start).toISOString(),
      to: new Date(start + FIFTEEN_MINUTES_MS).toISOString(),
    },
  });

  let threw = false;
  let caught: unknown;
  try {
    await runBacktest(request);
  } catch (e) {
    threw = true;
    caught = e;
  }

  assert.equal(threw, true, 'empty candle data throws (does not silently return 0 trades)');
  // "Explicit" means a real Error object, not an arbitrary thrown value.
  assert.ok(caught instanceof Error, 'empty candle data is rejected with an Error instance');

  pass('empty candle dataset throws explicit error');
} catch (e) {
  fail('empty data error', e);
}
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------

// Put the feature flag back to the value captured before the runBacktest checks.
config.ENABLE_BACKTEST = originalFlag;

// Reaching this point means every check passed: fail() exits the process on the
// first failure, which is why the final count is printed as N/N.
console.log('[backtest-engine] OK — passed:');
for (const name of passed) {
  console.log(' ✓', name);
}
console.log(`[backtest-engine] ${passed.length}/${passed.length} regression checks passed`);
|
||||||
Loading…
Reference in New Issue
Block a user