learning_ai_invt_trdg/backend/testBacktestEngine.ts
Devin 4456873ab4 feat(backtest): Alpaca historical data source for equities (Stage C)
Adds BacktestAlpacaSource so saved trade plans for US equities can be
backtested without manual CSV upload. Mirrors the existing Kraken
loader pattern.

Backend:
  + backend/src/backtest/data/alpacaLoader.ts
      loadDatasetFromAlpaca({ symbols, fromTs, toTs, feed, adjustment })
      - Uses the existing @alpacahq/alpaca-trade-api SDK
      - Fetches 15Min bars; normalize.ts aggregates 1h/4h
      - 50-day warm-up lookback so ProEngine has enough EMA/RSI history
      - Throws cleanly with config guidance if ALPACA_API_KEY missing
      - In-memory cache keyed by (symbol, window, feed, adjustment)
  ~ backend/src/backtest/types.ts
      + BacktestAlpacaSource interface
      + 'alpaca' added to BacktestDataSource and BacktestDataSourceType
  ~ backend/src/backtest/data/loadHistoricalData.ts
      Wires 'alpaca' source into the dispatcher

Frontend:
  ~ web/src/backtest/types.ts — adds 'alpaca' to BacktestDataSourceType
  ~ web/src/backtest/components/BacktestConfigurator.tsx
      + 'alpaca' as a SourceType option
      + AUTO_FETCH_SOURCES list — kraken AND alpaca skip the upload-required
        validation
      + 'Alpaca (US equities)' option in the source-picker dropdown
      + Source-picker change handler seeds default IEX/raw Alpaca payload

Tests:
  + testBacktestEngine.ts: new "alpaca data source dispatcher" assertion
    Verifies the type discriminator + error message without hitting
    the network. 11/11 regression checks pass.

Caveats (documented in alpacaLoader inline + ENGINE_READINESS.md §3.4):
  - Free IEX feed has limited symbol coverage (~2016+)
  - SIP feed (paid) needed for full pre-2017 + full-market historical
  - The loader graceful-fails when credentials aren't configured
  - Existing Alpaca live-trading connector unchanged — backtest uses
    its own SDK instance with a different fetch path

Generated with [Devin](https://cli.devin.ai/docs)

Co-Authored-By: Devin <158243242+devin-ai-integration[bot]@users.noreply.github.com>
2026-05-10 10:59:06 +00:00

365 lines
16 KiB
TypeScript
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

/**
* Backtest engine regression tests.
*
* Stage B of docs/backtest/ENGINE_READINESS.md §4. Implements the minimum
* viable test suite called out in §3.1: codifies determinism, summary math,
* timeframe aggregation, log-level isolation, and edge-case handling. These
* tests are NOT exhaustive; they're the smallest set that prevents silent
* regression of behaviors verified during the readiness audit.
*
* Convention: matches sibling `testXxx.ts` scripts (no vitest in this repo).
* Run via `npm run check:backtest-engine`.
*/
import assert from 'node:assert/strict';
import { config } from './src/config/index.js';
import { runBacktest } from './src/backtest/index.js';
import logger, { withLogLevel } from './src/utils/logger.js';
import { aggregateCandles } from './src/backtest/data/normalize.js';
import { computeSummary, computeSharpe } from './src/backtest/metrics/computeSummary.js';
import type { BacktestRequest, EquityPoint } from './src/backtest/types.js';
import type { Candle } from './src/connectors/types.js';
// ---------------------------------------------------------------------------
// Helpers
// Every synthetic fixture sits on a 15-minute grid; the longer spans are
// expressed as multiples of it.
const FIFTEEN_MINUTES_MS = 15 * 60 * 1000;
const ONE_HOUR_MS = 4 * FIFTEEN_MINUTES_MS;
const FOUR_HOURS_MS = 4 * ONE_HOUR_MS;

/**
 * Generates `n` candles spaced 15 minutes apart, beginning at `startTs`.
 *
 * @param startTs Timestamp (ms) assigned to the first candle.
 * @param n Number of candles to generate; a non-positive count yields `[]`.
 * @param pricer Receives the zero-based candle index and returns that
 *   candle's open/high/low/close prices.
 * @returns The generated candles in index order, each with `volume` fixed at 1.
 */
const buildSyntheticCandles = (
  startTs: number,
  n: number,
  pricer: (i: number) => { open: number; high: number; low: number; close: number }
): Candle[] => {
  const series: Candle[] = [];
  let idx = 0;
  while (idx < n) {
    const { open, high, low, close } = pricer(idx);
    const timestamp = startTs + idx * FIFTEEN_MINUTES_MS;
    series.push({ timestamp, open, high, low, close, volume: 1 });
    idx += 1;
  }
  return series;
};
/**
 * Wraps a candle series in a complete backtest request for 'BTC/USD' on the
 * 15m timeframe, delivering the candles through an inline JSON data source.
 *
 * The date range runs from the first candle's timestamp to one 15-minute
 * step past the last candle's timestamp. With no candles, both bounds are
 * derived from timestamp 0.
 *
 * @param candles Candle series to embed in the request payload.
 * @param overrides Top-level request fields that replace the defaults
 *   (shallow spread, applied last).
 * @returns A freshly built request object.
 */
const buildBacktestRequest = (
  candles: Candle[],
  overrides: Partial<BacktestRequest> = {}
): BacktestRequest => {
  const firstCandle = candles[0];
  const lastCandle = candles[candles.length - 1];
  const windowStartMs = firstCandle?.timestamp ?? 0;
  // Extend the window by one bar beyond the last candle's timestamp.
  const windowEndMs = (lastCandle?.timestamp ?? 0) + FIFTEEN_MINUTES_MS;

  const dateRange = {
    from: new Date(windowStartMs).toISOString(),
    to: new Date(windowEndMs).toISOString(),
  };

  // Tag every candle with the symbol/timeframe the JSON source expects.
  const payloadCandles = candles.map(({ timestamp, open, high, low, close, volume }) => ({
    symbol: 'BTC/USD',
    timeframe: '15m',
    timestamp,
    open,
    high,
    low,
    close,
    volume,
  }));

  const execution = {
    initialCapitalUsd: 10000,
    orderType: 'market',
    slippageBps: 5,
    feeBps: 10,
    partialFillPct: 1,
    fillOnNextBar: true,
    intraCandlePolicy: 'ohlc_path',
    triggerTimeframe: '1m',
    forceCloseAtWindowEnd: false,
  };

  return {
    mode: 'backtest',
    symbols: ['BTC/USD'],
    timeframe: '15m',
    dateRange,
    dataSource: { type: 'json', payload: { candles: payloadCandles } },
    execution,
    strategyConfig: { enabled: true, symbol: 'BTC/USD', riskPerTrade: 0.02, maxPositions: 1 },
    ...overrides,
  } as BacktestRequest;
};
// Track which checks pass for a clean summary at the end
/** Names of the checks that have completed successfully, in execution order. */
const passed: string[] = [];

/**
 * Reports a failed check on stderr and terminates the process with exit code 1.
 *
 * @param name Label of the failing check.
 * @param error Whatever was thrown; for `Error` instances the stack is printed
 *   (falling back to the message), anything else is printed as-is.
 */
function fail(name: string, error: unknown): never {
  console.error(`\n[backtest-engine] FAIL: ${name}`);
  if (error instanceof Error) {
    console.error(error.stack || error.message);
  } else {
    console.error(error);
  }
  process.exit(1);
}

/**
 * Records a successful check so it appears in the end-of-run summary.
 *
 * @param name Label of the check that passed.
 */
function pass(name: string): void {
  passed.push(name);
}
// ---------------------------------------------------------------------------
// Test 1 — aggregateCandles produces correct OHLC across timeframes
//
// Critical invariant: the production data pipeline depends on this aggregator
// to derive 1h and 4h from 15m. If aggregation breaks, the strategy engine
// silently fails with "Insufficient data" warnings.
try {
  // Four back-to-back 15m bars with steadily rising prices, so the expected
  // aggregate (first open, last close, extreme high/low) can be read off directly.
  const windowStart = Date.parse('2024-01-01T00:00:00Z');
  // Each row: [open, high, low, close, volume]
  const rows: Array<[number, number, number, number, number]> = [
    [100, 110, 95, 105, 1],
    [105, 115, 100, 112, 2],
    [112, 120, 108, 118, 3],
    [118, 125, 116, 122, 4],
  ];
  const quarterHourBars: Candle[] = rows.map(([open, high, low, close, volume], idx) => ({
    timestamp: windowStart + idx * FIFTEEN_MINUTES_MS,
    open,
    high,
    low,
    close,
    volume,
  }));

  const hourly = aggregateCandles(quarterHourBars, '1h');
  assert.equal(hourly.length, 1, '4 × 15m candles → exactly 1 × 1h candle');

  const [hourBar] = hourly;
  assert.equal(hourBar.timestamp, windowStart, '1h candle timestamp = first 15m timestamp');
  assert.equal(hourBar.open, 100, '1h.open = first 15m.open');
  assert.equal(hourBar.close, 122, '1h.close = last 15m.close');
  assert.equal(hourBar.high, 125, '1h.high = max of 15m highs');
  assert.equal(hourBar.low, 95, '1h.low = min of 15m lows');
  assert.equal(hourBar.volume, 10, '1h.volume = sum of 15m volumes (1+2+3+4)');

  pass('aggregateCandles 15m→1h preserves OHLC + sums volume');
} catch (e) {
  fail('aggregateCandles 15m→1h', e);
}
try {
  // 16 × 15m → 1 × 4h
  //
  // Bar i has open 100+i, high 105+i, low 95+i, close 101+i, volume 1, so the
  // single 4h bar must report open 100 (bar 0), close 116 (bar 15),
  // high 120 (bar 15), low 95 (bar 0) and volume 16.
  const start = Date.parse('2024-01-01T00:00:00Z');
  const fifteens: Candle[] = Array.from({ length: 16 }, (_, i) => ({
    timestamp: start + i * FIFTEEN_MINUTES_MS,
    open: 100 + i,
    high: 100 + i + 5,
    low: 100 + i - 5,
    close: 100 + i + 1,
    volume: 1,
  }));
  const fourHour = aggregateCandles(fifteens, '4h');
  assert.equal(fourHour.length, 1, '16 × 15m → 1 × 4h');
  const bar = fourHour[0];
  // Same field coverage as the 1h check: the recorded check name promises that
  // OHLC is preserved, so high/low and the bucket timestamp are verified too.
  assert.equal(bar.timestamp, start, '4h candle timestamp = first 15m timestamp');
  assert.equal(bar.open, 100, '4h.open = first 15m.open');
  assert.equal(bar.close, 116, '4h.close = last 15m.close');
  assert.equal(bar.high, 120, '4h.high = max of 15m highs');
  assert.equal(bar.low, 95, '4h.low = min of 15m lows');
  assert.equal(bar.volume, 16, '4h.volume = sum');
  pass('aggregateCandles 15m→4h preserves OHLC + sums volume');
} catch (e) {
  fail('aggregateCandles 15m→4h', e);
}
// ---------------------------------------------------------------------------
// Test 2 — computeSummary math: known inputs → known outputs
//
// The summary fields drive what users see (PnL, win rate, drawdown, sharpe).
// Hand-computed expectations protect against accidental refactor regressions.
try {
  // Three wins and two losses, netting +300.
  const pnlSeries = [100, -50, 150, -100, 200];
  const trades = pnlSeries.map((pnlUsd) => ({ pnlUsd }));

  // Each row: [equityUsd, drawdownPct]. The account is fully in cash with
  // nothing reserved, and timestamps simply count up from 1.
  const equityPath: Array<[number, number]> = [
    [10000, 0],
    [10100, 0],
    [10050, 0.5],
    [10200, 0],
    [10100, 0.98],
    [10300, 0],
  ];
  const timeline: EquityPoint[] = equityPath.map(([equityUsd, drawdownPct], idx) => ({
    timestamp: idx + 1,
    equityUsd,
    drawdownPct,
    cashUsd: equityUsd,
    reservedUsd: 0,
  }));

  const stats = computeSummary(trades, timeline, '15m');
  assert.equal(stats.totalTrades, 5, 'totalTrades = trades.length');
  assert.equal(stats.netPnlUsd, 300, 'netPnl = 100 + (-50) + 150 + (-100) + 200');
  assert.equal(stats.winRate, 60, 'winRate = 3/5 × 100');
  assert.equal(stats.maxDrawdownPct, 0.98, 'maxDrawdown = max of timeline drawdownPct');
  assert.equal(typeof stats.sharpe, 'number', 'sharpe is a number (computed from returns)');

  pass('computeSummary derives PnL/winRate/drawdown from trades + timeline');
} catch (e) {
  fail('computeSummary math', e);
}
try {
  // Edge case: with no trades and no equity points every headline figure must
  // be exactly 0 (a NaN or Infinity would fail the equality check).
  const { totalTrades, netPnlUsd, winRate, maxDrawdownPct, sharpe } = computeSummary([], [], '15m');
  assert.equal(totalTrades, 0);
  assert.equal(netPnlUsd, 0);
  assert.equal(winRate, 0);
  assert.equal(maxDrawdownPct, 0);
  assert.equal(sharpe, 0);
  pass('computeSummary handles empty inputs (no NaN, all zeros)');
} catch (e) {
  fail('computeSummary empty inputs', e);
}
try {
  // Edge case: one equity point gives no period-over-period return, so the
  // Sharpe ratio must come back as 0 rather than a divide-by-zero artefact.
  const lonePoint: EquityPoint = {
    timestamp: 1,
    equityUsd: 10000,
    drawdownPct: 0,
    cashUsd: 10000,
    reservedUsd: 0,
  };
  const sharpe = computeSharpe([lonePoint], '15m');
  assert.equal(sharpe, 0, 'sharpe = 0 with < 2 points (no returns)');
  pass('computeSharpe handles single-point timeline (returns 0, no divide-by-zero)');
} catch (e) {
  fail('computeSharpe single point', e);
}
// ---------------------------------------------------------------------------
// Test 3 — withLogLevel isolation: temporary level swap is safe across throws
//
// Stage D guarantee: backtest runs lower the log level via withLogLevel()
// during the run, but the level must be restored even if the wrapped fn
// throws. Without restoration, a single failed backtest would silence the
// rest of the process indefinitely.
try {
  const levelBefore = logger.level;

  // Runs `action` and hands back whatever it throws or rejects with
  // (null when it completes normally).
  const captureThrown = async (action: () => unknown): Promise<unknown> => {
    try {
      await action();
      return null;
    } catch (err) {
      return err;
    }
  };

  // Success path: the inner value comes back and the level is untouched afterwards.
  const innerValue = await withLogLevel('error', () => 42);
  assert.equal(innerValue, 42, 'withLogLevel returns inner value');
  assert.equal(logger.level, levelBefore, 'level restored after success');

  // Failure path: the inner error surfaces to the caller and the level is still restored.
  const thrown = await captureThrown(() =>
    withLogLevel('error', () => {
      throw new Error('boom');
    })
  );
  assert.equal((thrown as Error)?.message, 'boom', 'inner throw propagates');
  assert.equal(logger.level, levelBefore, 'level restored after throw');

  pass('withLogLevel restores logger.level after success and after throw');
} catch (e) {
  fail('withLogLevel isolation', e);
}
// ---------------------------------------------------------------------------
// Test 4 — runBacktest determinism: same input → identical output
//
// This is the headline guarantee. If the engine ever becomes non-deterministic
// (e.g. someone adds Math.random() or Date.now() inside a hot path), this test
// will catch it.
// Switch the backtest feature flag on for the runBacktest checks that follow;
// the previous value is kept so the script can put it back when it finishes.
const originalFlag = config.ENABLE_BACKTEST;
config.ENABLE_BACKTEST = true;
try {
  const seriesStart = Date.parse('2024-01-01T00:00:00Z');
  // Slow upward drift with a sine wave on top — a pure function of the bar index.
  const wavePricer = (i: number) => {
    const base = 50000 + i * 0.5 + Math.sin(i / 50) * 800;
    return { open: base, high: base + 50, low: base - 50, close: base + 10 };
  };
  const request = buildBacktestRequest(buildSyntheticCandles(seriesStart, 2000, wavePricer));

  // Two back-to-back runs over the very same request object.
  const firstRun = await runBacktest(request);
  const secondRun = await runBacktest(request);

  assert.equal(firstRun.trades.length, secondRun.trades.length, 'same trade count');
  assert.equal(firstRun.summary.netPnlUsd, secondRun.summary.netPnlUsd, 'same netPnl');
  assert.equal(firstRun.summary.maxDrawdownPct, secondRun.summary.maxDrawdownPct, 'same drawdown');
  assert.equal(firstRun.summary.sharpe, secondRun.summary.sharpe, 'same sharpe');
  // Strongest check last: the full serialized results must match character for character.
  assert.equal(JSON.stringify(firstRun), JSON.stringify(secondRun), 'byte-identical JSON serialization');

  pass('runBacktest is deterministic across runs (byte-identical)');
} catch (e) {
  fail('determinism', e);
}
// ---------------------------------------------------------------------------
// Test 5 — Flat-price tape produces zero trades
//
// Sanity check: a strategy can't trade itself into a position when nothing is
// happening. If this ever fails, the engine is fabricating signals.
try {
  const FLAT_PRICE = 50000;
  const tapeStart = Date.parse('2024-01-01T00:00:00Z');
  // 500 bars whose open/high/low/close are all the same price.
  const flatTape = buildSyntheticCandles(tapeStart, 500, () => ({
    open: FLAT_PRICE,
    high: FLAT_PRICE,
    low: FLAT_PRICE,
    close: FLAT_PRICE,
  }));

  const { trades, summary } = await runBacktest(buildBacktestRequest(flatTape));
  assert.equal(trades.length, 0, 'flat tape → 0 trades');
  assert.equal(summary.netPnlUsd, 0, 'flat tape → 0 PnL');
  assert.equal(summary.maxDrawdownPct, 0, 'flat tape → 0 drawdown');

  pass('flat-price tape produces zero trades and zero PnL');
} catch (e) {
  fail('flat tape', e);
}
// ---------------------------------------------------------------------------
// Test 6 — BacktestResult shape contract
//
// Anyone wiring this into UI relies on the result shape. Lock down the
// top-level keys so a refactor that drops/renames them is loud.
try {
  const seriesStart = Date.parse('2024-01-01T00:00:00Z');
  // 200 identical bars; only the shape of the result matters for this check.
  const sampleBars = buildSyntheticCandles(seriesStart, 200, () => ({
    open: 50000,
    high: 50100,
    low: 49900,
    close: 50050,
  }));
  const result = await runBacktest(buildBacktestRequest(sampleBars));

  // Top-level keys that must be present on every result.
  const requiredKeys = [
    'mode',
    'trades',
    'summary',
    'timeline',
    'window',
    'warmup',
    'openPositionsAtEnd',
    'assumptions',
  ];
  requiredKeys.forEach((key) => {
    assert.ok(key in result, `result.${key} present`);
  });

  // Spot-check the types/values of the fields consumers read most.
  assert.equal(result.mode, 'backtest', 'result.mode = "backtest"');
  assert.ok(Array.isArray(result.trades), 'result.trades is array');
  assert.ok(Array.isArray(result.timeline), 'result.timeline is array');
  assert.equal(typeof result.summary.netPnlUsd, 'number', 'summary.netPnlUsd is number');
  assert.equal(result.window.timezone, 'UTC', 'window.timezone is UTC');

  pass('runBacktest returns the documented BacktestResult shape');
} catch (e) {
  fail('result shape', e);
}
// ---------------------------------------------------------------------------
// Test 7 — Empty / invalid window throws sensibly (no silent zero result)
try {
  const windowStart = Date.parse('2024-01-01T00:00:00Z');
  const noCandles: Candle[] = [];
  // A valid one-bar window, but with no candle data behind it.
  const oneBarWindow = {
    from: new Date(windowStart).toISOString(),
    to: new Date(windowStart + FIFTEEN_MINUTES_MS).toISOString(),
  };

  // Any throw or rejection counts; only the fact that it failed is checked.
  let rejected = false;
  try {
    await runBacktest(buildBacktestRequest(noCandles, { dateRange: oneBarWindow }));
  } catch {
    rejected = true;
  }

  assert.equal(rejected, true, 'empty candle data throws (does not silently return 0 trades)');
  pass('empty candle dataset throws explicit error');
} catch (e) {
  fail('empty data error', e);
}
// ---------------------------------------------------------------------------
// Test 8 — Alpaca data source plumbing (without hitting the network)
//
// Stage C added BacktestAlpacaSource. We can't call the real Alpaca API in a
// test, but we can verify the type discriminator + dispatcher routing by
// asserting that an Alpaca request made with the API credentials blanked out
// fails with the expected error.
try {
  // Remember the configured credentials, then blank them for the duration of the call.
  const savedCredentials = {
    key: config.ALPACA_API_KEY,
    secret: config.ALPACA_API_SECRET,
  };
  config.ALPACA_API_KEY = '';
  config.ALPACA_API_SECRET = '';

  const alpacaRequest = {
    mode: 'backtest',
    symbols: ['AAPL'],
    timeframe: '15m',
    dateRange: { from: '2024-01-01T00:00:00Z', to: '2024-01-31T00:00:00Z' },
    dataSource: { type: 'alpaca', payload: { feed: 'iex' } },
    execution: {
      initialCapitalUsd: 10000,
      orderType: 'market',
      slippageBps: 5,
      feeBps: 10,
      partialFillPct: 1,
      fillOnNextBar: true,
      intraCandlePolicy: 'ohlc_path',
      triggerTimeframe: '1m',
      forceCloseAtWindowEnd: false,
    },
    strategyConfig: { enabled: true, symbol: 'AAPL', riskPerTrade: 0.02, maxPositions: 1 },
  } as BacktestRequest;

  let failure: unknown = null;
  try {
    await runBacktest(alpacaRequest);
  } catch (err) {
    failure = err;
  } finally {
    // Put the credentials back before any assertion can abort the script.
    config.ALPACA_API_KEY = savedCredentials.key;
    config.ALPACA_API_SECRET = savedCredentials.secret;
  }

  assert.ok(failure instanceof Error, 'Alpaca source without credentials throws');
  assert.match(
    (failure as Error).message,
    /ALPACA_API_KEY/i,
    'error message names the missing env var'
  );
  pass('alpaca data source dispatcher routes correctly + errors clearly');
} catch (e) {
  fail('alpaca dispatcher', e);
}
// ---------------------------------------------------------------------------
// Undo the feature-flag override made before the runBacktest checks.
config.ENABLE_BACKTEST = originalFlag;

// Reaching this point means no check called fail(), which exits the process,
// so the number of passed checks is also the total.
const passedCount = passed.length;
console.log('[backtest-engine] OK — passed:');
passed.forEach((checkName) => {
  console.log(' ✓', checkName);
});
console.log(`[backtest-engine] ${passedCount}/${passedCount} regression checks passed`);