// learning_ai_invt_trdg/backend/testBacktestEngine.ts

/**
 * Backtest engine regression tests.
 *
 * Stage B of docs/backtest/ENGINE_READINESS.md §4. Implements the minimum
 * viable test suite called out in §3.1: codifies determinism, summary math,
 * timeframe aggregation, log-level isolation, and edge-case handling. These
 * tests are NOT exhaustive; they're the smallest set that prevents silent
 * regression of behaviors verified during the readiness audit.
 *
 * Convention: matches sibling `testXxx.ts` scripts (no vitest in this repo).
 * Run via `npm run check:backtest-engine`.
 */
import assert from 'node:assert/strict';
import { config } from './src/config/index.js';
import { runBacktest } from './src/backtest/index.js';
import logger, { withLogLevel } from './src/utils/logger.js';
import { aggregateCandles } from './src/backtest/data/normalize.js';
import { computeSummary, computeSharpe } from './src/backtest/metrics/computeSummary.js';
import type { BacktestRequest, EquityPoint } from './src/backtest/types.js';
import type { Candle } from './src/connectors/types.js';

// ---------------------------------------------------------------------------
// Helpers

/** Length of one 15-minute bar, in milliseconds. */
const FIFTEEN_MINUTES_MS = 15 * 60 * 1000;
/** Length of one hour, in milliseconds. */
const ONE_HOUR_MS = 60 * 60 * 1000;
/** Length of four hours, in milliseconds. */
const FOUR_HOURS_MS = 4 * ONE_HOUR_MS;

/**
 * Generates a tape of consecutive 15-minute candles.
 *
 * @param startTs - Epoch-millisecond timestamp of the first candle.
 * @param n - Number of candles to generate; a value <= 0 yields an empty tape.
 * @param pricer - Called once per candle with its zero-based index; supplies
 *   that candle's open/high/low/close.
 * @returns Candles spaced exactly FIFTEEN_MINUTES_MS apart, each with volume 1.
 */
const buildSyntheticCandles = (
    startTs: number,
    n: number,
    pricer: (i: number) => { open: number; high: number; low: number; close: number }
): Candle[] => {
    const tape: Candle[] = [];
    let index = 0;
    while (index < n) {
        const { open, high, low, close } = pricer(index);
        tape.push({
            timestamp: startTs + index * FIFTEEN_MINUTES_MS,
            open,
            high,
            low,
            close,
            volume: 1,
        });
        index += 1;
    }
    return tape;
};

/**
 * Wraps a candle tape in a complete backtest request for BTC/USD on the 15m
 * timeframe, served from an inline JSON data source.
 *
 * The date range runs from the first candle's timestamp to one bar past the
 * last candle's timestamp; with an empty tape both anchors fall back to epoch 0.
 *
 * @param candles - Tape to embed in the JSON payload.
 * @param overrides - Top-level request fields that replace the defaults. The
 *   merge is shallow: overriding e.g. `execution` replaces the whole object.
 * @returns The request, cast to BacktestRequest.
 */
const buildBacktestRequest = (
    candles: Candle[],
    overrides: Partial<BacktestRequest> = {}
): BacktestRequest => {
    const firstBar = candles[0];
    const lastBar = candles[candles.length - 1];
    const windowStartMs = firstBar?.timestamp ?? 0;
    // The window end is pushed one bar past the last candle's timestamp.
    const windowEndMs = (lastBar?.timestamp ?? 0) + FIFTEEN_MINUTES_MS;

    const dateRange = {
        from: new Date(windowStartMs).toISOString(),
        to: new Date(windowEndMs).toISOString(),
    };

    // Each payload row carries its own symbol/timeframe tag next to the OHLCV values.
    const rows = candles.map(({ timestamp, open, high, low, close, volume }) => ({
        symbol: 'BTC/USD',
        timeframe: '15m',
        timestamp,
        open,
        high,
        low,
        close,
        volume,
    }));

    const defaults = {
        mode: 'backtest',
        symbols: ['BTC/USD'],
        timeframe: '15m',
        dateRange,
        dataSource: {
            type: 'json',
            payload: { candles: rows },
        },
        execution: {
            initialCapitalUsd: 10000,
            orderType: 'market',
            slippageBps: 5,
            feeBps: 10,
            partialFillPct: 1,
            fillOnNextBar: true,
            intraCandlePolicy: 'ohlc_path',
            triggerTimeframe: '1m',
            forceCloseAtWindowEnd: false,
        },
        strategyConfig: { enabled: true, symbol: 'BTC/USD', riskPerTrade: 0.02, maxPositions: 1 },
    };

    // Overrides are spread last so the caller's fields win.
    return { ...defaults, ...overrides } as BacktestRequest;
};

// Names of the checks that have succeeded so far, in execution order. The
// list is printed as the closing report once every check has run.
const passed: string[] = [];

/**
 * Reports a failed check on stderr and terminates the process with exit code 1.
 * Error instances are printed with their stack (falling back to the message);
 * any other thrown value is printed as-is.
 */
const fail = (name: string, error: unknown): never => {
    console.error(`\n[backtest-engine] FAIL: ${name}`);
    if (error instanceof Error) {
        console.error(error.stack || error.message);
    } else {
        console.error(error);
    }
    process.exit(1);
};

/** Records a check as passed so it appears in the closing report. */
const pass = (name: string): void => {
    passed.push(name);
};

// ---------------------------------------------------------------------------
// Test 1 — aggregateCandles produces correct OHLC across timeframes
//
// Critical invariant: the production data pipeline depends on this aggregator
// to derive 1h and 4h from 15m. If aggregation breaks, the strategy engine
// silently fails with "Insufficient data" warnings.

try {
    // One full hour of 15m bars, written as [open, high, low, close, volume]
    // rows. Prices climb bar over bar so the expected 1h OHLC is obvious.
    const hourStart = Date.parse('2024-01-01T00:00:00Z');
    const rows: Array<[number, number, number, number, number]> = [
        [100, 110, 95, 105, 1],
        [105, 115, 100, 112, 2],
        [112, 120, 108, 118, 3],
        [118, 125, 116, 122, 4],
    ];
    const quarterHours: Candle[] = rows.map(([open, high, low, close, volume], slot) => ({
        timestamp: hourStart + slot * FIFTEEN_MINUTES_MS,
        open,
        high,
        low,
        close,
        volume,
    }));

    const hourly = aggregateCandles(quarterHours, '1h');
    assert.equal(hourly.length, 1, '4 × 15m candles → exactly 1 × 1h candle');

    const [bar] = hourly;
    assert.equal(bar.timestamp, hourStart, '1h candle timestamp = first 15m timestamp');
    assert.equal(bar.open, 100, '1h.open = first 15m.open');
    assert.equal(bar.close, 122, '1h.close = last 15m.close');
    assert.equal(bar.high, 125, '1h.high = max of 15m highs');
    assert.equal(bar.low, 95, '1h.low = min of 15m lows');
    assert.equal(bar.volume, 10, '1h.volume = sum of 15m volumes (1+2+3+4)');
    pass('aggregateCandles 15m→1h preserves OHLC + sums volume');
} catch (e) { fail('aggregateCandles 15m→1h', e); }

try {
    // 16 × 15m → 1 × 4h. Bar i opens at 100 + i, closes one above its open,
    // and ranges ±5 around its open, so across i = 0..15:
    //   open  = 100 (bar 0)      close = 116 (bar 15)
    //   high  = 120 (bar 15)     low   = 95  (bar 0)
    const start = Date.parse('2024-01-01T00:00:00Z');
    const fifteens: Candle[] = Array.from({ length: 16 }, (_, i) => ({
        timestamp: start + i * FIFTEEN_MINUTES_MS,
        open: 100 + i,
        high: 100 + i + 5,
        low: 100 + i - 5,
        close: 100 + i + 1,
        volume: 1,
    }));
    const fourHour = aggregateCandles(fifteens, '4h');
    assert.equal(fourHour.length, 1, '16 × 15m → 1 × 4h');
    const bar = fourHour[0];
    // Timestamp, high and low are checked too, so the "preserves OHLC" label
    // below is actually backed by assertions on the 4h path.
    assert.equal(bar.timestamp, start, '4h candle timestamp = first 15m timestamp');
    assert.equal(bar.open, 100, '4h.open = first 15m.open');
    assert.equal(bar.close, 116, '4h.close = last 15m.close');
    assert.equal(bar.high, 120, '4h.high = max of 15m highs');
    assert.equal(bar.low, 95, '4h.low = min of 15m lows');
    assert.equal(bar.volume, 16, '4h.volume = sum');
    pass('aggregateCandles 15m→4h preserves OHLC + sums volume');
} catch (e) { fail('aggregateCandles 15m→4h', e); }

// ---------------------------------------------------------------------------
// Test 2 — computeSummary math: known inputs → known outputs
//
// The summary fields drive what users see (PnL, win rate, drawdown, sharpe).
// Hand-computed expectations protect against accidental refactor regressions.

try {
    // Five trades: three winners (+100, +150, +200) and two losers (-50, -100).
    const trades = [
        { pnlUsd: 100 },   // win
        { pnlUsd: -50 },   // loss
        { pnlUsd: 150 },   // win
        { pnlUsd: -100 },  // loss
        { pnlUsd: 200 },   // win
    ];
    // Equity curve matching the trades above. The drawdownPct values are
    // supplied by hand; the check below expects the summary to report the
    // largest of them (0.98).
    const timeline: EquityPoint[] = [
        { timestamp: 1, equityUsd: 10000, drawdownPct: 0,    cashUsd: 10000, reservedUsd: 0 },
        { timestamp: 2, equityUsd: 10100, drawdownPct: 0,    cashUsd: 10100, reservedUsd: 0 },
        { timestamp: 3, equityUsd: 10050, drawdownPct: 0.50, cashUsd: 10050, reservedUsd: 0 },
        { timestamp: 4, equityUsd: 10200, drawdownPct: 0,    cashUsd: 10200, reservedUsd: 0 },
        { timestamp: 5, equityUsd: 10100, drawdownPct: 0.98, cashUsd: 10100, reservedUsd: 0 },
        { timestamp: 6, equityUsd: 10300, drawdownPct: 0,    cashUsd: 10300, reservedUsd: 0 },
    ];
    const summary = computeSummary(trades, timeline, '15m');
    assert.equal(summary.totalTrades, 5, 'totalTrades = trades.length');
    assert.equal(summary.netPnlUsd, 300, 'netPnl = 100 + (-50) + 150 + (-100) + 200');
    assert.equal(summary.winRate, 60, 'winRate = 3/5 × 100');
    assert.equal(summary.maxDrawdownPct, 0.98, 'maxDrawdown = max of timeline drawdownPct');
    assert.equal(typeof summary.sharpe, 'number', 'sharpe is a number (computed from returns)');
    // typeof NaN and typeof Infinity are both 'number', so the check above
    // alone cannot catch a degenerate Sharpe; require a finite value as well.
    assert.ok(Number.isFinite(summary.sharpe), 'sharpe is finite (not NaN/Infinity)');
    pass('computeSummary derives PnL/winRate/drawdown from trades + timeline');
} catch (e) { fail('computeSummary math', e); }

try {
    // Edge: no trades and no equity points. Every headline metric must be
    // exactly 0; a NaN or Infinity would fail the strict comparison.
    const { totalTrades, netPnlUsd, winRate, maxDrawdownPct, sharpe } = computeSummary([], [], '15m');
    assert.equal(totalTrades, 0);
    assert.equal(netPnlUsd, 0);
    assert.equal(winRate, 0);
    assert.equal(maxDrawdownPct, 0);
    assert.equal(sharpe, 0);
    pass('computeSummary handles empty inputs (no NaN, all zeros)');
} catch (e) { fail('computeSummary empty inputs', e); }

try {
    // Edge: a lone equity point gives no period-over-period returns, so the
    // Sharpe computation must return 0 rather than divide by zero.
    const lonePoint: EquityPoint = {
        timestamp: 1,
        equityUsd: 10000,
        drawdownPct: 0,
        cashUsd: 10000,
        reservedUsd: 0,
    };
    const sharpe = computeSharpe([lonePoint], '15m');
    assert.equal(sharpe, 0, 'sharpe = 0 with < 2 points (no returns)');
    pass('computeSharpe handles single-point timeline (returns 0, no divide-by-zero)');
} catch (e) { fail('computeSharpe single point', e); }

// ---------------------------------------------------------------------------
// Test 3 — withLogLevel isolation: temporary level swap is safe across throws
//
// Stage D guarantee: backtest runs lower the log level via withLogLevel()
// during the run, but the level must be restored even if the wrapped fn
// throws. Without restoration, a single failed backtest would silence the
// rest of the process indefinitely.

try {
    const levelBefore = logger.level;

    // Success path: the wrapped function's value comes back and the logger
    // level afterwards equals the level observed before the call.
    const value = await withLogLevel('error', () => 42);
    assert.equal(value, 42, 'withLogLevel returns inner value');
    assert.equal(logger.level, levelBefore, 'level restored after success');

    // Failure path: capture whatever the wrapper throws or rejects with. The
    // async wrapper turns a synchronous throw into a rejection, so both cases
    // land in the same `.catch`. `null` means nothing was thrown.
    const thrown: unknown = await (async () => {
        await withLogLevel('error', () => { throw new Error('boom'); });
        return null;
    })().catch((err: unknown) => err);
    assert.equal((thrown as Error)?.message, 'boom', 'inner throw propagates');
    assert.equal(logger.level, levelBefore, 'level restored after throw');
    pass('withLogLevel restores logger.level after success and after throw');
} catch (e) { fail('withLogLevel isolation', e); }

// ---------------------------------------------------------------------------
// Test 4 — runBacktest determinism: same input → identical output
//
// This is the headline guarantee. If the engine ever becomes non-deterministic
// (e.g. someone adds Math.random() or Date.now() inside a hot path), this test
// will catch it.

// Remember the global feature flag and switch it on for the checks below;
// it is put back at the very end of the script.
const originalFlag = config.ENABLE_BACKTEST;
config.ENABLE_BACKTEST = true;

try {
    const tapeStart = Date.parse('2024-01-01T00:00:00Z');
    // Slow upward drift plus a sine swing: a fully deterministic price path
    // with a fixed ±50 range around each bar's open.
    const wavePrice = (i: number) => {
        const mid = 50000 + i * 0.5 + Math.sin(i / 50) * 800;
        return { open: mid, high: mid + 50, low: mid - 50, close: mid + 10 };
    };
    const request = buildBacktestRequest(buildSyntheticCandles(tapeStart, 2000, wavePrice));

    // Run the very same request object twice, one run after the other.
    const first = await runBacktest(request);
    const second = await runBacktest(request);

    assert.equal(first.trades.length, second.trades.length, 'same trade count');
    assert.equal(first.summary.netPnlUsd, second.summary.netPnlUsd, 'same netPnl');
    assert.equal(first.summary.maxDrawdownPct, second.summary.maxDrawdownPct, 'same drawdown');
    assert.equal(first.summary.sharpe, second.summary.sharpe, 'same sharpe');
    // Strongest check last: the complete serialized results must match.
    assert.equal(JSON.stringify(first), JSON.stringify(second), 'byte-identical JSON serialization');
    pass('runBacktest is deterministic across runs (byte-identical)');
} catch (e) { fail('determinism', e); }

// ---------------------------------------------------------------------------
// Test 5 — Flat-price tape produces zero trades
//
// Sanity check: a strategy can't trade itself into a position when nothing is
// happening. If this ever fails, the engine is fabricating signals.

try {
    // 500 bars that all open, peak, trough and close at the same price.
    const flatPrice = 50000;
    const flatBar = () => ({ open: flatPrice, high: flatPrice, low: flatPrice, close: flatPrice });
    const tape = buildSyntheticCandles(Date.parse('2024-01-01T00:00:00Z'), 500, flatBar);

    const outcome = await runBacktest(buildBacktestRequest(tape));
    assert.equal(outcome.trades.length, 0, 'flat tape → 0 trades');
    assert.equal(outcome.summary.netPnlUsd, 0, 'flat tape → 0 PnL');
    assert.equal(outcome.summary.maxDrawdownPct, 0, 'flat tape → 0 drawdown');
    pass('flat-price tape produces zero trades and zero PnL');
} catch (e) { fail('flat tape', e); }

// ---------------------------------------------------------------------------
// Test 6 — BacktestResult shape contract
//
// Anyone wiring this into UI relies on the result shape. Lock down the
// top-level keys so a refactor that drops/renames them is loud.

try {
    // 200 identical bars: the content is irrelevant here, only the shape of
    // the returned object is inspected.
    const sameBar = () => ({ open: 50000, high: 50100, low: 49900, close: 50050 });
    const tape = buildSyntheticCandles(Date.parse('2024-01-01T00:00:00Z'), 200, sameBar);
    const result = await runBacktest(buildBacktestRequest(tape));

    // Top-level keys that must be present on the result.
    const requiredKeys = ['mode', 'trades', 'summary', 'timeline', 'window', 'warmup', 'openPositionsAtEnd', 'assumptions'];
    requiredKeys.forEach((key) => {
        assert.ok(key in result, `result.${key} present`);
    });

    // Spot-check the value types of the fields asserted on above.
    assert.equal(result.mode, 'backtest', 'result.mode = "backtest"');
    assert.ok(Array.isArray(result.trades), 'result.trades is array');
    assert.ok(Array.isArray(result.timeline), 'result.timeline is array');
    assert.equal(typeof result.summary.netPnlUsd, 'number', 'summary.netPnlUsd is number');
    assert.equal(result.window.timezone, 'UTC', 'window.timezone is UTC');
    pass('runBacktest returns the documented BacktestResult shape');
} catch (e) { fail('result shape', e); }

// ---------------------------------------------------------------------------
// Test 7 — Empty / invalid window throws sensibly (no silent zero result)

try {
    const windowStart = Date.parse('2024-01-01T00:00:00Z');
    const noCandles: Candle[] = [];

    // Resolves to true if building or running the request throws/rejects with
    // anything at all, and to false if the run completes normally.
    const threw = await (async () => {
        await runBacktest(buildBacktestRequest(noCandles, {
            dateRange: {
                from: new Date(windowStart).toISOString(),
                to: new Date(windowStart + FIFTEEN_MINUTES_MS).toISOString(),
            },
        }));
        return false;
    })().catch(() => true);

    assert.equal(threw, true, 'empty candle data throws (does not silently return 0 trades)');
    pass('empty candle dataset throws explicit error');
} catch (e) { fail('empty data error', e); }

// ---------------------------------------------------------------------------
// Test 8a — skipGlobalFeatureFlagCheck honors per-user override
//
// When the route handler has already done a per-user check (Stage E2 in
// docs/backtest/ENGINE_READINESS.md), runBacktest should bypass the global
// ENABLE_BACKTEST guard so a per-user override of `backtestEnabled: true`
// can let a specific user run backtests even when global is off.

try {
    const previousGlobalFlag = config.ENABLE_BACKTEST;
    config.ENABLE_BACKTEST = false;
    try {
        // Extracts a message from any thrown value. A non-Error rejection must
        // not crash the check with a TypeError that hides the real outcome.
        const messageOf = (err: unknown): string =>
            err instanceof Error ? err.message : String(err);

        const start = Date.parse('2024-01-01T00:00:00Z');
        const candles = buildSyntheticCandles(start, 200, () => ({ open: 50000, high: 50100, low: 49900, close: 50050 }));
        const request = buildBacktestRequest(candles);

        // Without the override → the global guard should reject the run with
        // an error whose message mentions "disabled".
        let blockedAsExpected = false;
        try {
            await runBacktest(request);
        } catch (e) {
            blockedAsExpected = messageOf(e).includes('disabled');
        }
        assert.ok(blockedAsExpected, 'global gate blocks when ENABLE_BACKTEST=false');

        // With override → bypasses the gate. Should at minimum not throw the
        // "feature is disabled" error (may still return 0 trades for synthetic data).
        let bypassWorked = false;
        try {
            const r = await runBacktest(request, { skipGlobalFeatureFlagCheck: true });
            bypassWorked = typeof r.summary?.netPnlUsd === 'number';
        } catch (e) {
            // Any error other than the feature-disabled one is acceptable here;
            // the point is the global guard didn't fire.
            bypassWorked = !messageOf(e).includes('disabled');
        }
        assert.ok(bypassWorked, 'skipGlobalFeatureFlagCheck bypasses ENABLE_BACKTEST guard');
    } finally {
        // Restore the flag on every exit path, not only when both assertions pass.
        config.ENABLE_BACKTEST = previousGlobalFlag;
    }
    pass('skipGlobalFeatureFlagCheck enables per-user override semantics');
} catch (e) { fail('skipGlobalFeatureFlagCheck', e); }

// ---------------------------------------------------------------------------
// Test 8b — Alpaca data source plumbing (without hitting the network)
//
// Stage C added BacktestAlpacaSource. We can't call the real Alpaca API in a
// test, but we can verify the type discriminator + dispatcher routing by
// asserting that an Alpaca request made without credentials fails with the
// expected error.

try {
    // Blank out the Alpaca credentials for the duration of this check.
    const savedKey = config.ALPACA_API_KEY;
    const savedSecret = config.ALPACA_API_SECRET;
    config.ALPACA_API_KEY = '';
    config.ALPACA_API_SECRET = '';

    let failure: unknown = null;
    try {
        const alpacaRequest = {
            mode: 'backtest',
            symbols: ['AAPL'],
            timeframe: '15m',
            dateRange: { from: '2024-01-01T00:00:00Z', to: '2024-01-31T00:00:00Z' },
            dataSource: { type: 'alpaca', payload: { feed: 'iex' } },
            execution: {
                initialCapitalUsd: 10000,
                orderType: 'market',
                slippageBps: 5,
                feeBps: 10,
                partialFillPct: 1,
                fillOnNextBar: true,
                intraCandlePolicy: 'ohlc_path',
                triggerTimeframe: '1m',
                forceCloseAtWindowEnd: false,
            },
            strategyConfig: { enabled: true, symbol: 'AAPL', riskPerTrade: 0.02, maxPositions: 1 },
        } as BacktestRequest;
        await runBacktest(alpacaRequest);
    } catch (err) {
        failure = err;
    } finally {
        // Put the credentials back before asserting anything.
        config.ALPACA_API_KEY = savedKey;
        config.ALPACA_API_SECRET = savedSecret;
    }

    assert.ok(failure instanceof Error, 'Alpaca source without credentials throws');
    assert.match(
        (failure as Error).message,
        /ALPACA_API_KEY/i,
        'error message names the missing env var'
    );
    pass('alpaca data source dispatcher routes correctly + errors clearly');
} catch (e) { fail('alpaca dispatcher', e); }

// ---------------------------------------------------------------------------

// Put the global feature flag back to the value it had before the
// runBacktest checks switched it on.
config.ENABLE_BACKTEST = originalFlag;

// Closing report. A failed check exits the process earlier, so reaching this
// point means every recorded check passed.
console.log('[backtest-engine] OK — passed:');
passed.forEach((name) => {
    console.log('  ✓', name);
});
const passedCount = passed.length;
console.log(`[backtest-engine] ${passedCount}/${passedCount} regression checks passed`);