learning_ai_invt_trdg/backend/testBacktestEngine.ts
Devin 4fc53703c6 feat(backtest): runtime + per-user feature flags (Option C)
Replaces the build-time VITE_BACKTEST_ENABLED gate with a fully runtime
flow: a global Cosmos-backed default (already shipped in the existing
dynamicConfig system) plus a new per-user override layer. An admin can
now enable backtest for specific users without flipping the global
switch — useful for staged rollout and beta testers.

Resolution order: per-user override > global config > env fallback.
Both /api/feature-flags (FE display) and /api/backtest/run (server
guard) consult the same merge logic.

Backend (backend/src/...):
  ~ services/profileRepository.ts
      + TradingUserFeatureFlags interface
      + featureFlags?: TradingUserFeatureFlags on TradingUserProfile
      + setUserFeatureFlags(userId, { backtestEnabled, ... })
      ~ saveCurrentUserProfile() — strip role + featureFlags from input
        so non-admins can't elevate via PATCH /api/me/profile
      ~ mergeTradingUserProfiles() — preserves explicit flag values only
  ~ services/apiServer.ts
      ~ /api/feature-flags merges per-user override into the response
      + /api/admin/users/:userId/feature-flags  (GET — overrides + effective)
      + /api/admin/users/:userId/feature-flags  (PATCH — admin-only writer)
      ~ /api/backtest/run resolves effective flags before guarding
  ~ backtest/index.ts
      + RunBacktestOptions.skipGlobalFeatureFlagCheck
      ~ runBacktest() honors the override (route already gated stricter)

Frontend (web/src/...):
  ~ backtest/flags.ts — isBacktestBuildEnabled() now returns true.
    Kept as a no-op function so existing callers don't break.
  + lib/userFeatureFlagsApi.ts — typed admin client
  + components/admin/UserFeatureFlagsPanel.tsx
      Tri-state picker per flag (Default / On / Off), lookup by user id,
      Save/Reset; shows the merged "effective" value.
  ~ tabs/ConfigTab.tsx — mounts <UserFeatureFlagsPanel /> below the
    existing global Backtest Access Control section.
  ~ layout-fixes.css §27 — styles for the per-user panel.

Tests:
  + testBacktestEngine: skipGlobalFeatureFlagCheck enables per-user
    override semantics. 12/12 regression checks pass.

Security note: featureFlags + role are explicitly stripped from
saveCurrentUserProfile input. Only the admin-only PATCH endpoint can
set per-user overrides.

Generated with [Devin](https://cli.devin.ai/docs)

Co-Authored-By: Devin <158243242+devin-ai-integration[bot]@users.noreply.github.com>
2026-05-10 19:04:12 +00:00

406 lines
18 KiB
TypeScript
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

/**
* Backtest engine regression tests.
*
* Stage B of docs/backtest/ENGINE_READINESS.md §4. Implements the minimum
* viable test suite called out in §3.1: codifies determinism, summary math,
* timeframe aggregation, log-level isolation, and edge-case handling. These
* tests are NOT exhaustive; they're the smallest set that prevents silent
* regression of behaviors verified during the readiness audit.
*
* Convention: matches sibling `testXxx.ts` scripts (no vitest in this repo).
* Run via `npm run check:backtest-engine`.
*/
import assert from 'node:assert/strict';
import { config } from './src/config/index.js';
import { runBacktest } from './src/backtest/index.js';
import logger, { withLogLevel } from './src/utils/logger.js';
import { aggregateCandles } from './src/backtest/data/normalize.js';
import { computeSummary, computeSharpe } from './src/backtest/metrics/computeSummary.js';
import type { BacktestRequest, EquityPoint } from './src/backtest/types.js';
import type { Candle } from './src/connectors/types.js';
// ---------------------------------------------------------------------------
// Helpers
/** Length of one 15-minute candle, in milliseconds. */
const FIFTEEN_MINUTES_MS = 15 * 60000;
/** Length of one hour (four 15-minute candles), in milliseconds. */
const ONE_HOUR_MS = 4 * FIFTEEN_MINUTES_MS;
/** Length of four hours, in milliseconds. */
const FOUR_HOURS_MS = 4 * ONE_HOUR_MS;
/**
 * Generates a run of consecutive 15-minute candles.
 *
 * @param startTs - Timestamp (ms) given to the first candle; each following
 *   candle is FIFTEEN_MINUTES_MS later than the previous one.
 * @param n - Number of candles to generate.
 * @param pricer - Called once per candle index, in ascending order, to supply
 *   that candle's open/high/low/close.
 * @returns The generated candles; every candle has `volume` set to 1.
 */
const buildSyntheticCandles = (
  startTs: number,
  n: number,
  pricer: (i: number) => { open: number; high: number; low: number; close: number }
): Candle[] => {
  const candles: Candle[] = [];
  // The array length doubles as the index of the next candle to create.
  while (candles.length < n) {
    const index = candles.length;
    const { open, high, low, close } = pricer(index);
    candles.push({
      timestamp: startTs + index * FIFTEEN_MINUTES_MS,
      open,
      high,
      low,
      close,
      volume: 1,
    });
  }
  return candles;
};
/**
 * Wraps a candle series in a BTC/USD, 15m backtest request that carries the
 * candles inline through the `json` data source.
 *
 * The date range starts at the first candle's timestamp and ends one
 * 15-minute step after the last candle's timestamp; with no candles both
 * timestamps fall back to 0 before the step is added.
 *
 * @param candles - Candles to embed in the request payload.
 * @param overrides - Top-level request fields that replace the defaults
 *   (shallow merge, applied last).
 * @returns The assembled request, cast to BacktestRequest.
 */
const buildBacktestRequest = (
  candles: Candle[],
  overrides: Partial<BacktestRequest> = {}
): BacktestRequest => {
  const symbol = 'BTC/USD';
  const timeframe = '15m';

  const firstTs = candles[0]?.timestamp ?? 0;
  const lastTs = candles[candles.length - 1]?.timestamp ?? 0;
  const dateRange = {
    from: new Date(firstTs).toISOString(),
    to: new Date(lastTs + FIFTEEN_MINUTES_MS).toISOString(),
  };

  // Each payload row is the candle tagged with its symbol and timeframe.
  const payloadCandles = candles.map(({ timestamp, open, high, low, close, volume }) => ({
    symbol,
    timeframe,
    timestamp,
    open,
    high,
    low,
    close,
    volume,
  }));

  const execution = {
    initialCapitalUsd: 10000,
    orderType: 'market',
    slippageBps: 5,
    feeBps: 10,
    partialFillPct: 1,
    fillOnNextBar: true,
    intraCandlePolicy: 'ohlc_path',
    triggerTimeframe: '1m',
    forceCloseAtWindowEnd: false,
  };

  const request = {
    mode: 'backtest',
    symbols: [symbol],
    timeframe,
    dateRange,
    dataSource: { type: 'json', payload: { candles: payloadCandles } },
    execution,
    strategyConfig: { enabled: true, symbol, riskPerTrade: 0.02, maxPositions: 1 },
    ...overrides,
  };
  return request as BacktestRequest;
};
// Track which checks pass for a clean summary at the end
// Names of the checks that have succeeded so far, in execution order.
const passed: string[] = [];

/**
 * Reports a failed check on stderr and terminates the process with exit code 1.
 *
 * @param name - Short label of the failing check.
 * @param error - Whatever was thrown; Error instances are printed as their
 *   stack (or message when no stack is available), anything else as-is.
 */
const fail = (name: string, error: unknown): never => {
  console.error(`\n[backtest-engine] FAIL: ${name}`);
  if (error instanceof Error) {
    console.error(error.stack || error.message);
  } else {
    console.error(error);
  }
  process.exit(1);
};

/**
 * Records a successful check for the end-of-run summary.
 *
 * @param name - Description of the check that passed.
 */
const pass = (name: string): void => {
  passed.push(name);
};
// ---------------------------------------------------------------------------
// Test 1 — aggregateCandles produces correct OHLC across timeframes
//
// Critical invariant: the production data pipeline depends on this aggregator
// to derive 1h and 4h from 15m. If aggregation breaks, the strategy engine
// silently fails with "Insufficient data" warnings.
try {
  // Four consecutive 15m bars with rising prices, listed as
  // [open, high, low, close, volume], so the expected 1h bar is easy to read off.
  const t0 = Date.parse('2024-01-01T00:00:00Z');
  const rows: Array<[number, number, number, number, number]> = [
    [100, 110, 95, 105, 1],
    [105, 115, 100, 112, 2],
    [112, 120, 108, 118, 3],
    [118, 125, 116, 122, 4],
  ];
  const quarterHourBars: Candle[] = rows.map(([open, high, low, close, volume], i) => ({
    timestamp: t0 + i * FIFTEEN_MINUTES_MS,
    open,
    high,
    low,
    close,
    volume,
  }));

  const hourly = aggregateCandles(quarterHourBars, '1h');
  assert.equal(hourly.length, 1, '4 × 15m candles → exactly 1 × 1h candle');

  const bar = hourly[0];
  assert.equal(bar.timestamp, t0, '1h candle timestamp = first 15m timestamp');
  assert.equal(bar.open, 100, '1h.open = first 15m.open');
  assert.equal(bar.close, 122, '1h.close = last 15m.close');
  assert.equal(bar.high, 125, '1h.high = max of 15m highs');
  assert.equal(bar.low, 95, '1h.low = min of 15m lows');
  assert.equal(bar.volume, 10, '1h.volume = sum of 15m volumes (1+2+3+4)');
  pass('aggregateCandles 15m→1h preserves OHLC + sums volume');
} catch (e) { fail('aggregateCandles 15m→1h', e); }
try {
  // Sixteen 15m bars span exactly four hours and should collapse into one 4h bar.
  const t0 = Date.parse('2024-01-01T00:00:00Z');
  const quarterHourBars: Candle[] = [];
  for (let i = 0; i < 16; i += 1) {
    const open = 100 + i;
    quarterHourBars.push({
      timestamp: t0 + i * FIFTEEN_MINUTES_MS,
      open,
      high: open + 5,
      low: open - 5,
      close: open + 1,
      volume: 1,
    });
  }

  const aggregated = aggregateCandles(quarterHourBars, '4h');
  assert.equal(aggregated.length, 1, '16 × 15m → 1 × 4h');

  const bar = aggregated[0];
  assert.equal(bar.open, 100, '4h.open = first 15m.open');
  // Last input bar is i = 15, whose close is 100 + 15 + 1.
  assert.equal(bar.close, 116, '4h.close = last 15m.close');
  assert.equal(bar.volume, 16, '4h.volume = sum');
  pass('aggregateCandles 15m→4h preserves OHLC + sums volume');
} catch (e) { fail('aggregateCandles 15m→4h', e); }
// ---------------------------------------------------------------------------
// Test 2 — computeSummary math: known inputs → known outputs
//
// The summary fields drive what users see (PnL, win rate, drawdown, sharpe).
// Hand-computed expectations protect against accidental refactor regressions.
try {
  // Five trades: three winners and two losers, netting +300 USD.
  const trades = [
    { pnlUsd: 100 }, // win
    { pnlUsd: -50 }, // loss
    { pnlUsd: 150 }, // win
    { pnlUsd: -100 }, // loss
    { pnlUsd: 200 }, // win
  ];
  // Equity curve fixture; the largest drawdownPct in it is 0.98.
  const timeline: EquityPoint[] = [
    { timestamp: 1, equityUsd: 10000, drawdownPct: 0, cashUsd: 10000, reservedUsd: 0 },
    { timestamp: 2, equityUsd: 10100, drawdownPct: 0, cashUsd: 10100, reservedUsd: 0 },
    { timestamp: 3, equityUsd: 10050, drawdownPct: 0.50, cashUsd: 10050, reservedUsd: 0 },
    { timestamp: 4, equityUsd: 10200, drawdownPct: 0, cashUsd: 10200, reservedUsd: 0 },
    { timestamp: 5, equityUsd: 10100, drawdownPct: 0.98, cashUsd: 10100, reservedUsd: 0 },
    { timestamp: 6, equityUsd: 10300, drawdownPct: 0, cashUsd: 10300, reservedUsd: 0 },
  ];
  const summary = computeSummary(trades, timeline, '15m');
  assert.equal(summary.totalTrades, 5, 'totalTrades = trades.length');
  assert.equal(summary.netPnlUsd, 300, 'netPnl = 100 + (-50) + 150 + (-100) + 200');
  assert.equal(summary.winRate, 60, 'winRate = 3/5 × 100');
  assert.equal(summary.maxDrawdownPct, 0.98, 'maxDrawdown = max of timeline drawdownPct');
  // A bare `typeof … === 'number'` check would also accept NaN and ±Infinity.
  // Number.isFinite rejects those as well as non-number values.
  assert.ok(Number.isFinite(summary.sharpe), 'sharpe is a finite number (computed from returns)');
  pass('computeSummary derives PnL/winRate/drawdown from trades + timeline');
} catch (e) { fail('computeSummary math', e); }
try {
  // Edge case: no trades and no equity points. Every headline metric must be
  // exactly 0; strict equality also rules out NaN.
  const summary = computeSummary([], [], '15m');
  const zeroFields = ['totalTrades', 'netPnlUsd', 'winRate', 'maxDrawdownPct', 'sharpe'] as const;
  for (const field of zeroFields) {
    assert.equal(summary[field], 0);
  }
  pass('computeSummary handles empty inputs (no NaN, all zeros)');
} catch (e) { fail('computeSummary empty inputs', e); }
try {
  // Edge case: a timeline holding a single equity point. The check expects
  // computeSharpe to return exactly 0 for it.
  const onlyPoint: EquityPoint = {
    timestamp: 1,
    equityUsd: 10000,
    drawdownPct: 0,
    cashUsd: 10000,
    reservedUsd: 0,
  };
  const sharpe = computeSharpe([onlyPoint], '15m');
  assert.equal(sharpe, 0, 'sharpe = 0 with < 2 points (no returns)');
  pass('computeSharpe handles single-point timeline (returns 0, no divide-by-zero)');
} catch (e) { fail('computeSharpe single point', e); }
// ---------------------------------------------------------------------------
// Test 3 — withLogLevel isolation: temporary level swap is safe across throws
//
// Stage D guarantee: backtest runs lower the log level via withLogLevel()
// during the run, but the level must be restored even if the wrapped fn
// throws. Without restoration, a single failed backtest would silence the
// rest of the process indefinitely.
try {
  const levelBefore = logger.level;

  // Success path: the callback's value is handed back and the level is unchanged afterwards.
  const value = await withLogLevel('error', () => 42);
  assert.equal(value, 42, 'withLogLevel returns inner value');
  assert.equal(logger.level, levelBefore, 'level restored after success');

  // Failure path: the callback's error must reach the caller and the level
  // must still be back at its starting value.
  let thrown: unknown = null;
  try {
    await withLogLevel('error', () => {
      throw new Error('boom');
    });
  } catch (err) {
    thrown = err;
  }
  const thrownMessage = (thrown as Error)?.message;
  assert.equal(thrownMessage, 'boom', 'inner throw propagates');
  assert.equal(logger.level, levelBefore, 'level restored after throw');

  pass('withLogLevel restores logger.level after success and after throw');
} catch (e) { fail('withLogLevel isolation', e); }
// ---------------------------------------------------------------------------
// Test 4 — runBacktest determinism: same input → identical output
//
// This is the headline guarantee. If the engine ever becomes non-deterministic
// (e.g. someone adds Math.random() or Date.now() inside a hot path), this test
// will catch it.
// Remember the global flag so the end of the script can put it back, then
// enable backtests for the runBacktest-based checks that follow.
const originalFlag = config.ENABLE_BACKTEST;
config.ENABLE_BACKTEST = true;
try {
  // 2000 bars following a slow upward drift with a sine wave on top; each bar
  // spans ±50 around its open and closes 10 above it.
  const t0 = Date.parse('2024-01-01T00:00:00Z');
  const candles = buildSyntheticCandles(t0, 2000, (i) => {
    const open = 50000 + i * 0.5 + Math.sin(i / 50) * 800;
    return { open, high: open + 50, low: open - 50, close: open + 10 };
  });
  const request = buildBacktestRequest(candles);

  // Run the same request twice, one after the other, and compare the results.
  const firstRun = await runBacktest(request);
  const secondRun = await runBacktest(request);

  assert.equal(firstRun.trades.length, secondRun.trades.length, 'same trade count');
  assert.equal(firstRun.summary.netPnlUsd, secondRun.summary.netPnlUsd, 'same netPnl');
  assert.equal(firstRun.summary.maxDrawdownPct, secondRun.summary.maxDrawdownPct, 'same drawdown');
  assert.equal(firstRun.summary.sharpe, secondRun.summary.sharpe, 'same sharpe');
  const firstJson = JSON.stringify(firstRun);
  const secondJson = JSON.stringify(secondRun);
  assert.equal(firstJson, secondJson, 'byte-identical JSON serialization');
  pass('runBacktest is deterministic across runs (byte-identical)');
} catch (e) { fail('determinism', e); }
// ---------------------------------------------------------------------------
// Test 5 — Flat-price tape produces zero trades
//
// Sanity check: a strategy can't trade itself into a position when nothing is
// happening. If this ever fails, the engine is fabricating signals.
try {
  // 500 bars where open, high, low and close are all the same price.
  const flatPrice = 50000;
  const flatBar = { open: flatPrice, high: flatPrice, low: flatPrice, close: flatPrice };
  const tape = buildSyntheticCandles(
    Date.parse('2024-01-01T00:00:00Z'),
    500,
    () => ({ ...flatBar })
  );

  const { trades, summary } = await runBacktest(buildBacktestRequest(tape));
  assert.equal(trades.length, 0, 'flat tape → 0 trades');
  assert.equal(summary.netPnlUsd, 0, 'flat tape → 0 PnL');
  assert.equal(summary.maxDrawdownPct, 0, 'flat tape → 0 drawdown');
  pass('flat-price tape produces zero trades and zero PnL');
} catch (e) { fail('flat tape', e); }
// ---------------------------------------------------------------------------
// Test 6 — BacktestResult shape contract
//
// Anyone wiring this into UI relies on the result shape. Lock down the
// top-level keys so a refactor that drops/renames them is loud.
try {
  const t0 = Date.parse('2024-01-01T00:00:00Z');
  const candles = buildSyntheticCandles(t0, 200, () => ({
    open: 50000,
    high: 50100,
    low: 49900,
    close: 50050,
  }));
  const result = await runBacktest(buildBacktestRequest(candles));

  // Top-level keys that must be present on every result.
  const requiredKeys = [
    'mode',
    'trades',
    'summary',
    'timeline',
    'window',
    'warmup',
    'openPositionsAtEnd',
    'assumptions',
  ];
  requiredKeys.forEach((key) => {
    assert.ok(key in result, `result.${key} present`);
  });

  // Spot-check the types and values of a few of those fields.
  assert.equal(result.mode, 'backtest', 'result.mode = "backtest"');
  assert.ok(Array.isArray(result.trades), 'result.trades is array');
  assert.ok(Array.isArray(result.timeline), 'result.timeline is array');
  assert.equal(typeof result.summary.netPnlUsd, 'number', 'summary.netPnlUsd is number');
  assert.equal(result.window.timezone, 'UTC', 'window.timezone is UTC');
  pass('runBacktest returns the documented BacktestResult shape');
} catch (e) { fail('result shape', e); }
// ---------------------------------------------------------------------------
// Test 7 — Empty / invalid window throws sensibly (no silent zero result)
try {
  // No candles at all, paired with an explicit one-bar date range.
  const windowStart = Date.parse('2024-01-01T00:00:00Z');
  const noCandles: Candle[] = [];

  let outcome: 'returned' | 'threw' = 'returned';
  try {
    const dateRange = {
      from: new Date(windowStart).toISOString(),
      to: new Date(windowStart + FIFTEEN_MINUTES_MS).toISOString(),
    };
    await runBacktest(buildBacktestRequest(noCandles, { dateRange }));
  } catch {
    // Any thrown error counts; the check only requires that the run does not succeed.
    outcome = 'threw';
  }

  assert.equal(outcome === 'threw', true, 'empty candle data throws (does not silently return 0 trades)');
  pass('empty candle dataset throws explicit error');
} catch (e) { fail('empty data error', e); }
// ---------------------------------------------------------------------------
// Test 8a — skipGlobalFeatureFlagCheck honors per-user override
//
// When the route handler has already done a per-user check (Stage E2 in
// docs/backtest/ENGINE_READINESS.md), runBacktest should bypass the global
// ENABLE_BACKTEST guard so a per-user override of `backtestEnabled: true`
// can let a specific user run backtests even when global is off.
try {
  // Close the global gate for this check only. The `finally` below puts the
  // previous value back on every exit path, including a failed assertion.
  const previousGlobalFlag = config.ENABLE_BACKTEST;
  config.ENABLE_BACKTEST = false;
  try {
    const start = Date.parse('2024-01-01T00:00:00Z');
    const candles = buildSyntheticCandles(start, 200, () => ({ open: 50000, high: 50100, low: 49900, close: 50050 }));
    const request = buildBacktestRequest(candles);

    // Caught values are `unknown`: read the message without assuming an Error,
    // so a non-Error throw yields a clear assertion failure rather than a TypeError.
    const messageOf = (thrown: unknown): string =>
      thrown instanceof Error ? thrown.message : String(thrown);

    // Without the override → the run must be rejected. The rejection is
    // recognised by the substring 'disabled' in the error message.
    let blockedAsExpected = false;
    try {
      await runBacktest(request);
    } catch (e) {
      blockedAsExpected = messageOf(e).includes('disabled');
    }
    assert.ok(blockedAsExpected, 'global gate blocks when ENABLE_BACKTEST=false');

    // With override → bypasses the gate. Should at minimum not throw the
    // "feature is disabled" error (may still return 0 trades for synthetic data).
    let bypassWorked = false;
    try {
      const r = await runBacktest(request, { skipGlobalFeatureFlagCheck: true });
      bypassWorked = typeof r.summary?.netPnlUsd === 'number';
    } catch (e) {
      // Any error other than the feature-disabled one is acceptable here;
      // the point is the global guard didn't fire.
      bypassWorked = !messageOf(e).includes('disabled');
    }
    assert.ok(bypassWorked, 'skipGlobalFeatureFlagCheck bypasses ENABLE_BACKTEST guard');
  } finally {
    config.ENABLE_BACKTEST = previousGlobalFlag;
  }
  pass('skipGlobalFeatureFlagCheck enables per-user override semantics');
} catch (e) { fail('skipGlobalFeatureFlagCheck', e); }
// ---------------------------------------------------------------------------
// Test 8 — Alpaca data source plumbing (without hitting the network)
//
// Stage C added BacktestAlpacaSource. We can't call the real Alpaca API in a
// test, but we can verify the type discriminator + dispatcher routing by
// asserting that a malformed Alpaca request fails with the expected error.
try {
  // Blank out the Alpaca credentials for the duration of the call, keeping
  // the previous values so they can be put back afterwards.
  const savedKey = config.ALPACA_API_KEY;
  const savedSecret = config.ALPACA_API_SECRET;
  config.ALPACA_API_KEY = '';
  config.ALPACA_API_SECRET = '';

  const alpacaRequest = {
    mode: 'backtest',
    symbols: ['AAPL'],
    timeframe: '15m',
    dateRange: { from: '2024-01-01T00:00:00Z', to: '2024-01-31T00:00:00Z' },
    dataSource: { type: 'alpaca', payload: { feed: 'iex' } },
    execution: {
      initialCapitalUsd: 10000,
      orderType: 'market',
      slippageBps: 5,
      feeBps: 10,
      partialFillPct: 1,
      fillOnNextBar: true,
      intraCandlePolicy: 'ohlc_path',
      triggerTimeframe: '1m',
      forceCloseAtWindowEnd: false,
    },
    strategyConfig: { enabled: true, symbol: 'AAPL', riskPerTrade: 0.02, maxPositions: 1 },
  } as BacktestRequest;

  let thrown: unknown = null;
  try {
    await runBacktest(alpacaRequest);
  } catch (err) {
    thrown = err;
  } finally {
    config.ALPACA_API_KEY = savedKey;
    config.ALPACA_API_SECRET = savedSecret;
  }

  // The run must fail, and the failure must mention the missing key by name.
  assert.ok(thrown instanceof Error, 'Alpaca source without credentials throws');
  assert.match(
    (thrown as Error).message,
    /ALPACA_API_KEY/i,
    'error message names the missing env var'
  );
  pass('alpaca data source dispatcher routes correctly + errors clearly');
} catch (e) { fail('alpaca dispatcher', e); }
// ---------------------------------------------------------------------------
// Put the global backtest flag back to the value it had before the
// runBacktest checks, then print the summary. fail() exits the process on the
// first failing check, so reaching this point means every check passed.
config.ENABLE_BACKTEST = originalFlag;
console.log('[backtest-engine] OK — passed:');
passed.forEach((name) => {
  console.log(' ✓', name);
});
console.log(`[backtest-engine] ${passed.length}/${passed.length} regression checks passed`);