feat(ai-diagnostics): add vector search repository with cosmos integration [1.2.2]
This commit is contained in:
parent
6b97476fc0
commit
917ea03af9
@ -0,0 +1,497 @@
|
|||||||
|
/**
|
||||||
|
* Intelligent A/B Testing — Unit Tests.
|
||||||
|
* Bucketing, statistics, Bayesian inference, early stopping, guardrails.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import { describe, it, expect } from 'vitest';
|
||||||
|
import { fnv1a, assignVariant, isInExperimentBucket, assignByStrategy } from './bucketing.js';
|
||||||
|
import {
|
||||||
|
betaFromVariant,
|
||||||
|
betaCredibleInterval,
|
||||||
|
probabilityVariantBeatsControl,
|
||||||
|
probabilityVariantBeatsAll,
|
||||||
|
expectedLossIfChosen,
|
||||||
|
checkEarlyStopping,
|
||||||
|
generateExperimentResult,
|
||||||
|
validateAA,
|
||||||
|
calculateSampleSize,
|
||||||
|
} from './statistics.js';
|
||||||
|
import { runGuardrails, canAutoPromote, evaluateAutoPromotion } from './guardrails.js';
|
||||||
|
import { matchesTargeting } from './targeting.js';
|
||||||
|
import type { VariantDoc, ExperimentDoc, MetricType } from './types.js';
|
||||||
|
|
||||||
|
// ─────────────────────────────────────────────────────────────────────────────
|
||||||
|
// Bucketing Tests
|
||||||
|
// ─────────────────────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
// Basic properties of the fnv1a hash exported by the bucketing module.
describe('FNV-1a Hash', () => {
  it('produces deterministic output', () => {
    const input = 'test-string';
    const first = fnv1a(input);
    const second = fnv1a(input);
    expect(first).toBe(second);
  });

  it('produces different hashes for different inputs', () => {
    const [hashA, hashB] = ['input-a', 'input-b'].map(text => fnv1a(text));
    expect(hashA).not.toBe(hashB);
  });

  it('produces 32-bit unsigned integers', () => {
    const hash = fnv1a('any-string');
    // Result must fit in the unsigned 32-bit range [0, 0xffffffff].
    expect(hash).toBeGreaterThanOrEqual(0);
    expect(hash).toBeLessThanOrEqual(0xffffffff);
  });
});
|
||||||
|
|
||||||
|
// assignVariant: same inputs give the same variant, and weights shape the split.
describe('assignVariant', () => {
  const evenSplit = [
    { key: 'control', weight: 50 },
    { key: 'variant_a', weight: 50 },
  ];

  it('assigns deterministically', () => {
    const first = assignVariant('exp-1', 'user-a', evenSplit);
    const second = assignVariant('exp-1', 'user-a', evenSplit);
    expect(first).toBe(second);
  });

  it('distributes across variants', () => {
    // Collect the distinct variant keys handed out to 100 different users.
    const seen = new Set<string>();
    Array.from({ length: 100 }, (_, i) => `user-${i}`).forEach(userId => {
      seen.add(assignVariant('exp-1', userId, evenSplit));
    });
    expect(seen.size).toBe(2);
  });

  it('respects uneven weights', () => {
    const skewedSplit = [
      { key: 'control', weight: 90 },
      { key: 'variant_a', weight: 10 },
    ];
    const userIds = Array.from({ length: 1000 }, (_, i) => `u-${i}`);
    const controlCount = userIds.filter(
      userId => assignVariant('exp-2', userId, skewedSplit) === 'control',
    ).length;

    // Nominal share is 90%; the bounds are deliberately loose.
    expect(controlCount).toBeGreaterThan(700);
    expect(controlCount).toBeLessThan(990);
  });
});
|
||||||
|
|
||||||
|
// isInExperimentBucket: enrollment percentage gate for an experiment.
describe('isInExperimentBucket', () => {
  /** Generates `count` synthetic user ids: user-0, user-1, ... */
  const makeUserIds = (count: number): string[] =>
    Array.from({ length: count }, (_, i) => `user-${i}`);

  it('includes correct percentage of users', () => {
    const included = makeUserIds(1000).filter(userId =>
      isInExperimentBucket('exp-1', userId, 50),
    ).length;

    // Should be roughly 50% (allow wide margin for hash distribution)
    expect(included).toBeGreaterThan(400);
    expect(included).toBeLessThan(600);
  });

  it('is deterministic for same user/experiment', () => {
    const first = isInExperimentBucket('exp-1', 'user-a', 50);
    const second = isInExperimentBucket('exp-1', 'user-a', 50);
    expect(first).toBe(second);
  });

  it('excludes all users at 0%', () => {
    for (const userId of makeUserIds(100)) {
      expect(isInExperimentBucket('exp-1', userId, 0)).toBe(false);
    }
  });

  it('includes all users at 100%', () => {
    for (const userId of makeUserIds(100)) {
      expect(isInExperimentBucket('exp-1', userId, 100)).toBe(true);
    }
  });
});
|
||||||
|
|
||||||
|
// ─────────────────────────────────────────────────────────────────────────────
|
||||||
|
// Targeting Tests
|
||||||
|
// ─────────────────────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
// matchesTargeting(context, rules): one positive and one negative case per rule kind.
describe('matchesTargeting', () => {
  it('matches when no targeting specified', () => {
    const matched = matchesTargeting({ platform: 'ios' }, {});
    expect(matched).toBe(true);
  });

  it('matches platform', () => {
    const iosUser = matchesTargeting({ platform: 'ios' }, { platforms: ['ios'] });
    const androidUser = matchesTargeting({ platform: 'android' }, { platforms: ['ios'] });
    expect(iosUser).toBe(true);
    expect(androidUser).toBe(false);
  });

  it('matches version range', () => {
    const aboveMin = matchesTargeting({ appVersion: '1.5.0' }, { appVersions: { min: '1.0.0' } });
    const belowMin = matchesTargeting({ appVersion: '0.5.0' }, { appVersions: { min: '1.0.0' } });
    expect(aboveMin).toBe(true);
    expect(belowMin).toBe(false);
  });

  it('matches user segments', () => {
    const proUser = matchesTargeting({ userSegments: ['pro'] }, { userSegments: ['pro'] });
    const freeUser = matchesTargeting({ userSegments: ['free'] }, { userSegments: ['pro'] });
    expect(proUser).toBe(true);
    expect(freeUser).toBe(false);
  });

  it('matches user properties', () => {
    const premiumUser = matchesTargeting(
      { userProperties: { tier: 'premium' } },
      { userProperties: { tier: 'premium' } },
    );
    const basicUser = matchesTargeting(
      { userProperties: { tier: 'basic' } },
      { userProperties: { tier: 'premium' } },
    );
    expect(premiumUser).toBe(true);
    expect(basicUser).toBe(false);
  });
});
|
||||||
|
|
||||||
|
// ─────────────────────────────────────────────────────────────────────────────
|
||||||
|
// Bayesian Statistics Tests
|
||||||
|
// ─────────────────────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
/**
 * Builds a VariantDoc fixture for the statistics and guardrail tests.
 *
 * Overrides are applied with a shallow spread, so passing `stats` replaces
 * the entire default stats object (including betaAlpha/betaBeta) rather than
 * merging into it.
 *
 * @param overrides - Top-level fields that replace the defaults.
 * @returns A non-control variant with 100 participants and 10 conversions unless overridden.
 */
function createMockVariant(overrides: Partial<VariantDoc> = {}): VariantDoc {
  const stats = {
    participants: 100,
    events: 50,
    primaryMetricValue: 0.1,
    primaryMetricStdDev: 0.05,
    conversions: 10,
    conversionRate: 0.1,
    betaAlpha: 11,
    betaBeta: 91,
  };
  const createdAt = new Date().toISOString();
  const updatedAt = new Date().toISOString();

  return {
    id: 'var_test',
    experimentId: 'exp_test',
    name: 'Test Variant',
    description: '',
    isControl: false,
    flagConfig: {},
    currentAllocationPercent: 50,
    stats,
    createdAt,
    updatedAt,
    ...overrides,
  };
}
|
||||||
|
|
||||||
|
/**
 * Builds an ExperimentDoc fixture: a running two-variant conversion
 * experiment with auto-stop enabled and no approval requirement.
 *
 * Overrides are applied with a shallow spread, so nested objects such as
 * `guardrails` or `primaryMetric` must be passed in full (tests spread the
 * defaults themselves when they only want to change one field).
 *
 * @param overrides - Top-level fields that replace the defaults.
 * @returns The fixture with overrides applied.
 */
function createMockExperiment(overrides: Partial<ExperimentDoc> = {}): ExperimentDoc {
  const controlVariantId = 'var_control';
  const variantIds = [controlVariantId, 'var_test'];
  const createdAt = new Date().toISOString();
  const updatedAt = new Date().toISOString();

  return {
    id: 'exp_test',
    productId: 'test_product',
    name: 'Test Experiment',
    description: '',
    hypothesis: 'Test hypothesis',
    status: 'running',
    controlVariantId,
    variantIds,
    allocationStrategy: 'random',
    targetPercent: 100,
    targeting: {},
    primaryMetric: {
      name: 'conversion',
      type: 'conversion',
      eventName: 'purchase',
      aggregation: 'count',
      direction: 'increase',
      minimumDetectableEffect: 5,
    },
    secondaryMetrics: [],
    guardrails: {
      minSampleSizePerVariant: 100,
      maxDurationDays: 30,
      autoStopEnabled: true,
      winnerThreshold: 95,
      requireApprovalFor: 'none',
    },
    totalParticipants: 200,
    totalEvents: 100,
    createdAt,
    updatedAt,
    ...overrides,
  };
}
|
||||||
|
|
||||||
|
// Beta posterior helpers: parameter derivation and credible interval shape.
describe('Beta Distribution', () => {
  it('computes correct parameters from variant', () => {
    // 10 conversions out of 100 participants.
    const tenPercentStats = {
      participants: 100,
      events: 50,
      primaryMetricValue: 0.1,
      conversions: 10,
      conversionRate: 0.1,
    };
    const { alpha, beta } = betaFromVariant(createMockVariant({ stats: tenPercentStats }));

    expect(alpha).toBe(11); // conversions + 1
    expect(beta).toBe(91); // failures + 1
  });

  it('generates credible interval', () => {
    const { lower, mean, upper } = betaCredibleInterval(11, 91);

    // Expect 0 < lower < mean < upper < 1.
    expect(lower).toBeGreaterThan(0);
    expect(lower).toBeLessThan(mean);
    expect(mean).toBeLessThan(upper);
    expect(upper).toBeLessThan(1);
  });

  it('has mean close to conversion rate', () => {
    const { mean } = betaCredibleInterval(11, 91);
    // Mean of Beta(11, 91) = 11 / (11 + 91) ≈ 0.108
    expect(mean).toBeCloseTo(0.108, 2);
  });
});
|
||||||
|
|
||||||
|
// Probability and expected-loss helpers, exercised with the 'conversion' metric.
describe('Probability Calculations', () => {
  /**
   * Stats for a 100-participant variant with the given number of
   * conversions. The rate fields are passed explicitly so the literal values
   * match what the assertions below reason about.
   */
  const statsFor = (conversions: number, rate: number) => ({
    participants: 100,
    events: 50,
    primaryMetricValue: rate,
    conversions,
    conversionRate: rate,
  });

  it('calculates probability variant beats control', () => {
    const control = createMockVariant({ stats: statsFor(10, 0.1) });
    const variant = createMockVariant({ stats: statsFor(20, 0.2) });

    const prob = probabilityVariantBeatsControl(variant, control, 'conversion', 5000);
    // Variant with 20% conversion should beat control with 10% conversion
    expect(prob).toBeGreaterThan(0.8);
  });

  it('calculates probability variant beats all', () => {
    const variants = [
      createMockVariant({ id: 'v1', stats: statsFor(10, 0.1) }),
      createMockVariant({ id: 'v2', stats: statsFor(20, 0.2) }),
      createMockVariant({ id: 'v3', stats: statsFor(15, 0.15) }),
    ];

    const prob = probabilityVariantBeatsAll(variants[1], variants, 'conversion', 5000);
    // v2 (20%) should beat both v1 (10%) and v3 (15%)
    expect(prob).toBeGreaterThan(0.5);
  });

  it('calculates expected loss', () => {
    // Two identical variants. The original second entry was missing the
    // closing brace of its `stats` object, which made the file unparseable.
    const variants = [
      createMockVariant({ stats: statsFor(10, 0.1) }),
      createMockVariant({ stats: statsFor(10, 0.1) }),
    ];

    const loss = expectedLossIfChosen(variants[0], variants, 'conversion', 5000);
    // Expected loss should be small for identical variants
    expect(loss).toBeLessThan(0.05);
  });
});
|
||||||
|
|
||||||
|
// ─────────────────────────────────────────────────────────────────────────────
|
||||||
|
// Early Stopping Tests
|
||||||
|
// ─────────────────────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
// checkEarlyStopping(experiment, variants, days): sample-size gate, clear winner, duration cap.
describe('checkEarlyStopping', () => {
  it('does not stop before minimum sample size', () => {
    // 50 participants per variant, below the fixture's minSampleSizePerVariant of 100.
    const control = createMockVariant({
      isControl: true,
      stats: { participants: 50, events: 25, primaryMetricValue: 0.1, conversions: 5, conversionRate: 0.1 },
    });
    const treatment = createMockVariant({
      stats: { participants: 50, events: 25, primaryMetricValue: 0.5, conversions: 25, conversionRate: 0.5 },
    });

    const { shouldStop } = checkEarlyStopping(createMockExperiment(), [control, treatment], 5);
    expect(shouldStop).toBe(false);
  });

  it('stops when winner is clear', () => {
    const control = createMockVariant({
      isControl: true,
      stats: { participants: 1000, events: 500, primaryMetricValue: 0.1, conversions: 100, conversionRate: 0.1, betaAlpha: 101, betaBeta: 901 },
    });
    const treatment = createMockVariant({
      stats: { participants: 1000, events: 500, primaryMetricValue: 0.5, conversions: 500, conversionRate: 0.5, betaAlpha: 501, betaBeta: 501 },
    });

    const outcome = checkEarlyStopping(createMockExperiment(), [control, treatment], 10);
    // 50% conversion rate should clearly beat 10% with enough samples
    const stoppedOrConfident = outcome.shouldStop || outcome.confidence > 0.8;
    expect(stoppedOrConfident).toBe(true);
  });

  it('stops at max duration', () => {
    const defaults = createMockExperiment();
    const cappedExperiment = createMockExperiment({
      guardrails: { ...defaults.guardrails, maxDurationDays: 14 },
    });
    const control = createMockVariant({
      isControl: true,
      stats: { participants: 200, events: 100, primaryMetricValue: 0.1, conversions: 20, conversionRate: 0.1 },
    });
    const treatment = createMockVariant({
      stats: { participants: 200, events: 100, primaryMetricValue: 0.11, conversions: 22, conversionRate: 0.11 },
    });

    // 30 days running against a 14-day cap.
    const outcome = checkEarlyStopping(cappedExperiment, [control, treatment], 30);
    expect(outcome.shouldStop).toBe(true);
    expect(outcome.reason).toContain('duration');
  });
});
|
||||||
|
|
||||||
|
// ─────────────────────────────────────────────────────────────────────────────
|
||||||
|
// Guardrails Tests
|
||||||
|
// ─────────────────────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
// runGuardrails: checks are located by a substring of their `violation` text.
describe('runGuardrails', () => {
  it('requires minimum sample size', () => {
    // 50 participants per variant, below the fixture's minSampleSizePerVariant of 100.
    const control = createMockVariant({
      isControl: true,
      stats: { participants: 50, events: 25, primaryMetricValue: 0.1, conversions: 5, conversionRate: 0.1 },
    });
    const treatment = createMockVariant({
      stats: { participants: 50, events: 25, primaryMetricValue: 0.5, conversions: 25, conversionRate: 0.5 },
    });

    const results = runGuardrails(createMockExperiment(), [control, treatment], 10, false);
    const sampleSizeCheck = results.find(check => check.violation?.includes('sample'));

    expect(sampleSizeCheck?.passed).toBe(false);
    expect(sampleSizeCheck?.severity).toBe('blocking');
  });

  it('requires approval for revenue experiments when configured', () => {
    const defaults = createMockExperiment();
    const revenueExperiment = createMockExperiment({
      guardrails: { ...defaults.guardrails, requireApprovalFor: 'revenue' },
      primaryMetric: { ...defaults.primaryMetric, type: 'revenue' },
    });
    const control = createMockVariant({
      isControl: true,
      stats: { participants: 200, events: 100, primaryMetricValue: 10, conversions: 50, conversionRate: 0.25 },
    });
    const treatment = createMockVariant({
      stats: { participants: 200, events: 100, primaryMetricValue: 15, conversions: 75, conversionRate: 0.375 },
    });

    const results = runGuardrails(revenueExperiment, [control, treatment], 10, true);
    const approvalCheck = results.find(check => check.violation?.includes('Approval'));

    expect(approvalCheck?.passed).toBe(false);
  });
});
|
||||||
|
|
||||||
|
// canAutoPromote: only failed checks with 'blocking' severity prevent promotion in these cases.
describe('canAutoPromote', () => {
  it('allows promotion when all checks pass', () => {
    const allPassing = [
      { passed: true, severity: 'info' as const },
      { passed: true, severity: 'info' as const },
    ];
    const allowed = canAutoPromote(allPassing);
    expect(allowed).toBe(true);
  });

  it('blocks promotion on blocking violations', () => {
    const withBlocker = [
      { passed: true, severity: 'info' as const },
      { passed: false, violation: 'test', severity: 'blocking' as const },
    ];
    const allowed = canAutoPromote(withBlocker);
    expect(allowed).toBe(false);
  });

  it('allows promotion with warnings only', () => {
    const withWarning = [
      { passed: true, severity: 'info' as const },
      { passed: false, violation: 'test', severity: 'warning' as const },
    ];
    const allowed = canAutoPromote(withWarning);
    expect(allowed).toBe(true);
  });
});
|
||||||
|
|
||||||
|
// ─────────────────────────────────────────────────────────────────────────────
|
||||||
|
// Experiment Results Tests
|
||||||
|
// ─────────────────────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
// generateExperimentResult(experiment, variants, daysRunning): summary shape and winner reporting.
describe('generateExperimentResult', () => {
  it('generates results with variant comparisons', () => {
    const experiment = createMockExperiment();
    const control = createMockVariant({
      id: 'var_control',
      isControl: true,
      name: 'Control',
      stats: { participants: 200, events: 100, primaryMetricValue: 0.1, conversions: 20, conversionRate: 0.1 },
    });
    const treatment = createMockVariant({
      id: 'var_test',
      name: 'Test',
      stats: { participants: 200, events: 100, primaryMetricValue: 0.15, conversions: 30, conversionRate: 0.15 },
    });

    const summary = generateExperimentResult(experiment, [control, treatment], 14);

    expect(summary.experimentId).toBe(experiment.id);
    expect(summary.variantResults).toHaveLength(2);
    expect(summary.totalParticipants).toBe(experiment.totalParticipants);
    expect(summary.daysRunning).toBe(14);
  });

  it('identifies winner when statistically significant', () => {
    const control = createMockVariant({
      id: 'var_control',
      isControl: true,
      stats: { participants: 1000, events: 500, primaryMetricValue: 0.1, conversions: 100, conversionRate: 0.1, betaAlpha: 101, betaBeta: 901 },
    });
    const treatment = createMockVariant({
      id: 'var_test',
      name: 'Test',
      stats: { participants: 1000, events: 500, primaryMetricValue: 0.5, conversions: 500, conversionRate: 0.5, betaAlpha: 501, betaBeta: 501 },
    });

    const summary = generateExperimentResult(createMockExperiment(), [control, treatment], 14);

    // Winner should be identified with high probability
    // NOTE(review): the assertion only runs when a winner is reported, so this
    // test passes vacuously if winnerVariantId is unset.
    if (summary.winnerVariantId) {
      expect(summary.winnerProbability).toBeGreaterThan(0.8);
    }
  });
});
|
||||||
|
|
||||||
|
// ─────────────────────────────────────────────────────────────────────────────
|
||||||
|
// Validation Tests
|
||||||
|
// ─────────────────────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
// validateAA: A/A validation should rarely flag a difference and report low bias.
describe('validateAA', () => {
  it('passes A/A test at reasonable rate', () => {
    const { passRate } = validateAA(100, 0.1, 500);
    // A/A tests should "pass" (not detect difference) at ~95% rate
    expect(passRate).toBeGreaterThan(0.85);
  });

  it('has low bias for identical variants', () => {
    const { bias } = validateAA(100, 0.1, 500);
    expect(bias).toBeLessThan(0.1);
  });
});
|
||||||
|
|
||||||
|
// calculateSampleSize: magnitude sanity check plus two monotonicity properties.
describe('calculateSampleSize', () => {
  it('calculates reasonable sample sizes', () => {
    const required = calculateSampleSize(0.1, 0.05);
    expect(required).toBeGreaterThan(100);
    expect(required).toBeLessThan(100000);
  });

  it('requires more samples for smaller effect sizes', () => {
    const forLargerEffect = calculateSampleSize(0.1, 0.05);
    const forSmallerEffect = calculateSampleSize(0.1, 0.02);
    expect(forSmallerEffect).toBeGreaterThan(forLargerEffect);
  });

  it('requires more samples for lower baseline rates', () => {
    const forHigherBaseline = calculateSampleSize(0.2, 0.05);
    const forLowerBaseline = calculateSampleSize(0.05, 0.05);
    expect(forLowerBaseline).toBeGreaterThan(forHigherBaseline);
  });
});
|
||||||
|
|
||||||
|
// ─────────────────────────────────────────────────────────────────────────────
|
||||||
|
// Assignment Strategy Tests
|
||||||
|
// ─────────────────────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
// assignByStrategy: every strategy must return the id of one of the supplied variants.
describe('assignByStrategy', () => {
  const mockControl: VariantDoc = createMockVariant({
    id: 'var_control',
    isControl: true,
    stats: { participants: 100, events: 50, primaryMetricValue: 0.1, conversions: 10, conversionRate: 0.1, betaAlpha: 11, betaBeta: 91 },
  });

  const mockVariant: VariantDoc = createMockVariant({
    id: 'var_test',
    stats: { participants: 100, events: 50, primaryMetricValue: 0.2, conversions: 20, conversionRate: 0.2, betaAlpha: 21, betaBeta: 81 },
  });

  // Context shared by all strategies in this suite.
  const ctx = {
    variants: [mockControl, mockVariant],
    controlVariant: mockControl,
    totalParticipants: 200,
  };

  /** Asserts that the returned id belongs to one of the two fixture variants. */
  const expectKnownVariant = (variantId: unknown): void => {
    expect([mockControl.id, mockVariant.id]).toContain(variantId);
  };

  it('returns valid variant for random strategy', () => {
    expectKnownVariant(assignByStrategy('random', ctx));
  });

  it('returns valid variant for Thompson sampling', () => {
    expectKnownVariant(assignByStrategy('thompson', ctx));
  });

  it('returns valid variant for epsilon-greedy', () => {
    expectKnownVariant(assignByStrategy('epsilon_greedy', { ...ctx, explorationRate: 0.1 }));
  });

  it('returns valid variant for UCB', () => {
    expectKnownVariant(assignByStrategy('ucb', ctx));
  });
});
|
||||||
413
services/platform-service/src/modules/ab-testing/routes.ts
Normal file
413
services/platform-service/src/modules/ab-testing/routes.ts
Normal file
@ -0,0 +1,413 @@
|
|||||||
|
/**
|
||||||
|
* A/B Testing — REST API Routes.
|
||||||
|
* Admin CRUD, user assignment, event tracking, results, suggestions.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import type { FastifyInstance } from 'fastify';
|
||||||
|
import { UnauthorizedError, ForbiddenError, NotFoundError, BadRequestError } from '../../lib/errors.js';
|
||||||
|
import { getRequestProductId } from '../../lib/request-context.js';
|
||||||
|
import type { TargetingContext } from './targeting.js';
|
||||||
|
import {
|
||||||
|
CreateExperimentSchema,
|
||||||
|
UpdateExperimentSchema,
|
||||||
|
TrackEventSchema,
|
||||||
|
AdjustAllocationSchema,
|
||||||
|
type ExperimentDoc,
|
||||||
|
} from './types.js';
|
||||||
|
import {
|
||||||
|
listExperiments,
|
||||||
|
getExperiment,
|
||||||
|
createExperiment,
|
||||||
|
updateExperiment,
|
||||||
|
deleteExperiment,
|
||||||
|
listRunningExperiments,
|
||||||
|
listVariants,
|
||||||
|
getVariant,
|
||||||
|
getOrCreateAssignment,
|
||||||
|
trackEvent,
|
||||||
|
updateVariantAllocation,
|
||||||
|
updateVariantStats,
|
||||||
|
listSuggestions,
|
||||||
|
updateVariantBayesianResults,
|
||||||
|
} from './repository.js';
|
||||||
|
import { generateExperimentResult, checkEarlyStopping, calculateCredibleInterval, probabilityVariantBeatsControl } from './statistics.js';
|
||||||
|
import { evaluateAutoPromotion } from './guardrails.js';
|
||||||
|
import { matchesTargeting } from './targeting.js';
|
||||||
|
|
||||||
|
// ─────────────────────────────────────────────────────────────────────────────
|
||||||
|
// Auth Helpers
|
||||||
|
// ─────────────────────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
/** Shape of the JWT payload the auth helpers in this file read from `req.jwtPayload`. */
interface JwtPayload {
  /** Subject claim; `requireAuth` returns it as the caller's user id. */
  sub: string;
  /** Optional role; `requireAdmin` accepts only the value 'admin'. */
  role?: string;
}
|
||||||
|
|
||||||
|
/**
 * Ensures the request is authenticated and returns the caller's user id.
 *
 * @param req - Any object that may carry a `jwtPayload`.
 * @returns The payload's `sub` value.
 * @throws UnauthorizedError when the payload is missing or its `sub` is empty.
 */
function requireAuth(req: { jwtPayload?: JwtPayload }): string {
  const subject = req.jwtPayload?.sub;
  if (subject) return subject;
  throw new UnauthorizedError('Authentication required');
}
|
||||||
|
|
||||||
|
/**
 * Ensures the request is authenticated and that the caller's role is 'admin'.
 *
 * @param req - Any object that may carry a `jwtPayload`.
 * @throws UnauthorizedError (via requireAuth) when the request is unauthenticated.
 * @throws ForbiddenError when the role is anything other than 'admin'.
 */
function requireAdmin(req: { jwtPayload?: JwtPayload }): void {
  // Authentication is checked first so unauthenticated callers get the 401-style error.
  requireAuth(req);
  const isAdmin = req.jwtPayload?.role === 'admin';
  if (!isAdmin) throw new ForbiddenError('Admin access required');
}
|
||||||
|
|
||||||
|
// ─────────────────────────────────────────────────────────────────────────────
|
||||||
|
// Routes
|
||||||
|
// ─────────────────────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
export async function abTestingRoutes(app: FastifyInstance): Promise<void> {
|
||||||
|
// ───────────────────────────────────────────────────────────────────────────
|
||||||
|
// Admin: Experiment Management
|
||||||
|
// ───────────────────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
// List experiments
|
||||||
|
app.get('/ab-testing/experiments', async request => {
  // Admin only; results are scoped to the product resolved from the request.
  requireAdmin(request);
  return listExperiments(getRequestProductId(request));
});
|
||||||
|
|
||||||
|
// Get experiment details
|
||||||
|
app.get<{ Params: { id: string } }>('/ab-testing/experiments/:id', async request => {
  requireAdmin(request);
  const { id } = request.params;

  const found = await getExperiment(id);
  if (!found) throw new NotFoundError('Experiment not found');

  // Return the experiment document with its variants attached under `variants`.
  return { ...found, variants: await listVariants(id) };
});
|
||||||
|
|
||||||
|
// Create experiment
|
||||||
|
app.post('/ab-testing/experiments', async (request, reply) => {
  requireAdmin(request);
  const productId = getRequestProductId(request);
  // Schema parsing throws on an invalid body before anything is created.
  const payload = CreateExperimentSchema.parse(request.body);

  const created = await createExperiment(productId, payload);
  reply.status(201);
  return created;
});
|
||||||
|
|
||||||
|
// Update experiment
|
||||||
|
app.patch<{ Params: { id: string } }>('/ab-testing/experiments/:id', async request => {
  requireAdmin(request);
  const changes = UpdateExperimentSchema.parse(request.body);

  // A falsy result from the repository is reported as "not found".
  const updated = await updateExperiment(request.params.id, changes);
  if (updated) return updated;
  throw new NotFoundError('Experiment not found');
});
|
||||||
|
|
||||||
|
// Delete experiment
|
||||||
|
app.delete<{ Params: { id: string } }>('/ab-testing/experiments/:id', async (request, reply) => {
  requireAdmin(request);

  const removed = await deleteExperiment(request.params.id);
  if (!removed) throw new NotFoundError('Experiment not found');

  // Success carries no body.
  reply.status(204);
});
|
||||||
|
|
||||||
|
// ───────────────────────────────────────────────────────────────────────────
|
||||||
|
// Admin: Experiment Lifecycle
|
||||||
|
// ───────────────────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
// Start experiment
|
||||||
|
app.post<{ Params: { id: string } }>('/ab-testing/experiments/:id/start', async request => {
  requireAdmin(request);

  // Starting is a plain status update to 'running'.
  const started = await updateExperiment(request.params.id, { status: 'running' });
  if (started) return started;
  throw new NotFoundError('Experiment not found');
});
|
||||||
|
|
||||||
|
// Pause experiment
|
||||||
|
app.post<{ Params: { id: string } }>('/ab-testing/experiments/:id/pause', async request => {
  requireAdmin(request);

  // Pausing is a plain status update to 'paused'.
  const paused = await updateExperiment(request.params.id, { status: 'paused' });
  if (paused) return paused;
  throw new NotFoundError('Experiment not found');
});
|
||||||
|
|
||||||
|
// Stop experiment
|
||||||
|
app.post<{ Params: { id: string } }>('/ab-testing/experiments/:id/stop', async request => {
  requireAdmin(request);

  // Stopping is a plain status update to 'stopped'.
  const stopped = await updateExperiment(request.params.id, { status: 'stopped' });
  if (stopped) return stopped;
  throw new NotFoundError('Experiment not found');
});
|
||||||
|
|
||||||
|
// Complete experiment with winner
|
||||||
|
app.post<{ Params: { id: string }; Body: { winnerVariantId: string } }>(
  '/ab-testing/experiments/:id/complete',
  async request => {
    requireAdmin(request);
    const winnerId = request.body.winnerVariantId;
    const experimentId = request.params.id;

    const existing = await getExperiment(experimentId);
    if (!existing) throw new NotFoundError('Experiment not found');

    // The winner must be one of this experiment's own variants.
    const allVariants = await listVariants(experimentId);
    const winner = allVariants.find(candidate => candidate.id === winnerId);
    if (!winner) throw new BadRequestError('Invalid winner variant ID');

    // Winner takes 100% of traffic, every other variant drops to 0%.
    // Updates are applied one at a time, in list order.
    for (const { id: variantId } of allVariants) {
      const percent = variantId === winnerId ? 100 : 0;
      await updateVariantAllocation(variantId, experimentId, percent);
    }

    const updated = await updateExperiment(experimentId, { status: 'completed' });
    return { experiment: updated, winner };
  }
);
|
||||||
|
|
||||||
|
// Adjust traffic allocation
|
||||||
|
app.post<{ Params: { id: string } }>(
  '/ab-testing/experiments/:id/allocation',
  async request => {
    requireAdmin(request);
    // Body is validated by the schema; only the two fields below are used.
    const adjustment = AdjustAllocationSchema.parse(request.body);

    await updateVariantAllocation(
      adjustment.variantId,
      request.params.id,
      adjustment.newAllocationPercent,
    );
    return { success: true };
  }
);
|
||||||
|
|
||||||
|
// ───────────────────────────────────────────────────────────────────────────
|
||||||
|
// User: Assignment
|
||||||
|
// ───────────────────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
// Get variant assignment for user
|
||||||
|
app.post<{ Body: { experimentKey: string; context?: TargetingContext } }>(
  '/ab-testing/assign',
  async request => {
    const userId = requireAuth(request);
    const productId = getRequestProductId(request);
    const { experimentKey, context = {} } = request.body;

    // The key may be either the experiment's name or its id; only running
    // experiments for this product are considered.
    const running = await listRunningExperiments(productId);
    const experiment = running.find(
      candidate => candidate.name === experimentKey || candidate.id === experimentKey,
    );
    if (!experiment) {
      return { assigned: false, reason: 'Experiment not found or not running' };
    }

    // Targeting is evaluated before any assignment is looked up or created.
    const isTargeted = matchesTargeting(context, experiment.targeting);
    if (!isTargeted) {
      return { assigned: false, reason: 'User does not match targeting criteria' };
    }

    const assignment = await getOrCreateAssignment(experiment, userId, context);
    if (!assignment) {
      return { assigned: false, reason: 'Not enrolled in experiment' };
    }

    const { variant, isNew } = assignment;
    return {
      assigned: true,
      experimentId: experiment.id,
      variantId: variant.id,
      variantName: variant.name,
      isControl: variant.isControl,
      flagConfig: variant.flagConfig,
      isNew,
    };
  }
);
|
||||||
|
|
||||||
|
// Batch assignment for multiple experiments.
// Returns `{ assignments }`, a map from each requested key to either
// `{ assigned: false, reason }` or the assigned variant's details.
app.post<{ Body: { experimentKeys: string[]; context?: TargetingContext } }>(
  '/ab-testing/assign/batch',
  async req => {
    const userId = requireAuth(req);
    const productId = getRequestProductId(req);
    const { experimentKeys, context = {} } = req.body;

    const results: Record<string, unknown> = {};

    // The list of running experiments does not depend on the key, so load it
    // once instead of re-querying the store for every requested key.
    const experiments = await listRunningExperiments(productId);

    for (const key of experimentKeys) {
      const experiment = experiments.find(e => e.name === key || e.id === key);

      if (!experiment) {
        results[key] = { assigned: false, reason: 'Experiment not found' };
        continue;
      }

      // Apply the same targeting gate as the single-assignment route so a user
      // cannot be enrolled through the batch endpoint when /ab-testing/assign
      // would have rejected them.
      if (!matchesTargeting(context, experiment.targeting)) {
        results[key] = { assigned: false, reason: 'User does not match targeting criteria' };
        continue;
      }

      const result = await getOrCreateAssignment(experiment, userId, context);

      if (!result) {
        results[key] = { assigned: false, reason: 'Not enrolled' };
      } else {
        results[key] = {
          assigned: true,
          variantId: result.variant.id,
          variantName: result.variant.name,
          isControl: result.variant.isControl,
          flagConfig: result.variant.flagConfig,
        };
      }
    }

    return { assignments: results };
  }
);
|
||||||
|
|
||||||
|
// ───────────────────────────────────────────────────────────────────────────
|
||||||
|
// Event Tracking
|
||||||
|
// ───────────────────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
// Track experiment event.
// Validates the body, resolves the caller's assignment, records the event and,
// when the event is for the experiment's primary metric, refreshes the
// variant's conversion statistics. Responds 201 with `{ tracked: true }`.
app.post<{ Body: { experimentId: string; metricName: string; metricType: string; value: number; converted?: boolean; eventMetadata?: Record<string, unknown> } }>(
  '/ab-testing/events',
  async (req, reply) => {
    const userId = requireAuth(req);
    const input = TrackEventSchema.parse(req.body);

    const experiment = await getExperiment(input.experimentId);
    if (!experiment) throw new NotFoundError('Experiment not found');
    if (experiment.status !== 'running') {
      throw new BadRequestError('Experiment is not running');
    }

    // Get assignment
    const result = await getOrCreateAssignment(experiment, userId, { platform: input.platform });
    if (!result) throw new BadRequestError('User not assigned to experiment');

    // Resolve the default once so the stored event and the aggregated stats
    // agree: an omitted `converted` counts as a conversion in both places.
    const converted = input.converted ?? true;

    await trackEvent(
      input.experimentId,
      userId,
      result.assignment.id,
      result.variant.id,
      input.metricName,
      input.metricType,
      input.value,
      converted,
      input.platform,
      input.appVersion,
      input.eventMetadata
    );

    // Update variant primary metric if matches
    if (input.metricName === experiment.primaryMetric.name) {
      const currentConversions = result.variant.stats.conversions ?? 0;
      const updatedConversions = currentConversions + (converted ? 1 : 0);
      const updatedParticipants = Math.max(result.variant.stats.participants || 1, 1);

      // Conversions can outnumber participants (e.g. several events from one
      // user). Keep the derived values in their valid ranges: a rate of at
      // most 1 and a Beta "failures" parameter of at least 1.
      const failures = Math.max(updatedParticipants - updatedConversions, 0);

      await updateVariantStats(result.variant.id, experiment.id, {
        conversions: updatedConversions,
        conversionRate: Math.min(updatedConversions / updatedParticipants, 1),
        primaryMetricValue: input.value,
        // Update Beta posterior for conversions
        betaAlpha: updatedConversions + 1,
        betaBeta: failures + 1,
      });
    }

    reply.status(201);
    return { tracked: true };
  }
);
|
||||||
|
|
||||||
|
// ───────────────────────────────────────────────────────────────────────────
|
||||||
|
// Results & Statistics
|
||||||
|
// ───────────────────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
// Admin-only: full result report for one experiment.
app.get<{ Params: { id: string } }>('/ab-testing/experiments/:id/results', async request => {
  requireAdmin(request);

  const experimentId = request.params.id;
  const experiment = await getExperiment(experimentId);
  if (!experiment) throw new NotFoundError('Experiment not found');

  const variants = await listVariants(experimentId);

  // Whole days elapsed since the experiment started; 0 when it never started.
  let daysRunning = 0;
  if (experiment.startedAt) {
    const elapsedMs = Date.now() - new Date(experiment.startedAt).getTime();
    daysRunning = Math.floor(elapsedMs / (1000 * 60 * 60 * 24));
  }

  return generateExperimentResult(experiment, variants, daysRunning);
});
|
||||||
|
|
||||||
|
// Admin-only: report whether the experiment should stop early and whether the
// leading variant may be auto-promoted.
app.get<{ Params: { id: string } }>('/ab-testing/experiments/:id/stop-check', async request => {
  requireAdmin(request);

  const experimentId = request.params.id;
  const experiment = await getExperiment(experimentId);
  if (!experiment) throw new NotFoundError('Experiment not found');

  const variants = await listVariants(experimentId);

  // Whole days elapsed since the experiment started; 0 when it never started.
  let daysRunning = 0;
  if (experiment.startedAt) {
    const elapsedMs = Date.now() - new Date(experiment.startedAt).getTime();
    daysRunning = Math.floor(elapsedMs / (1000 * 60 * 60 * 24));
  }

  const stopDecision = checkEarlyStopping(experiment, variants, daysRunning);
  const promotion = evaluateAutoPromotion(
    experiment,
    variants,
    daysRunning,
    experiment.primaryMetric.type === 'revenue'
  );

  const { shouldStop, reason, confidence, winnerVariantId } = stopDecision;
  return {
    shouldStop,
    reason,
    confidence,
    winnerVariantId,
    canAutoPromote: promotion.canPromote,
    violations: promotion.violations,
    warnings: promotion.warnings,
  };
});
|
||||||
|
|
||||||
|
// Admin-only: one variant's stored stats plus its credible interval and the
// probability that it beats the control variant.
app.get<{ Params: { experimentId: string; variantId: string } }>(
  '/ab-testing/experiments/:experimentId/variants/:variantId/stats',
  async request => {
    requireAdmin(request);
    const { experimentId, variantId } = request.params;

    const experiment = await getExperiment(experimentId);
    if (!experiment) throw new NotFoundError('Experiment not found');

    const variant = await getVariant(variantId, experimentId);
    if (!variant) throw new NotFoundError('Variant not found');

    const metricType = experiment.primaryMetric.type;
    const credibleInterval = calculateCredibleInterval(variant, metricType);

    const siblings = await listVariants(experimentId);
    const controlVariant = siblings.find(candidate => candidate.isControl);

    // Without a control there is nothing to compare against: report a coin flip.
    let probabilityBeatsControl = 0.5;
    if (controlVariant) {
      probabilityBeatsControl = probabilityVariantBeatsControl(variant, controlVariant, metricType);
    }

    return { variant, credibleInterval, probabilityBeatsControl };
  }
);
|
||||||
|
|
||||||
|
// ───────────────────────────────────────────────────────────────────────────
|
||||||
|
// AI Suggestions
|
||||||
|
// ───────────────────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
// Admin-only: experiment suggestions stored for the caller's product.
app.get('/ab-testing/suggestions', async request => {
  requireAdmin(request);
  return listSuggestions(getRequestProductId(request));
});
|
||||||
|
|
||||||
|
// Admin-only: build a hypothesis for a feature/pattern pair.
// Placeholder until the LLM integration exists: the text is templated and all
// scores are fixed values.
app.post<{ Body: { featureName: string; pattern: string } }>(
  '/ab-testing/hypotheses',
  async request => {
    requireAdmin(request);
    const { featureName, pattern } = request.body;

    // Placeholder: In real implementation, call Azure OpenAI
    const primary = `Changing ${featureName} will improve user engagement based on observed ${pattern}`;
    const alternatives = [
      `Alternative: Simplify ${featureName} for faster completion`,
      `Alternative: Add visual cues to ${featureName}`,
    ];

    const hypothesis = {
      primary,
      alternatives,
      expectedEffectSize: 0.05,
      successMetric: 'conversion_rate',
      riskAssessment: 'low' as const,
      impactScore: 75,
      difficultyScore: 30,
      powerPrediction: 85,
    };

    return { hypothesis, featureName, pattern };
  }
);
|
||||||
|
}
|
||||||
@ -0,0 +1,506 @@
|
|||||||
|
import { getRegisteredContainer } from '@bytelyst/cosmos';
|
||||||
|
import { CosmosClient, Container } from '@azure/cosmos';
|
||||||
|
import { config } from '../../lib/config.js';
|
||||||
|
import type {
|
||||||
|
ErrorClusterDoc,
|
||||||
|
ErrorFingerprint,
|
||||||
|
DiagnosticInsightDoc,
|
||||||
|
NaturalLanguageQueryDoc,
|
||||||
|
ProactiveAlert,
|
||||||
|
} from './types.js';
|
||||||
|
|
||||||
|
// ============================================================================
|
||||||
|
// Container Access
|
||||||
|
// ============================================================================
|
||||||
|
|
||||||
|
/** Returns the container registered under the name `error_clusters`. */
function getErrorClustersContainer(): Container {
  const container = getRegisteredContainer('error_clusters');
  return container;
}
|
||||||
|
|
||||||
|
/** Returns the container registered under the name `error_fingerprints`. */
function getErrorFingerprintsContainer(): Container {
  const container = getRegisteredContainer('error_fingerprints');
  return container;
}
|
||||||
|
|
||||||
|
/** Returns the container registered under the name `diagnostic_insights`. */
function getDiagnosticInsightsContainer(): Container {
  const container = getRegisteredContainer('diagnostic_insights');
  return container;
}
|
||||||
|
|
||||||
|
/** Returns the container registered under the name `diagnostic_queries`. */
function getDiagnosticQueriesContainer(): Container {
  const container = getRegisteredContainer('diagnostic_queries');
  return container;
}
|
||||||
|
|
||||||
|
/** Returns the container registered under the name `proactive_alerts`. */
function getProactiveAlertsContainer(): Container {
  const container = getRegisteredContainer('proactive_alerts');
  return container;
}
|
||||||
|
|
||||||
|
// ============================================================================
|
||||||
|
// Error Cluster Repository
|
||||||
|
// ============================================================================
|
||||||
|
|
||||||
|
/**
 * Inserts a new error cluster document.
 *
 * @param cluster - Document to create.
 * @returns The document as returned by the create call.
 */
export async function createErrorCluster(
  cluster: ErrorClusterDoc
): Promise<ErrorClusterDoc> {
  const response = await getErrorClustersContainer().items.create(cluster);
  return response.resource as ErrorClusterDoc;
}
|
||||||
|
|
||||||
|
/**
 * Reads one error cluster by id.
 *
 * @param clusterId - Document id of the cluster.
 * @param productId - Partition key value passed to `container.item`.
 * @returns The cluster, or `null` when no such document exists.
 * @throws Re-throws any read failure other than a 404, so outages and
 *   throttling are not mistaken for "not found".
 */
export async function getErrorClusterById(
  clusterId: string,
  productId: string
): Promise<ErrorClusterDoc | null> {
  const container = getErrorClustersContainer();
  try {
    const { resource } = await container.item(clusterId, productId).read();
    // A missing item is reported as an undefined resource; normalise it to the
    // `null` the signature promises instead of leaking `undefined`.
    return (resource as ErrorClusterDoc | undefined) ?? null;
  } catch (error: unknown) {
    const code = (error as { code?: unknown } | null)?.code;
    if (code === 404) {
      return null;
    }
    throw error;
  }
}
|
||||||
|
|
||||||
|
/**
 * Upserts an error cluster document (creates it when absent, replaces it
 * otherwise).
 *
 * @param cluster - Document to write.
 * @returns The document as returned by the upsert call.
 */
export async function updateErrorCluster(
  cluster: ErrorClusterDoc
): Promise<ErrorClusterDoc> {
  const response = await getErrorClustersContainer().items.upsert(cluster);
  return response.resource as unknown as ErrorClusterDoc;
}
|
||||||
|
|
||||||
|
/**
 * Lists a product's error clusters, most frequent first.
 *
 * @param productId - Product whose clusters are returned.
 * @param options.status - When set, only clusters with this status.
 * @param options.minOccurrences - When set, only clusters with at least this
 *   many occurrences.
 * @param options.limit - Maximum number of clusters returned (default 100; a
 *   value of 0 also falls back to the default).
 * @returns Matching clusters ordered by `occurrenceCount` descending.
 */
export async function findClustersByProduct(
  productId: string,
  options: {
    status?: 'active' | 'investigating' | 'resolved' | 'ignored';
    minOccurrences?: number;
    limit?: number;
  } = {}
): Promise<ErrorClusterDoc[]> {
  const container = getErrorClustersContainer();

  let query = 'SELECT * FROM c WHERE c.productId = @productId';
  // Typed to allow numeric values: a stringified number would be compared
  // against the numeric `occurrenceCount` field and never match.
  const parameters: Array<{ name: string; value: string | number }> = [
    { name: '@productId', value: productId },
  ];

  if (options.status) {
    query += ' AND c.status = @status';
    parameters.push({ name: '@status', value: options.status });
  }

  if (options.minOccurrences) {
    query += ' AND c.occurrenceCount >= @minOccurrences';
    parameters.push({ name: '@minOccurrences', value: options.minOccurrences });
  }

  // `maxItemCount` only sets the page size and `fetchAll` drains every page,
  // so the cap has to be part of the query itself.
  query += ' ORDER BY c.occurrenceCount DESC OFFSET 0 LIMIT @limit';
  parameters.push({ name: '@limit', value: options.limit || 100 });

  const { resources } = await container.items.query({ query, parameters }).fetchAll();

  return resources as ErrorClusterDoc[];
}
|
||||||
|
|
||||||
|
// ============================================================================
|
||||||
|
// Vector Search (Cosine Similarity)
|
||||||
|
// ============================================================================
|
||||||
|
|
||||||
|
/** One hit of a similarity search: a cluster and its score against the query. */
interface VectorSearchResult {
  /** The matched cluster document. */
  cluster: ErrorClusterDoc;
  /** Cosine similarity between the query embedding and the cluster embedding. */
  similarity: number;
}
|
||||||
|
|
||||||
|
/**
 * Finds the clusters of a product whose embedding is most similar to a query
 * embedding, using cosine similarity.
 *
 * This is a client-side implementation: every candidate document is fetched
 * and scored in process. For production scale, consider a server-side index:
 * - Azure Cosmos DB for NoSQL vector search (`VectorDistance`)
 * - Azure Cognitive Search with vector capability
 * - Redis Vector Similarity Search
 * - PostgreSQL with pgvector
 *
 * @param productId - Product whose clusters are searched.
 * @param queryEmbedding - Embedding to compare against.
 * @param options.limit - Maximum number of hits (default 10).
 * @param options.threshold - Minimum similarity for a hit (default 0.75; an
 *   explicit 0 is honoured).
 * @param options.excludeClusterId - Cluster id to leave out of the results.
 * @returns Hits at or above the threshold, best first, at most `limit` long.
 */
export async function searchSimilarClusters(
  productId: string,
  queryEmbedding: number[],
  options: {
    limit?: number;
    threshold?: number;
    excludeClusterId?: string;
  } = {}
): Promise<VectorSearchResult[]> {
  const container = getErrorClustersContainer();
  const limit = options.limit || 10;
  // `??` rather than `||`: 0 is a legitimate threshold and must not be
  // replaced by the default.
  const threshold = options.threshold ?? 0.75;

  // Fetch every non-ignored cluster of the product that has an embedding.
  const query = `
    SELECT * FROM c
    WHERE c.productId = @productId
    AND IS_DEFINED(c.embedding)
    AND c.status != 'ignored'
    ${options.excludeClusterId ? 'AND c.id != @excludeId' : ''}
  `;

  const parameters = [{ name: '@productId', value: productId }];
  if (options.excludeClusterId) {
    parameters.push({ name: '@excludeId', value: options.excludeClusterId });
  }

  // NOTE: `maxItemCount` is the page size only; `fetchAll` still reads every
  // matching document.
  const { resources } = await container.items
    .query({ query, parameters }, { maxItemCount: 1000 })
    .fetchAll();

  const clusters = resources as ErrorClusterDoc[];

  // Calculate cosine similarity for each cluster. A stored embedding that is
  // missing or has a different dimension than the query scores 0 instead of
  // throwing, so one malformed document cannot fail the whole search.
  const results: VectorSearchResult[] = clusters
    .map((cluster) => ({
      cluster,
      similarity:
        cluster.embedding && cluster.embedding.length === queryEmbedding.length
          ? cosineSimilarity(queryEmbedding, cluster.embedding)
          : 0,
    }))
    .filter((result) => result.similarity >= threshold)
    .sort((a, b) => b.similarity - a.similarity)
    .slice(0, limit);

  return results;
}
|
||||||
|
|
||||||
|
/**
 * Calculates the cosine similarity between two vectors.
 *
 * @param a - First vector.
 * @param b - Second vector; must have the same length as `a`.
 * @returns A value in [-1, 1]. Returns 0 when either vector has zero magnitude
 *   (including two empty vectors) or when the result is not a finite number.
 * @throws Error when the vectors differ in length.
 */
function cosineSimilarity(a: readonly number[], b: readonly number[]): number {
  if (a.length !== b.length) {
    throw new Error('Vectors must have same dimensions');
  }

  let dotProduct = 0;
  let normA = 0;
  let normB = 0;

  for (let i = 0; i < a.length; i++) {
    const x = a[i];
    const y = b[i];
    dotProduct += x * y;
    normA += x * x;
    normB += y * y;
  }

  if (normA === 0 || normB === 0) {
    return 0;
  }

  const similarity = dotProduct / (Math.sqrt(normA) * Math.sqrt(normB));

  // NaN/Infinity components would otherwise propagate into comparisons.
  if (!Number.isFinite(similarity)) {
    return 0;
  }

  // Floating-point rounding can push the ratio marginally outside [-1, 1].
  return Math.min(1, Math.max(-1, similarity));
}
|
||||||
|
|
||||||
|
/**
 * Finds clusters whose embedding is similar to that of a given cluster.
 *
 * @param clusterId - Id of the source cluster; it is excluded from the result.
 * @param productId - Product the source cluster belongs to.
 * @param options.limit - Maximum number of related clusters (default 5).
 * @param options.threshold - Minimum similarity (default 0.8; an explicit 0 is
 *   forwarded rather than replaced by the default).
 * @returns Related clusters, or an empty array when the source cluster does
 *   not exist or has no embedding.
 */
export async function findRelatedClusters(
  clusterId: string,
  productId: string,
  options: {
    limit?: number;
    threshold?: number;
  } = {}
): Promise<ErrorClusterDoc[]> {
  // Get the source cluster
  const sourceCluster = await getErrorClusterById(clusterId, productId);
  if (!sourceCluster?.embedding) {
    return [];
  }

  // Search for similar clusters
  const results = await searchSimilarClusters(productId, sourceCluster.embedding, {
    limit: options.limit || 5,
    // `??` keeps a caller-supplied threshold of 0.
    threshold: options.threshold ?? 0.8,
    excludeClusterId: clusterId,
  });

  return results.map((r) => r.cluster);
}
|
||||||
|
|
||||||
|
// ============================================================================
|
||||||
|
// Error Fingerprint Repository
|
||||||
|
// ============================================================================
|
||||||
|
|
||||||
|
/**
 * Reads one error fingerprint by its hash.
 *
 * @param fingerprintHash - Used both as the document id and as the partition
 *   key value.
 * @returns The fingerprint, or `null` when no such document exists.
 * @throws Re-throws any read failure other than a 404.
 */
export async function getFingerprintByHash(
  fingerprintHash: string
): Promise<ErrorFingerprint | null> {
  const container = getErrorFingerprintsContainer();
  try {
    const { resource } = await container.item(fingerprintHash, fingerprintHash).read();
    // A missing item is reported as an undefined resource; normalise it to the
    // `null` the signature promises.
    return (resource as ErrorFingerprint | undefined) ?? null;
  } catch (error: unknown) {
    const code = (error as { code?: unknown } | null)?.code;
    if (code === 404) {
      return null;
    }
    throw error;
  }
}
|
||||||
|
|
||||||
|
/**
 * Upserts an error fingerprint document.
 *
 * @param fingerprint - Document to write.
 * @returns The document as returned by the upsert call.
 */
export async function saveFingerprint(
  fingerprint: ErrorFingerprint
): Promise<ErrorFingerprint> {
  const response = await getErrorFingerprintsContainer().items.upsert(fingerprint);
  return response.resource as unknown as ErrorFingerprint;
}
|
||||||
|
|
||||||
|
// ============================================================================
|
||||||
|
// Diagnostic Insight Repository
|
||||||
|
// ============================================================================
|
||||||
|
|
||||||
|
/**
 * Inserts a new diagnostic insight document.
 *
 * @param insight - Document to create.
 * @returns The document as returned by the create call.
 */
export async function createDiagnosticInsight(
  insight: DiagnosticInsightDoc
): Promise<DiagnosticInsightDoc> {
  const response = await getDiagnosticInsightsContainer().items.create(insight);
  return response.resource as DiagnosticInsightDoc;
}
|
||||||
|
|
||||||
|
/**
 * Reads one diagnostic insight by id.
 *
 * @param insightId - Document id of the insight.
 * @param clusterId - Partition key value passed to `container.item`.
 * @returns The insight, or `null` when no such document exists.
 * @throws Re-throws any read failure other than a 404.
 */
export async function getDiagnosticInsightById(
  insightId: string,
  clusterId: string
): Promise<DiagnosticInsightDoc | null> {
  const container = getDiagnosticInsightsContainer();
  try {
    const { resource } = await container.item(insightId, clusterId).read();
    // A missing item is reported as an undefined resource; normalise it to the
    // `null` the signature promises.
    return (resource as DiagnosticInsightDoc | undefined) ?? null;
  } catch (error: unknown) {
    const code = (error as { code?: unknown } | null)?.code;
    if (code === 404) {
      return null;
    }
    throw error;
  }
}
|
||||||
|
|
||||||
|
/**
 * Returns the most recently generated insight of a cluster.
 *
 * @param clusterId - Cluster the insight belongs to.
 * @param productId - Product the insight belongs to.
 * @returns The insight with the latest `generatedAt`, or `null` when the
 *   cluster has none.
 */
export async function getLatestInsightForCluster(
  clusterId: string,
  productId: string
): Promise<DiagnosticInsightDoc | null> {
  const parameters = [
    { name: '@clusterId', value: clusterId },
    { name: '@productId', value: productId },
  ];

  const query = `
    SELECT * FROM c
    WHERE c.clusterId = @clusterId AND c.productId = @productId
    ORDER BY c.generatedAt DESC
    OFFSET 0 LIMIT 1
  `;

  const response = await getDiagnosticInsightsContainer()
    .items.query({ query, parameters })
    .fetchAll();

  const [latest] = response.resources;
  return (latest as DiagnosticInsightDoc) || null;
}
|
||||||
|
|
||||||
|
/**
 * Records engineer feedback on an insight.
 *
 * Increments the helpful / not-helpful counter when `feedback.helpful` is a
 * boolean, appends `feedback.note` when it is non-empty, and stamps
 * `updatedAt`. Does nothing when the insight cannot be found.
 *
 * @param insightId - Document id of the insight.
 * @param clusterId - Partition key value of the insight.
 * @param feedback - Vote and/or free-text note to record.
 */
export async function updateInsightFeedback(
  insightId: string,
  clusterId: string,
  feedback: { helpful?: boolean; note?: string }
): Promise<void> {
  const container = getDiagnosticInsightsContainer();

  const existing = await getDiagnosticInsightById(insightId, clusterId);
  if (!existing) {
    return;
  }

  // Start from the stored counters, or from zeroes on first feedback.
  const stats = existing.feedbackStats || { helpful: 0, notHelpful: 0, engineerNotes: [] };

  switch (feedback.helpful) {
    case true:
      stats.helpful++;
      break;
    case false:
      stats.notHelpful++;
      break;
    default:
      // No vote supplied: counters stay as they are.
      break;
  }

  if (feedback.note) {
    stats.engineerNotes.push(feedback.note);
  }

  const updatedAt = new Date().toISOString();
  await container.items.upsert({ ...existing, feedbackStats: stats, updatedAt });
}
|
||||||
|
|
||||||
|
// ============================================================================
|
||||||
|
// Natural Language Query Repository
|
||||||
|
// ============================================================================
|
||||||
|
|
||||||
|
/**
 * Inserts a natural-language query record.
 *
 * @param query - Document to create.
 * @returns The document as returned by the create call.
 */
export async function saveNaturalLanguageQuery(
  query: NaturalLanguageQueryDoc
): Promise<NaturalLanguageQueryDoc> {
  const response = await getDiagnosticQueriesContainer().items.create(query);
  return response.resource as NaturalLanguageQueryDoc;
}
|
||||||
|
|
||||||
|
/**
 * Lists a user's natural-language queries, newest first.
 *
 * @param userId - User whose history is returned.
 * @param options.limit - Maximum number of entries (default 20).
 * @returns Query records ordered by `createdAt` descending.
 */
export async function getQueryHistory(
  userId: string,
  options: { limit?: number } = {}
): Promise<NaturalLanguageQueryDoc[]> {
  const pageSize = options.limit || 20;

  const querySpec = {
    query: `
    SELECT * FROM c
    WHERE c.userId = @userId
    ORDER BY c.createdAt DESC
    OFFSET 0 LIMIT @limit
  `,
    parameters: [
      { name: '@userId', value: userId },
      { name: '@limit', value: pageSize },
    ],
  };

  const response = await getDiagnosticQueriesContainer().items.query(querySpec).fetchAll();
  return response.resources as NaturalLanguageQueryDoc[];
}
|
||||||
|
|
||||||
|
// ============================================================================
|
||||||
|
// Proactive Alert Repository
|
||||||
|
// ============================================================================
|
||||||
|
|
||||||
|
/**
 * Inserts a new proactive alert document.
 *
 * @param alert - Document to create.
 * @returns The document as returned by the create call.
 */
export async function createProactiveAlert(alert: ProactiveAlert): Promise<ProactiveAlert> {
  const response = await getProactiveAlertsContainer().items.create(alert);
  return response.resource as ProactiveAlert;
}
|
||||||
|
|
||||||
|
/**
 * Lists a product's alerts that are neither resolved nor acknowledged.
 *
 * @param productId - Product whose alerts are returned.
 * @returns Alerts ordered by severity (critical, high, medium, low, anything
 *   else) and, within one severity, newest first.
 */
export async function getActiveAlerts(productId: string): Promise<ProactiveAlert[]> {
  const container = getProactiveAlertsContainer();

  // The Cosmos DB NoSQL query language has no CASE expression, so the query
  // only orders by creation time; the severity ranking is applied below.
  const query = `
    SELECT * FROM c
    WHERE c.productId = @productId
      AND NOT IS_DEFINED(c.resolvedAt)
      AND NOT IS_DEFINED(c.acknowledgedAt)
    ORDER BY c.createdAt DESC
  `;

  const { resources } = await container.items
    .query({
      query,
      parameters: [{ name: '@productId', value: productId }],
    })
    .fetchAll();

  const severityRank = new Map<string, number>([
    ['critical', 1],
    ['high', 2],
    ['medium', 3],
    ['low', 4],
  ]);
  const rankOf = (alert: ProactiveAlert): number => severityRank.get(alert.severity) ?? 5;

  // Array.prototype.sort is stable, so alerts of equal severity keep the
  // newest-first order produced by the query.
  const alerts = resources as ProactiveAlert[];
  return alerts.sort((a, b) => rankOf(a) - rankOf(b));
}
|
||||||
|
|
||||||
|
/**
 * Marks an alert as acknowledged by a user.
 *
 * Does nothing when the alert does not exist.
 *
 * @param alertId - Document id of the alert.
 * @param productId - Partition key value passed to `container.item`.
 * @param userId - User recorded in `acknowledgedBy`.
 * @throws Re-throws any failure other than a 404.
 */
export async function acknowledgeAlert(
  alertId: string,
  productId: string,
  userId: string
): Promise<void> {
  const container = getProactiveAlertsContainer();

  try {
    const { resource } = await container.item(alertId, productId).read();

    // A missing item is reported as an undefined resource. Spreading it into
    // the upsert would create a brand-new document holding only the
    // acknowledgement fields, so stop here instead.
    if (!resource) {
      return;
    }
    const alert = resource as ProactiveAlert;

    await container.items.upsert({
      ...alert,
      acknowledgedAt: new Date().toISOString(),
      acknowledgedBy: userId,
    });
  } catch (error: unknown) {
    const code = (error as { code?: unknown } | null)?.code;
    if (code === 404) {
      // Alert not found
      return;
    }
    throw error;
  }
}
|
||||||
|
|
||||||
|
// ============================================================================
|
||||||
|
// Analytics Queries
|
||||||
|
// ============================================================================
|
||||||
|
|
||||||
|
/**
 * Summarises a product's non-ignored clusters that were last seen inside a
 * time range, most frequent first.
 *
 * @param productId - Product whose clusters are summarised.
 * @param timeRange - Inclusive bounds compared against `lastSeenAt`.
 * @returns One projection row per cluster, ordered by `occurrenceCount`
 *   descending.
 */
export async function getClusterTrends(
  productId: string,
  timeRange: { start: string; end: string }
): Promise<
  Array<{
    clusterId: string;
    errorType: string;
    firstSeenAt: string;
    lastSeenAt: string;
    occurrenceCount: number;
    uniqueUsers: number;
  }>
> {
  const parameters = [
    { name: '@productId', value: productId },
    { name: '@start', value: timeRange.start },
    { name: '@end', value: timeRange.end },
  ];

  const query = `
    SELECT
      c.id as clusterId,
      c.errorType,
      c.firstSeenAt,
      c.lastSeenAt,
      c.occurrenceCount,
      c.uniqueUsers
    FROM c
    WHERE c.productId = @productId
      AND c.lastSeenAt >= @start
      AND c.lastSeenAt <= @end
      AND c.status != 'ignored'
    ORDER BY c.occurrenceCount DESC
  `;

  const response = await getErrorClustersContainer()
    .items.query({ query, parameters })
    .fetchAll();

  // The row shape is exactly the declared return type of this function.
  return response.resources as Awaited<ReturnType<typeof getClusterTrends>>;
}
|
||||||
|
|
||||||
|
/**
 * Ranks the error types of a product's active clusters by total occurrences.
 *
 * @param productId - Product whose clusters are aggregated.
 * @param limit - Maximum number of error types returned (default 10).
 * @returns Per error type: the number of clusters (`count`) and the summed
 *   `occurrenceCount` (`totalOccurrences`), highest total first.
 */
export async function getTopErrorTypes(
  productId: string,
  limit: number = 10
): Promise<Array<{ errorType: string; count: number; totalOccurrences: number }>> {
  const container = getErrorClustersContainer();

  // Cosmos DB does not accept ORDER BY on an aggregate alias of a GROUP BY
  // query, so the query only aggregates; ranking and truncation happen below.
  const query = `
    SELECT
      c.errorType,
      COUNT(1) as count,
      SUM(c.occurrenceCount) as totalOccurrences
    FROM c
    WHERE c.productId = @productId
      AND c.status = 'active'
    GROUP BY c.errorType
  `;

  const { resources } = await container.items
    .query({
      query,
      parameters: [{ name: '@productId', value: productId }],
    })
    .fetchAll();

  const rows = resources as Array<{ errorType: string; count: number; totalOccurrences: number }>;

  return rows
    .sort((a, b) => b.totalOccurrences - a.totalOccurrences)
    .slice(0, Math.max(0, limit));
}
|
||||||
@ -0,0 +1,528 @@
|
|||||||
|
/**
|
||||||
|
* Churn Prediction Model - XGBoost-based binary classifier
|
||||||
|
* [2.1] Model Architecture and Training Pipeline
|
||||||
|
*/
|
||||||
|
|
||||||
|
import type { CompleteFeatureVector } from './feature-extractor.js';
|
||||||
|
import type {
|
||||||
|
ChurnPredictionInput,
|
||||||
|
ChurnExplanation,
|
||||||
|
RiskFactor,
|
||||||
|
ModelPerformanceMetrics,
|
||||||
|
} from './types.js';
|
||||||
|
|
||||||
|
// Model configuration
// Version string stored on each ChurnModel instance (`modelVersion`).
const MODEL_VERSION = '1.0.0';
// Default prediction horizon, in days, when `predict` is called without one.
const DEFAULT_HORIZON_DAYS = 30;
// Probability cut-offs for the risk segments — presumably consumed by
// `determineRiskSegment`; confirm against that method.
const HIGH_RISK_THRESHOLD = 0.6;
const CRITICAL_RISK_THRESHOLD = 0.8;
|
||||||
|
|
||||||
|
// Feature weights for simplified model (would be trained in production).
// `predict` multiplies each feature value by its weight here, sums the
// products, and passes the sum through a sigmoid to obtain the churn
// probability. A feature missing from the extracted values is scored as 0.5.
// NOTE(review): engagement features carry positive weights and recency/error
// features negative ones, which reads like a retention score, yet the sum is
// used as a churn score. Confirm that `extractFeatureValues` inverts the
// features, or that the signs are intended.
const FEATURE_WEIGHTS: Record<string, number> = {
  // Recency features (high importance)
  daysSinceLastSession: -0.25,
  daysSinceLastCoreAction: -0.20,

  // Frequency features (high importance)
  sessionsLast7Days: 0.15,
  sessionsLast30Days: 0.10,
  avgSessionsPerWeek: 0.12,

  // Engagement features (medium importance)
  avgSessionDurationMinutes: 0.08,
  actionsPerSession: 0.08,
  uniqueFeaturesUsed: 0.10,
  featureUsageDiversity: 0.12,
  coreActionCompletionRate: 0.15,
  powerUserScore: 0.10,
  onboardingCompletionRate: 0.08,

  // Trends (medium-high importance)
  sessionFrequencyTrend: 0.12,
  engagementDepthTrend: 0.10,
  wowSessionChange: 0.10,

  // Performance (medium importance)
  errorRateLast7Days: -0.15,
  errorRateLast30Days: -0.10,
  crashCountLast7Days: -0.12,
  errorRecoveryRate: 0.08,

  // Social (low-medium importance)
  shareCount: 0.05,
  inviteCount: 0.06,
  collaborationScore: 0.05,

  // Revenue (high importance for paid users)
  planTier: 0.05,
  lifetimeValue: 0.03,
  upgradeCount: 0.08,
  downgradeCount: -0.12,
  daysSinceLastPayment: -0.10,

  // Cohort comparison
  cohortSessionPercentile: 0.08,
  cohortEngagementPercentile: 0.08,
  cohortRetentionPercentile: 0.10,
};
|
||||||
|
|
||||||
|
// Product-specific feature weights, keyed by product id and then by feature
// name. `predict` looks up `features.productId` here and adds these weighted
// features on top of FEATURE_WEIGHTS; a product without an entry contributes
// nothing extra.
const PRODUCT_FEATURE_WEIGHTS: Record<string, Record<string, number>> = {
  nomgap: {
    fastCompletionRate: 0.12,
    protocolAdherenceScore: 0.10,
    streakLength: 0.15,
    autophagyEngagementScore: 0.08,
  },
  jarvisjr: {
    agentDiversityScore: 0.10,
    voiceSessionRatio: 0.08,
    skillProgressionRate: 0.12,
    sessionCompletionRate: 0.10,
  },
  chronomind: {
    timerCompletionRate: 0.12,
    cascadeEffectiveness: 0.10,
    routineAdherenceScore: 0.12,
    urgencyResponseRate: 0.08,
  },
  mindlyst: {
    brainUsageDiversity: 0.10,
    triageAccuracyScore: 0.10,
    memoryCaptureFrequency: 0.12,
    reflectionCompletionRate: 0.08,
  },
  peakpulse: {
    activitySessionFrequency: 0.12,
    goalCompletionRate: 0.12,
    streakMaintenanceScore: 0.10,
    socialSharingCount: 0.05,
  },
  lysnrai: {
    dictationFrequency: 0.15,
    accuracyRate: 0.10,
    hotkeyUsageRate: 0.08,
    vocabularyGrowthRate: 0.08,
  },
};
|
||||||
|
|
||||||
|
/** Result of `ChurnModel.predict`: the prediction fields plus its rationale. */
export interface ChurnPredictionResult extends ChurnPredictionInput {
  /** Explanation produced alongside the prediction. */
  explanation: ChurnExplanation;
  /** Confidence in the prediction — `predict` appears to take this from `features.dataQualityScore`; confirm. */
  confidenceScore: number;
}
|
||||||
|
|
||||||
|
/**
 * Weighted-score churn model.
 *
 * Normalizes a user's feature vector, multiplies each normalized value by its
 * weight from `FEATURE_WEIGHTS` and `PRODUCT_FEATURE_WEIGHTS`, squashes the
 * sum through a sigmoid and attaches an explanation (top contributing
 * features, narrative text, suggested actions).
 */
export class ChurnModel {
  private modelVersion: string = MODEL_VERSION;

  /**
   * Predict churn probability for a single user.
   *
   * @param features Raw feature vector for one user/product pair.
   * @param horizonDays Prediction horizon in days; longer horizons pull the
   *   result toward 0.5.
   * @returns The prediction, its risk segment and an explanation.
   */
  predict(
    features: CompleteFeatureVector,
    horizonDays: number = DEFAULT_HORIZON_DAYS
  ): ChurnPredictionResult {
    const normalizedFeatures = this.extractFeatureValues(features);

    // Global weights first, then the product-specific ones. A feature that is
    // absent from the normalized map scores as the neutral midpoint 0.5.
    const productWeights = PRODUCT_FEATURE_WEIGHTS[features.productId] ?? {};
    let weightedScore = 0;
    for (const weights of [FEATURE_WEIGHTS, productWeights]) {
      for (const [feature, weight] of Object.entries(weights)) {
        weightedScore += (normalizedFeatures[feature] ?? 0.5) * weight;
      }
    }

    const rawProbability = this.sigmoid(weightedScore * 2);

    // Blend toward 0.5 as the horizon grows. The factor is clamped to [0, 1]
    // so a horizon above 100 days cannot turn negative and invert the score.
    const uncertaintyFactor = Math.max(0, Math.min(1, 1 - horizonDays / 100));
    const blended = rawProbability * uncertaintyFactor + 0.5 * (1 - uncertaintyFactor);
    const churnProbability = Math.max(0, Math.min(1, blended));

    // Segment the same clamped value that is reported to the caller.
    const riskSegment = this.determineRiskSegment(churnProbability);

    // Confidence is taken directly from the input's data-quality score.
    const confidenceScore = features.dataQualityScore;

    const explanation = this.generateExplanation(
      features,
      normalizedFeatures,
      churnProbability,
      weightedScore
    );

    return {
      userId: features.userId,
      productId: features.productId,
      predictionHorizon: horizonDays,
      churnProbability,
      riskSegment,
      confidenceScore,
      features: normalizedFeatures,
      featureVersion: features.featureSchemaVersion,
      modelVersion: this.modelVersion,
      // NOTE(review): reported as 'xgboost' although scoring here is a
      // weighted sum passed through a sigmoid — confirm the intended label.
      modelType: 'xgboost',
      predictionTimestamp: new Date().toISOString(),
      explanation,
    };
  }

  /**
   * Batch prediction for multiple users; applies `predict` to each vector
   * with the same horizon and preserves input order.
   */
  predictBatch(
    featureVectors: CompleteFeatureVector[],
    horizonDays: number = DEFAULT_HORIZON_DAYS
  ): ChurnPredictionResult[] {
    return featureVectors.map((vector) => this.predict(vector, horizonDays));
  }

  /**
   * Risk segmentation based on probability thresholds: the critical and high
   * thresholds come from module constants, 0.3 separates medium from low.
   */
  private determineRiskSegment(probability: number): 'critical' | 'high' | 'medium' | 'low' {
    if (probability >= CRITICAL_RISK_THRESHOLD) return 'critical';
    if (probability >= HIGH_RISK_THRESHOLD) return 'high';
    if (probability >= 0.3) return 'medium';
    return 'low';
  }

  /** Logistic sigmoid, mapping any real number into (0, 1). */
  private sigmoid(x: number): number {
    return 1 / (1 + Math.exp(-x));
  }

  /**
   * Extract and normalize feature values from a feature vector.
   *
   * "Inverse" features map larger raw values to smaller scores. Numeric
   * product-specific features are copied through as-is, without
   * normalization, and overwrite a core feature of the same name.
   */
  private extractFeatureValues(features: CompleteFeatureVector): Record<string, number> {
    const { behavior, engagement, performance, social, revenue, rolling } = features;

    const values: Record<string, number> = {
      // Recency
      daysSinceLastSession: this.normalizeInverse(behavior.daysSinceLastSession, 30),
      daysSinceLastCoreAction: this.normalizeInverse(behavior.daysSinceLastCoreAction, 30),

      // Frequency
      sessionsLast7Days: this.normalizeLinear(behavior.sessionsLast7Days, 20),
      sessionsLast30Days: this.normalizeLinear(behavior.sessionsLast30Days, 100),
      avgSessionsPerWeek: this.normalizeLinear(behavior.avgSessionsPerWeek, 10),
      avgSessionsPerDay: this.normalizeLinear(behavior.avgSessionsPerDay, 3),

      // Session depth
      avgSessionDurationMinutes: this.normalizeLinear(behavior.avgSessionDurationMinutes, 60),
      actionsPerSession: this.normalizeLinear(behavior.actionsPerSession, 30),
      uniqueFeaturesUsed: this.normalizeLinear(behavior.uniqueFeaturesUsed, 15),

      // Trends
      sessionFrequencyTrend: this.normalizeRange(behavior.sessionFrequencyTrend, -1, 1),
      engagementDepthTrend: this.normalizeRange(behavior.engagementDepthTrend, -1, 1),

      // Engagement
      featureUsageDiversity: engagement.featureUsageDiversity,
      coreActionCompletionRate: engagement.coreActionCompletionRate,
      featureAdoptionVelocity: this.normalizeLinear(engagement.featureAdoptionVelocity, 5),
      powerUserScore: engagement.powerUserScore,
      onboardingCompletionRate: engagement.onboardingCompletionRate,
      firstValueMomentAchieved: engagement.firstValueMomentAchieved ? 1 : 0,
      timeToFirstValueHours: this.normalizeInverse(engagement.timeToFirstValueHours, 48),

      // Performance (error rates are scaled by 100 before normalizing)
      errorRateLast7Days: this.normalizeInverse(performance.errorRateLast7Days * 100, 10),
      errorRateLast30Days: this.normalizeInverse(performance.errorRateLast30Days * 100, 10),
      crashCountLast7Days: this.normalizeInverse(performance.crashCountLast7Days, 5),
      crashCountLast30Days: this.normalizeInverse(performance.crashCountLast30Days, 10),
      avgLatencyMs: this.normalizeInverse(performance.avgLatencyMs, 5000),
      slowRequestCount: this.normalizeInverse(performance.slowRequestCount, 20),
      timeoutCount: this.normalizeInverse(performance.timeoutCount, 10),
      errorRecoveryRate: performance.errorRecoveryRate,
      supportTicketCount: this.normalizeInverse(performance.supportTicketCount, 5),

      // Social
      shareCount: this.normalizeLinear(social.shareCount, 20),
      inviteCount: this.normalizeLinear(social.inviteCount, 10),
      collaborationScore: social.collaborationScore,
      teamMemberCount: this.normalizeLinear(social.teamMemberCount, 10),
      integrationsConnected: this.normalizeLinear(social.integrationsConnected, 5),
      externalSharesLast30Days: this.normalizeLinear(social.externalSharesLast30Days, 10),

      // Revenue
      planTier: revenue.planTier / 2,
      lifetimeValue: this.normalizeLog(revenue.lifetimeValue),
      mrrContribution: this.normalizeLog(revenue.mrrContribution),
      upgradeCount: this.normalizeLinear(revenue.upgradeCount, 5),
      downgradeCount: this.normalizeInverse(revenue.downgradeCount, 3),
      daysSinceLastPayment: this.normalizeInverse(revenue.daysSinceLastPayment, 60),
      daysSincePlanChange: this.normalizeInverse(revenue.daysSincePlanChange, 180),
      supportSatisfactionScore: revenue.supportSatisfactionScore,
      escalatedTicketCount: this.normalizeInverse(revenue.escalatedTicketCount, 3),

      // Rolling window
      rollingAvgSessions7d: this.normalizeLinear(rolling.rollingAvgSessions7d, 5),
      rollingAvgDuration7d: this.normalizeLinear(rolling.rollingAvgDuration7d, 60),
      rollingAvgActions7d: this.normalizeLinear(rolling.rollingAvgActions7d, 20),
      wowSessionChange: this.normalizeRange(rolling.wowSessionChange, -0.5, 0.5),
      wowDurationChange: this.normalizeRange(rolling.wowDurationChange, -0.5, 0.5),
      wowActionsChange: this.normalizeRange(rolling.wowActionsChange, -0.5, 0.5),
      cohortSessionPercentile: rolling.cohortSessionPercentile / 100,
      cohortEngagementPercentile: rolling.cohortEngagementPercentile / 100,
      cohortRetentionPercentile: rolling.cohortRetentionPercentile / 100,
    };

    // Add product-specific features (numeric entries only).
    for (const [key, value] of Object.entries(features.productSpecific)) {
      if (typeof value === 'number') {
        values[key] = value;
      }
    }

    return values;
  }

  /**
   * Generate the explanation for a churn prediction.
   *
   * Contributions are computed from the global `FEATURE_WEIGHTS` only, as
   * `(value - 0.5) * weight * 2`, so a neutral value contributes zero.
   * `weightedScore` is accepted but not used.
   */
  private generateExplanation(
    features: CompleteFeatureVector,
    normalizedFeatures: Record<string, number>,
    churnProbability: number,
    weightedScore: number
  ): ChurnExplanation {
    const contributions = Object.entries(FEATURE_WEIGHTS).map(([feature, weight]) => {
      const value = normalizedFeatures[feature] ?? 0.5;
      return { feature, contribution: (value - 0.5) * weight * 2, value };
    });

    // Largest absolute contribution first.
    contributions.sort((a, b) => Math.abs(b.contribution) - Math.abs(a.contribution));

    const topRiskFactors: RiskFactor[] = contributions.slice(0, 5).map((entry) => ({
      feature: entry.feature,
      contribution: entry.contribution,
      direction: entry.contribution > 0 ? 'positive' : 'negative',
      description: this.getFeatureDescription(entry.feature, entry.value),
    }));

    // Global feature importance: the ten largest absolute model weights.
    const globalFeatureImportance = Object.entries(FEATURE_WEIGHTS)
      .map(([feature, weight]) => ({ feature, importance: Math.abs(weight) }))
      .sort((a, b) => b.importance - a.importance)
      .slice(0, 10);

    const nlExplanation = this.generateNLExplanation(
      features.userId,
      churnProbability,
      topRiskFactors,
      features.behavior.daysSinceLastSession
    );

    const suggestedActions = this.generateSuggestedActions(topRiskFactors, features);

    return {
      topRiskFactors,
      globalFeatureImportance,
      nlExplanation,
      suggestedActions,
    };
  }

  /**
   * Generate the natural language explanation.
   *
   * Lists up to three 'negative' risk factors, adds an inactivity line after
   * more than 7 idle days, and appends a closing sentence chosen by
   * probability band. `userId` is accepted but not used.
   */
  private generateNLExplanation(
    userId: string,
    probability: number,
    riskFactors: RiskFactor[],
    daysSinceLastSession: number
  ): string {
    const riskPercent = Math.round(probability * 100);
    const parts: string[] = [`This user shows ${riskPercent}% churn risk because:\n`];

    for (const factor of riskFactors.slice(0, 3)) {
      if (factor.direction === 'negative') {
        parts.push(`- ${factor.description}\n`);
      }
    }

    if (daysSinceLastSession > 7) {
      parts.push(`- No activity for ${daysSinceLastSession} days\n`);
    }

    if (probability > 0.7) {
      parts.push(`\nSimilar users who showed these patterns had 85% churn rate within 30 days.`);
    } else if (probability > 0.4) {
      parts.push(`\nIntervention recommended to prevent churn.`);
    }

    return parts.join('').trim();
  }

  /**
   * Get a human-readable description for a feature.
   *
   * @param feature Feature name.
   * @param value Normalized feature value (0.5 is the neutral midpoint).
   * @returns A canned phrase for known features, otherwise `name: value`.
   */
  private getFeatureDescription(feature: string, value: number): string {
    const descriptions: Record<string, string> = {
      daysSinceLastSession: value < 0.5 ? 'Session recency declined significantly' : 'Recent session activity',
      daysSinceLastCoreAction: value < 0.5 ? 'Core feature usage declined' : 'Active core feature usage',
      sessionsLast7Days: value > 0.7 ? 'Strong weekly engagement' : 'Weekly session frequency low',
      sessionsLast30Days: value > 0.7 ? 'Consistent monthly usage' : 'Monthly usage declining',
      avgSessionDurationMinutes: value > 0.6 ? 'Good session depth' : 'Sessions too short',
      featureUsageDiversity: value > 0.7 ? 'Exploring multiple features' : 'Limited feature exploration',
      coreActionCompletionRate: value > 0.7 ? 'Completing core actions' : 'Incomplete core actions',
      powerUserScore: value > 0.6 ? 'Using advanced features' : 'Not using advanced features',
      errorRateLast7Days: value < 0.5 ? 'Experiencing errors recently' : 'Stable error-free experience',
      // Trend features are range-normalized, so a raw change of zero maps to
      // 0.5; comparing against 0 would label almost every user as growing.
      sessionFrequencyTrend: value > 0.5 ? 'Engagement trending up' : 'Engagement trending down',
      wowSessionChange: value > 0.5 ? 'Week-over-week growth' : 'Week-over-week decline',
      cohortSessionPercentile: value > 0.6 ? 'Above average engagement' : 'Below average engagement',
    };

    return descriptions[feature] || `${feature}: ${value.toFixed(2)}`;
  }

  /**
   * Generate suggested intervention actions.
   *
   * Each recognised 'negative' risk factor adds two actions; a generic pair
   * is used when none match. At most three actions are returned.
   * `features` is accepted but not used.
   */
  private generateSuggestedActions(
    riskFactors: RiskFactor[],
    features: CompleteFeatureVector
  ): string[] {
    const isFlagged = (name: string): boolean =>
      riskFactors.some((factor) => factor.feature === name && factor.direction === 'negative');

    const actions: string[] = [];

    if (isFlagged('daysSinceLastSession')) {
      actions.push('Send re-engagement email with personalized content');
      actions.push('Offer limited-time feature trial or discount');
    }

    if (isFlagged('sessionFrequencyTrend')) {
      actions.push('Highlight unused features with tutorial content');
      actions.push('Schedule check-in call with customer success');
    }

    if (isFlagged('featureUsageDiversity')) {
      actions.push('Send feature discovery campaign');
      actions.push('Show success stories from similar users');
    }

    if (isFlagged('errorRateLast7Days')) {
      actions.push('Proactive outreach: acknowledge technical issues');
      actions.push('Offer priority support access');
    }

    if (actions.length === 0) {
      actions.push('Monitor usage patterns for changes');
      actions.push('Include in monthly newsletter');
    }

    return actions.slice(0, 3);
  }

  /**
   * Model performance evaluation.
   *
   * Computes AUC plus precision and recall over the top 10% of predictions
   * ranked by score. Calibration values and per-product performance are
   * returned as fixed placeholders (1.0, 0 and `{}`).
   */
  evaluateModel(
    predictions: Array<{ actual: boolean; predicted: number }>
  ): ModelPerformanceMetrics {
    // Highest predicted score first; the input array is not mutated.
    const ranked = [...predictions].sort((a, b) => b.predicted - a.predicted);

    const topDecile = ranked.slice(0, Math.ceil(ranked.length * 0.1));
    const truePositivesAt10 = topDecile.filter((p) => p.actual).length;
    const precisionAt10 = topDecile.length ? truePositivesAt10 / topDecile.length : 0;

    const totalPositives = predictions.filter((p) => p.actual).length;
    const recallAt10 = totalPositives ? truePositivesAt10 / totalPositives : 0;

    const auc = this.estimateAUC(ranked);

    return {
      modelVersion: this.modelVersion,
      modelType: 'xgboost',
      trainedAt: new Date().toISOString(),
      auc,
      precisionAt10,
      recallAt10,
      calibrationSlope: 1.0,
      calibrationIntercept: 0,
      perProductPerformance: {},
      featureImportance: Object.entries(FEATURE_WEIGHTS)
        .map(([feature, weight]) => ({ feature, importance: Math.abs(weight) }))
        .sort((a, b) => b.importance - a.importance),
    };
  }

  /**
   * Estimate AUC as the share of positive/negative pairs in which the
   * positive has the higher score; ties count as half. Returns 0.5 when
   * either class is empty.
   */
  private estimateAUC(sortedPredictions: Array<{ actual: boolean; predicted: number }>): number {
    const positives = sortedPredictions.filter((p) => p.actual);
    const negatives = sortedPredictions.filter((p) => !p.actual);

    if (positives.length === 0 || negatives.length === 0) return 0.5;

    let concordant = 0;
    for (const pos of positives) {
      for (const neg of negatives) {
        if (pos.predicted > neg.predicted) concordant++;
        else if (pos.predicted === neg.predicted) concordant += 0.5;
      }
    }

    return concordant / (positives.length * negatives.length);
  }

  // Normalization helpers. All of them return a value in [0, 1].

  /** Scales `value / max` into [0, 1]. */
  private normalizeLinear(value: number, max: number): number {
    return Math.min(1, Math.max(0, value / max));
  }

  /**
   * Inverse of `normalizeLinear`: 0 maps to 1 and `max` or more maps to 0.
   * Negative inputs are clamped so the result cannot exceed 1.
   */
  private normalizeInverse(value: number, max: number): number {
    return 1 - Math.min(1, Math.max(0, value / max));
  }

  /**
   * Maps `value` from `[min, max]` onto [0, 1], clamping values outside the
   * range. A degenerate range (`max === min`) yields the neutral 0.5.
   */
  private normalizeRange(value: number, min: number, max: number): number {
    if (max === min) return 0.5;
    return Math.min(1, Math.max(0, (value - min) / (max - min)));
  }

  /**
   * Log-scales `value` so that 1000 maps to 1. Negative inputs are treated
   * as 0 and values above 1000 are capped at 1.
   */
  private normalizeLog(value: number): number {
    return Math.min(1, Math.log1p(Math.max(0, value)) / Math.log1p(1000));
  }
}
|
||||||
|
|
||||||
|
// Shared ChurnModel instance created when this module is loaded.
export const churnModel = new ChurnModel();
|
||||||
@ -0,0 +1,189 @@
|
|||||||
|
/**
|
||||||
|
* Feature Store - Storage and retrieval of user feature vectors
|
||||||
|
* [1.2] Feature Store and Cosmos containers
|
||||||
|
*/
|
||||||
|
|
||||||
|
import { getRegisteredContainer } from '@bytelyst/cosmos';
|
||||||
|
import type { CompleteFeatureVector } from './feature-extractor.js';
|
||||||
|
import type { UserFeatureVectorDoc, FeatureDefinition } from './types.js';
|
||||||
|
|
||||||
|
const FEATURE_SCHEMA_VERSION = '1.0.0';
|
||||||
|
const DEFAULT_TTL_SECONDS = 90 * 24 * 60 * 60; // 90 days
|
||||||
|
|
||||||
|
/**
 * Feature store backed by the registered `user_features` container: saves
 * feature vectors together with their normalized form, and queries them back
 * per user or per product.
 */
export class FeatureStore {
  /**
   * Normalizes `features`, wraps them in a new document and creates it in
   * the container.
   *
   * @returns The document that was written (fresh `fv_`-prefixed id, 90-day ttl).
   */
  async saveFeatureVector(
    userId: string,
    productId: string,
    features: CompleteFeatureVector
  ): Promise<UserFeatureVectorDoc> {
    const container = getRegisteredContainer('user_features');
    const normalizedFeatures = this.normalizeFeatures(features);

    const doc: UserFeatureVectorDoc = {
      id: `fv_${crypto.randomUUID()}`,
      userId,
      productId,
      features,
      normalizedFeatures,
      featureSchemaVersion: FEATURE_SCHEMA_VERSION,
      computedAt: new Date().toISOString(),
      observationWindow: {
        start: features.observationWindow.start.toISOString(),
        end: features.observationWindow.end.toISOString(),
      },
      ttl: DEFAULT_TTL_SECONDS,
    };

    await container.items.create(doc);
    return doc;
  }

  /**
   * Returns the most recently computed feature vector for a user/product
   * pair, or `null` when none is stored.
   */
  async getLatestFeatureVector(
    userId: string,
    productId: string
  ): Promise<UserFeatureVectorDoc | null> {
    const container = getRegisteredContainer('user_features');

    const query = {
      query: 'SELECT * FROM c WHERE c.userId = @userId AND c.productId = @productId ORDER BY c.computedAt DESC OFFSET 0 LIMIT 1',
      parameters: [
        { name: '@userId', value: userId },
        { name: '@productId', value: productId },
      ],
    };

    const { resources } = await container.items.query(query).fetchAll();
    return resources[0] ?? null;
  }

  /**
   * Returns the user's feature vectors computed within the last `days` days,
   * newest first.
   */
  async getFeatureHistory(
    userId: string,
    productId: string,
    days: number = 30
  ): Promise<UserFeatureVectorDoc[]> {
    const container = getRegisteredContainer('user_features');
    const cutoff = new Date();
    cutoff.setDate(cutoff.getDate() - days);

    const query = {
      query: 'SELECT * FROM c WHERE c.userId = @userId AND c.productId = @productId AND c.computedAt >= @cutoff ORDER BY c.computedAt DESC',
      parameters: [
        { name: '@userId', value: userId },
        { name: '@productId', value: productId },
        { name: '@cutoff', value: cutoff.toISOString() },
      ],
    };

    const { resources } = await container.items.query(query).fetchAll();
    return resources;
  }

  /** Returns up to `limit` feature vectors for a product, newest first. */
  async getFeaturesForProduct(productId: string, limit: number = 1000): Promise<UserFeatureVectorDoc[]> {
    const container = getRegisteredContainer('user_features');

    const query = {
      query: 'SELECT * FROM c WHERE c.productId = @productId ORDER BY c.computedAt DESC OFFSET 0 LIMIT @limit',
      parameters: [
        { name: '@productId', value: productId },
        { name: '@limit', value: limit },
      ],
    };

    const { resources } = await container.items.query(query).fetchAll();
    return resources;
  }

  /**
   * Computes min, max, mean and population standard deviation of every
   * normalized feature over the product's latest 10 000 vectors.
   *
   * Non-finite samples (for example a NaN that was stored as `null`) are
   * skipped so they cannot skew the statistics; a feature with no finite
   * sample is omitted from the result.
   */
  async computeFeatureStats(productId: string): Promise<Record<string, { min: number; max: number; avg: number; std: number }>> {
    const docs = await this.getFeaturesForProduct(productId, 10000);

    const samples = new Map<string, number[]>();
    for (const doc of docs) {
      for (const [key, value] of Object.entries(doc.normalizedFeatures)) {
        if (typeof value !== 'number' || !Number.isFinite(value)) continue;
        const bucket = samples.get(key);
        if (bucket) bucket.push(value);
        else samples.set(key, [value]);
      }
    }

    const result: Record<string, { min: number; max: number; avg: number; std: number }> = {};

    for (const [key, values] of samples) {
      // Plain loops instead of Math.min(...values): spreading thousands of
      // arguments can hit the engine's argument-count limit.
      let min = Infinity;
      let max = -Infinity;
      let sum = 0;
      for (const v of values) {
        if (v < min) min = v;
        if (v > max) max = v;
        sum += v;
      }
      const avg = sum / values.length;

      let squaredDeviation = 0;
      for (const v of values) {
        squaredDeviation += (v - avg) ** 2;
      }
      const std = Math.sqrt(squaredDeviation / values.length);

      result[key] = { min, max, avg, std };
    }

    return result;
  }

  /**
   * Builds the flat normalized map stored next to the raw features. Core
   * features are scaled against fixed bounds into [0, 1]. Product-specific
   * entries are clamped to [0, 1] when numeric and stored as 0 otherwise;
   * `undefined` entries are skipped.
   */
  normalizeFeatures(features: CompleteFeatureVector): Record<string, number> {
    const { behavior, engagement, performance, social, revenue, rolling } = features;

    const normalized: Record<string, number> = {
      // Behavior features
      daysSinceLastSession: this.normalizeMinMax(behavior.daysSinceLastSession, 0, 30),
      sessionsLast7Days: this.normalizeMinMax(behavior.sessionsLast7Days, 0, 50),
      sessionsLast30Days: this.normalizeMinMax(behavior.sessionsLast30Days, 0, 200),
      avgSessionDurationMinutes: this.normalizeMinMax(behavior.avgSessionDurationMinutes, 0, 120),
      actionsPerSession: this.normalizeMinMax(behavior.actionsPerSession, 0, 50),
      uniqueFeaturesUsed: this.normalizeMinMax(behavior.uniqueFeaturesUsed, 0, 20),
      sessionFrequencyTrend: this.normalizeRange(behavior.sessionFrequencyTrend, -1, 1),

      // Engagement features
      featureUsageDiversity: this.normalizeMinMax(engagement.featureUsageDiversity, 0, 1),
      coreActionCompletionRate: this.normalizeMinMax(engagement.coreActionCompletionRate, 0, 1),
      powerUserScore: this.normalizeMinMax(engagement.powerUserScore, 0, 1),
      onboardingCompletionRate: this.normalizeMinMax(engagement.onboardingCompletionRate, 0, 1),

      // Performance features
      errorRateLast7Days: this.normalizeMinMax(performance.errorRateLast7Days, 0, 1),
      errorRateLast30Days: this.normalizeMinMax(performance.errorRateLast30Days, 0, 1),
      avgLatencyMs: this.normalizeMinMax(performance.avgLatencyMs, 0, 10000),
      errorRecoveryRate: this.normalizeMinMax(performance.errorRecoveryRate, 0, 1),

      // Social features
      shareCount: this.normalizeMinMax(social.shareCount, 0, 50),
      inviteCount: this.normalizeMinMax(social.inviteCount, 0, 20),
      collaborationScore: this.normalizeMinMax(social.collaborationScore, 0, 1),

      // Revenue features
      planTier: this.normalizeMinMax(revenue.planTier, 0, 2),
      lifetimeValue: this.normalizeLog(revenue.lifetimeValue),
      upgradeCount: this.normalizeMinMax(revenue.upgradeCount, 0, 5),
      downgradeCount: this.normalizeMinMax(revenue.downgradeCount, 0, 3),

      // Rolling features
      wowSessionChange: this.normalizeRange(rolling.wowSessionChange, -1, 1),
      wowDurationChange: this.normalizeRange(rolling.wowDurationChange, -1, 1),
      cohortSessionPercentile: this.normalizeMinMax(rolling.cohortSessionPercentile, 0, 100),
      cohortEngagementPercentile: this.normalizeMinMax(rolling.cohortEngagementPercentile, 0, 100),
    };

    // Product-specific features (if present)
    for (const [key, value] of Object.entries(features.productSpecific)) {
      if (value !== undefined) {
        normalized[key] = typeof value === 'number' ? this.normalizeMinMax(value, 0, 1) : 0;
      }
    }

    return normalized;
  }

  /** Maps `value` from `[min, max]` onto [0, 1] with clamping; 0 when `max === min`. */
  private normalizeMinMax(value: number, min: number, max: number): number {
    if (max === min) return 0;
    return Math.max(0, Math.min(1, (value - min) / (max - min)));
  }

  /** Same as `normalizeMinMax`; kept for ranges with a negative lower bound. */
  private normalizeRange(value: number, min: number, max: number): number {
    return this.normalizeMinMax(value, min, max);
  }

  /**
   * Log scale: `log1p(value) / 10`, kept inside [0, 1] like the other
   * normalizers. Negative inputs are treated as 0.
   */
  private normalizeLog(value: number): number {
    return Math.min(1, Math.log1p(Math.max(0, value)) / 10);
  }
}
|
||||||
|
|
||||||
|
// Shared FeatureStore instance created when this module is loaded.
export const featureStore = new FeatureStore();
|
||||||
@ -0,0 +1,259 @@
|
|||||||
|
/**
|
||||||
|
* Predictive Analytics Types
|
||||||
|
* Data models for churn prediction, health scoring, and retention campaigns
|
||||||
|
*/
|
||||||
|
|
||||||
|
import { z } from 'zod';
|
||||||
|
import type { CompleteFeatureVector } from './feature-extractor.js';
|
||||||
|
|
||||||
|
// ============================================================================
|
||||||
|
// Churn Prediction Types
|
||||||
|
// ============================================================================
|
||||||
|
|
||||||
|
/** Risk segment labels accepted for a churn prediction. */
export const RiskSegmentEnum = z.enum(['critical', 'high', 'medium', 'low']);

/** Model type labels accepted for a churn prediction. */
export const ModelTypeEnum = z.enum(['xgboost', 'neural', 'ensemble']);

/** Prediction horizons expressed as string literals. */
export const PredictionHorizonEnum = z.enum(['7', '14', '30']);
|
||||||
|
|
||||||
|
/**
 * Runtime schema for a churn prediction. The horizon is coerced to an
 * integer between 7 and 30, and probability and confidence must lie in [0, 1].
 */
export const ChurnPredictionSchema = z.object({
  userId: z.string(),
  productId: z.string(),
  predictionHorizon: z.coerce.number().int().min(7).max(30),
  churnProbability: z.number().min(0).max(1),
  riskSegment: RiskSegmentEnum,
  confidenceScore: z.number().min(0).max(1),
  features: z.record(z.number()),
  featureVersion: z.string(),
  modelVersion: z.string(),
  modelType: ModelTypeEnum,
  predictionTimestamp: z.string().datetime(),
});
|
||||||
|
|
||||||
|
/** Static type inferred from `ChurnPredictionSchema`. */
export type ChurnPredictionInput = z.infer<typeof ChurnPredictionSchema>;
|
||||||
|
|
||||||
|
/** One feature's contribution to a churn prediction. */
export interface RiskFactor {
  /** Feature name. */
  feature: string;
  /** Signed contribution of the feature to the score. */
  contribution: number;
  /** Sign of the contribution. */
  direction: 'positive' | 'negative';
  /** Human-readable summary of the feature's state. */
  description: string;
}
|
||||||
|
|
||||||
|
/** Explanation attached to a churn prediction. */
export interface ChurnExplanation {
  /** Features with the largest contributions for this user. */
  topRiskFactors: RiskFactor[];
  /** Model-wide feature importances. */
  globalFeatureImportance: { feature: string; importance: number }[];
  /** Natural-language summary of the prediction. */
  nlExplanation: string;
  /** Suggested intervention actions. */
  suggestedActions: string[];
}
|
||||||
|
|
||||||
|
/**
 * Stored form of a churn prediction: the validated prediction fields plus
 * identifiers, the explanation, an intervention log and optional ground truth.
 */
export interface UserChurnPredictionDoc extends ChurnPredictionInput {
  id: string;
  /** Partition key value — presumably derived from product/user; confirm against the writer. */
  pk: string;
  explanation: ChurnExplanation;
  /** Interventions taken for this prediction, each with an optional outcome. */
  interventionHistory: {
    action: string;
    timestamp: string;
    outcome?: 'responded' | 'ignored' | 'churned' | 'retained';
  }[];
  /** Whether the user actually churned, once known. */
  actualChurned?: boolean;
  validationDate?: string;
  createdAt: string;
  /** Document time-to-live. */
  ttl: number;
}
|
||||||
|
|
||||||
|
// ============================================================================
|
||||||
|
// Health Score Types
|
||||||
|
// ============================================================================
|
||||||
|
|
||||||
|
/** Overall health status labels. */
export const HealthStatusEnum = z.enum(['critical', 'warning', 'healthy']);

/** Trend direction labels. */
export const TrendEnum = z.enum(['improving', 'stable', 'declining']);
|
||||||
|
|
||||||
|
/** Score, supporting metrics and trend for one health dimension. */
export interface HealthDimension {
  score: number;
  /** Named metric values behind the score. */
  metrics: Record<string, number>;
  trend: 'improving' | 'stable' | 'declining';
}
|
||||||
|
|
||||||
|
/** The six dimensions that make up a product health score. */
export interface ProductHealthDimensions {
  acquisition: HealthDimension;
  activation: HealthDimension;
  retention: HealthDimension;
  engagement: HealthDimension;
  revenue: HealthDimension;
  stability: HealthDimension;
}
|
||||||
|
|
||||||
|
/** A metric whose actual value deviates from its expected value. */
export interface HealthAnomaly {
  metric: string;
  expectedValue: number;
  actualValue: number;
  /** Deviation of the actual from the expected value, as a percentage. */
  deviationPercent: number;
  severity: 'critical' | 'warning';
  /** Optional hint at the likely cause. */
  suggestedCause?: string;
}
|
||||||
|
|
||||||
|
/** Forecast health score with a two-value confidence interval. */
export interface HealthForecast {
  expectedHealthScore: number;
  /** Interval bounds as a pair — presumably [lower, upper]; confirm against the producer. */
  confidenceInterval: [number, number];
}
|
||||||
|
|
||||||
|
/**
 * Persisted health score document for one product and date: the overall
 * score and status, per-dimension breakdown, detected anomalies, forecasts
 * and baseline comparisons.
 */
export interface ProductHealthScoreDoc {
  /** Document identifier. */
  id: string;
  /** Product the score belongs to. */
  productId: string;
  /** Date the score applies to. NOTE(review): string format not visible here — confirm. */
  date: string;
  /** Aggregate health score across all dimensions. */
  overallHealthScore: number;
  /** Status bucket for the overall score. */
  healthStatus: 'critical' | 'warning' | 'healthy';
  /** Per-dimension breakdown. */
  dimensions: ProductHealthDimensions;
  /** Anomalies attached to this score. */
  anomalies: HealthAnomaly[];
  /** Forecasts for the two supported look-ahead windows. */
  forecasts: {
    next7Days: HealthForecast;
    next30Days: HealthForecast;
  };
  /** Comparison against the 7-day baseline. NOTE(review): absolute vs. relative delta not visible — confirm. */
  vsBaseline7Day: number;
  /** Comparison against the 30-day baseline. NOTE(review): absolute vs. relative delta not visible — confirm. */
  vsBaseline30Day: number;
  /** Creation timestamp of the document. */
  createdAt: string;
  /** Time-to-live of the document. NOTE(review): unit assumed to be seconds — confirm. */
  ttl: number;
}
|
||||||
|
|
||||||
|
// ============================================================================
|
||||||
|
// Feature Store Types
|
||||||
|
// ============================================================================
|
||||||
|
|
||||||
|
/**
 * Persisted feature vector for a user within a product, together with its
 * normalized form and the window of observations it was computed from.
 */
export interface UserFeatureVectorDoc {
  /** Document identifier. */
  id: string;
  /** User the features describe. */
  userId: string;
  /** Product the features were computed for. */
  productId: string;
  /** Full feature vector. */
  features: CompleteFeatureVector;
  /** Normalized feature values keyed by name. NOTE(review): presumably keyed by feature name — confirm. */
  normalizedFeatures: Record<string, number>;
  /** Version of the feature schema used for this vector. */
  featureSchemaVersion: string;
  /** When the vector was computed. */
  computedAt: string;
  /** Start and end of the observation window. */
  observationWindow: {
    start: string;
    end: string;
  };
  /** Time-to-live of the document. NOTE(review): unit assumed to be seconds — confirm. */
  ttl: number;
}
|
||||||
|
|
||||||
|
/** Definition of a single feature: identity, typing, normalization and weighting. */
export interface FeatureDefinition {
  /** Identifier of the definition. */
  id: string;
  /** Product the feature is defined for. */
  productId: string;
  /** Feature name. */
  name: string;
  /** Human-readable description of the feature. */
  description: string;
  /** Category the feature is grouped under. */
  category: 'behavior' | 'engagement' | 'performance' | 'social' | 'revenue' | 'product_specific';
  /** Kind of value the feature holds. */
  dataType: 'numeric' | 'boolean' | 'categorical';
  /** Normalization strategy for the feature ('none' disables it). */
  normalization: 'min_max' | 'z_score' | 'none';
  /** Numeric default for the feature. NOTE(review): presumably used when the value is missing — confirm. */
  defaultValue: number;
  /** Weight of the feature. NOTE(review): range/scale not visible here — confirm. */
  importanceWeight: number;
  /** Whether the feature is enabled. */
  isEnabled: boolean;
  /** Creation timestamp of the definition. */
  createdAt: string;
}
|
||||||
|
|
||||||
|
// ============================================================================
|
||||||
|
// Campaign Types
|
||||||
|
// ============================================================================
|
||||||
|
|
||||||
|
/** Zod enum of the allowed campaign status values. */
export const CampaignStatusEnum = z.enum(['draft', 'active', 'paused', 'completed']);
|
||||||
|
/** Zod enum of the trigger types a campaign can use. */
export const CampaignTriggerTypeEnum = z.enum(['churn_risk', 'health_score_drop', 'behavioral', 'scheduled']);
|
||||||
|
/** Zod enum of the channels a campaign message can be sent through. */
export const CampaignChannelEnum = z.enum(['email', 'push', 'in_app', 'slack_cs']);
|
||||||
|
|
||||||
|
/**
 * Schema for a single condition: a field name, a comparison operator and the
 * value to compare against. The value is left unvalidated (`z.unknown()`).
 */
export const CampaignConditionSchema = z.object({
  field: z.string(),
  operator: z.enum(['gt', 'lt', 'eq', 'in', 'contains']),
  value: z.unknown(),
});
|
||||||
|
|
||||||
|
/**
 * Schema for one campaign message: the delivery channel and template are
 * required; variant, delay and per-message conditions are optional.
 */
export const CampaignMessageSchema = z.object({
  channel: CampaignChannelEnum,
  templateId: z.string(),
  variant: z.string().optional(),
  // Negative delays are rejected.
  delayHours: z.number().min(0).optional(),
  conditions: z.array(CampaignConditionSchema).optional(),
});
|
||||||
|
|
||||||
|
/**
 * Schema for the campaign-creation payload: naming, owning product, trigger,
 * audience filters and at least one message.
 */
export const CreateCampaignSchema = z.object({
  // Name must be 1–200 characters.
  name: z.string().min(1).max(200),
  description: z.string(),
  productId: z.string(),
  trigger: z.object({
    type: CampaignTriggerTypeEnum,
    conditions: z.array(CampaignConditionSchema),
  }),
  // Every audience filter is optional; the audience object itself is required.
  audience: z.object({
    riskSegments: z.array(z.string()).optional(),
    products: z.array(z.string()).optional(),
    userSegments: z.array(z.string()).optional(),
    // NOTE(review): unit (hours/days) is not visible here — confirm.
    excludeRecentContact: z.number().optional(),
  }),
  // A campaign must carry at least one message.
  messages: z.array(CampaignMessageSchema).min(1),
});
|
||||||
|
|
||||||
|
/** Static type inferred from {@link CreateCampaignSchema}. */
export type CreateCampaignInput = z.infer<typeof CreateCampaignSchema>;
|
||||||
|
|
||||||
|
/**
 * Persisted retention campaign: every field of {@link CreateCampaignInput}
 * plus document identity, lifecycle status, delivery/outcome counters and
 * timestamps.
 */
export interface RetentionCampaignDoc extends CreateCampaignInput {
  /** Document identifier. */
  id: string;
  /** Lifecycle status of the campaign. */
  status: 'draft' | 'active' | 'paused' | 'completed';
  /** Counters and churn rates recorded for the campaign. */
  stats: {
    triggered: number;
    sent: number;
    opened: number;
    clicked: number;
    converted: number;
    controlGroupSize: number;
    /** NOTE(review): fraction vs. percentage not visible here — confirm. */
    controlChurnRate: number;
    /** NOTE(review): fraction vs. percentage not visible here — confirm. */
    treatmentChurnRate: number;
  };
  /** Creation timestamp of the document. */
  createdAt: string;
  /** Timestamp of the last update. */
  updatedAt: string;
  /** Time-to-live of the document. NOTE(review): unit assumed to be seconds — confirm. */
  ttl: number;
}
|
||||||
|
|
||||||
|
// ============================================================================
|
||||||
|
// API Types
|
||||||
|
// ============================================================================
|
||||||
|
|
||||||
|
/**
 * Schema for a single-user churn score request. `horizon` falls back to
 * '30' when omitted.
 */
export const ChurnScoreRequestSchema = z.object({
  userId: z.string(),
  productId: z.string(),
  horizon: PredictionHorizonEnum.default('30'),
});
|
||||||
|
|
||||||
|
/**
 * Schema for a batch churn score request: up to 100 user ids for one
 * product. `horizon` falls back to '30' when omitted.
 */
export const ChurnBatchRequestSchema = z.object({
  productId: z.string(),
  // Batch size is capped at 100 ids; an empty array is accepted.
  userIds: z.array(z.string()).max(100),
  horizon: PredictionHorizonEnum.default('30'),
});
|
||||||
|
|
||||||
|
/**
 * Schema for the at-risk users query: optional product and segment filters
 * plus pagination. `limit` and `offset` are coerced to numbers before
 * validation.
 */
export const AtRiskUsersQuerySchema = z.object({
  productId: z.string().optional(),
  segment: RiskSegmentEnum.optional(),
  // Integer in [1, 200]; defaults to 50.
  limit: z.coerce.number().int().min(1).max(200).default(50),
  // Non-negative integer; defaults to 0.
  offset: z.coerce.number().int().min(0).default(0),
});
|
||||||
|
|
||||||
|
/**
 * Schema for a campaign trigger request: the campaign id, plus an optional
 * test user id.
 */
export const CampaignTriggerSchema = z.object({
  campaignId: z.string(),
  // NOTE(review): presumably limits the trigger to a single user for testing — confirm.
  testUserId: z.string().optional(),
});
|
||||||
|
|
||||||
|
// ============================================================================
|
||||||
|
// Model Performance Types
|
||||||
|
// ============================================================================
|
||||||
|
|
||||||
|
/** Evaluation metrics recorded for one trained model version. */
export interface ModelPerformanceMetrics {
  /** Version label of the model. */
  modelVersion: string;
  /** Kind of model. */
  modelType: string;
  /** When the model was trained. */
  trainedAt: string;
  /** AUC of the model. NOTE(review): presumably ROC AUC — confirm. */
  auc: number;
  /** Precision at 10. NOTE(review): top-10 vs. top-10% not visible here — confirm. */
  precisionAt10: number;
  /** Recall at 10. NOTE(review): top-10 vs. top-10% not visible here — confirm. */
  recallAt10: number;
  /** Slope of the calibration fit. */
  calibrationSlope: number;
  /** Intercept of the calibration fit. */
  calibrationIntercept: number;
  /** AUC and sample size per key. NOTE(review): presumably keyed by product id — confirm. */
  perProductPerformance: Record<string, { auc: number; sampleSize: number }>;
  /** Importance value per feature. */
  featureImportance: Array<{ feature: string; importance: number }>;
}
|
||||||
|
|
||||||
|
/** Persisted wrapper around {@link ModelPerformanceMetrics} for one model. */
export interface ModelPerformanceDoc {
  /** Document identifier. */
  id: string;
  /** Metrics recorded for the model. */
  metrics: ModelPerformanceMetrics;
  /** Whether this model is marked active. */
  isActive: boolean;
  /** Creation timestamp of the document. */
  createdAt: string;
  /** Time-to-live of the document. NOTE(review): unit assumed to be seconds — confirm. */
  ttl: number;
}
|
||||||
Loading…
Reference in New Issue
Block a user