diff --git a/services/platform-service/src/lib/config.ts b/services/platform-service/src/lib/config.ts index a72e3274..6ac2cf8e 100644 --- a/services/platform-service/src/lib/config.ts +++ b/services/platform-service/src/lib/config.ts @@ -17,6 +17,9 @@ const envSchema = z.object({ // ── Growth (merged) ── WEBHOOK_INVITATION_REDEEMED_URL: z.string().optional(), WEBHOOK_REFERRAL_STATUS_URL: z.string().optional(), + // ── AI Diagnostics ── + AZURE_OPENAI_KEY: z.string().optional(), + AZURE_OPENAI_ENDPOINT: z.string().optional(), // ── Billing (merged) ── STRIPE_SECRET_KEY: z.string().optional(), STRIPE_WEBHOOK_SECRET: z.string().optional(), diff --git a/services/platform-service/src/modules/ab-testing/guardrails.ts b/services/platform-service/src/modules/ab-testing/guardrails.ts new file mode 100644 index 00000000..e0976caa --- /dev/null +++ b/services/platform-service/src/modules/ab-testing/guardrails.ts @@ -0,0 +1,278 @@ +/** + * A/B Testing — Early Stopping Guardrails. + * Auto-promotion, business hours check, approval requirements, safety limits. + */ + +import type { ExperimentDoc, VariantDoc } from './types.js'; +import { generateExperimentResult, checkEarlyStopping } from './statistics.js'; + +export interface GuardrailCheck { + passed: boolean; + violation?: string; + severity: 'info' | 'warning' | 'blocking'; +} + +/** + * Run all guardrail checks before allowing early stop or auto-promotion. + */ +export function runGuardrails( + experiment: ExperimentDoc, + variants: VariantDoc[], + daysRunning: number, + isRevenueImpacting: boolean +): GuardrailCheck[] { + const checks: GuardrailCheck[] = []; + + // 1. Minimum sample size + const minSampleCheck = checkMinSampleSize(experiment, variants); + checks.push(minSampleCheck); + + // 2. Business hours (safety for auto-actions) + const businessHoursCheck = checkBusinessHours(); + checks.push(businessHoursCheck); + + // 3. Approval requirement + const approvalCheck = checkApprovalRequired(experiment, isRevenueImpacting); + checks.push(approvalCheck); + + // 4. Max duration safety limit + const durationCheck = checkMaxDuration(experiment, daysRunning); + checks.push(durationCheck); + + // 5. Statistical significance threshold + const significanceCheck = checkStatisticalSignificance(experiment, variants, daysRunning); + checks.push(significanceCheck); + + // 6. Variant balance check + const balanceCheck = checkVariantBalance(variants); + checks.push(balanceCheck); + + return checks; +} + +/** + * Check minimum sample size per variant. + */ +function checkMinSampleSize(experiment: ExperimentDoc, variants: VariantDoc[]): GuardrailCheck { + const minRequired = experiment.guardrails.minSampleSizePerVariant; + const violations: string[] = []; + + for (const v of variants) { + if (v.stats.participants < minRequired) { + violations.push(`${v.name}: ${v.stats.participants}/${minRequired}`); + } + } + + if (violations.length > 0) { + return { + passed: false, + violation: `Insufficient samples: ${violations.join(', ')}`, + severity: 'blocking', + }; + } + + return { passed: true, severity: 'info' }; +} + +/** + * Check if current time is within business hours. + * Prevents auto-promotion during off-hours. + */ +function checkBusinessHours(): GuardrailCheck { + const now = new Date(); + const hour = now.getHours(); + const day = now.getDay(); // 0 = Sunday, 6 = Saturday + + // Business hours: Monday-Friday, 9 AM - 6 PM + const isBusinessDay = day >= 1 && day <= 5; + const isBusinessHour = hour >= 9 && hour < 18; + + if (!isBusinessDay) { + return { + passed: false, + violation: 'Auto-promotion disabled on weekends', + severity: 'warning', + }; + } + + if (!isBusinessHour) { + return { + passed: false, + violation: 'Auto-promotion disabled outside business hours (9 AM - 6 PM)', + severity: 'warning', + }; + } + + return { passed: true, severity: 'info' }; +} + +/** + * Check if approval is required for this experiment. + */ +function checkApprovalRequired( + experiment: ExperimentDoc, + isRevenueImpacting: boolean +): GuardrailCheck { + const requirement = experiment.guardrails.requireApprovalFor; + + if (requirement === 'all') { + return { + passed: false, + violation: 'Approval required: all experiments require manual approval', + severity: 'blocking', + }; + } + + if (requirement === 'revenue' && isRevenueImpacting) { + return { + passed: false, + violation: 'Approval required: revenue-impacting experiment', + severity: 'blocking', + }; + } + + return { passed: true, severity: 'info' }; +} + +/** + * Check max duration safety limit. + */ +function checkMaxDuration(experiment: ExperimentDoc, daysRunning: number): GuardrailCheck { + const maxDays = experiment.guardrails.maxDurationDays; + + if (daysRunning >= maxDays) { + return { + passed: false, + violation: `Max duration reached: ${daysRunning}/${maxDays} days`, + severity: 'warning', + }; + } + + // Warn at 80% of max duration + if (daysRunning >= maxDays * 0.8) { + return { + passed: true, + violation: `Approaching max duration: ${daysRunning}/${maxDays} days`, + severity: 'warning', + }; + } + + return { passed: true, severity: 'info' }; +} + +/** + * Check statistical significance threshold. + */ +function checkStatisticalSignificance( + experiment: ExperimentDoc, + variants: VariantDoc[], + daysRunning: number +): GuardrailCheck { + const earlyStop = checkEarlyStopping(experiment, variants, daysRunning); + + if (earlyStop.shouldStop && earlyStop.confidence >= experiment.guardrails.winnerThreshold / 100) { + return { + passed: true, + violation: `Statistical significance reached: ${(earlyStop.confidence * 100).toFixed(1)}%`, + severity: 'info', + }; + } + + return { + passed: false, + violation: `Not statistically significant: ${(earlyStop.confidence * 100).toFixed(1)}% < ${experiment.guardrails.winnerThreshold}%`, + severity: 'blocking', + }; +} + +/** + * Check if variants have balanced traffic allocation. + */ +function checkVariantBalance(variants: VariantDoc[]): GuardrailCheck { + const participants = variants.map(v => v.stats.participants); + const total = participants.reduce((a, b) => a + b, 0); + + if (total === 0) { + return { passed: true, severity: 'info' }; + } + + const ratios = participants.map(p => p / total); + const expectedRatio = 1 / variants.length; + + // Check if any variant is > 2x or < 0.5x the expected ratio + const imbalances: string[] = []; + for (let i = 0; i < variants.length; i++) { + const ratio = ratios[i]; + if (ratio > expectedRatio * 2) { + imbalances.push(`${variants[i].name}: ${(ratio * 100).toFixed(1)}% (expected ~${(expectedRatio * 100).toFixed(1)}%)`); + } + } + + if (imbalances.length > 0) { + return { + passed: false, + violation: `Traffic imbalance detected: ${imbalances.join(', ')}`, + severity: 'warning', + }; + } + + return { passed: true, severity: 'info' }; +} + +/** + * Check if all blocking guardrails pass. + */ +export function canAutoPromote(checks: GuardrailCheck[]): boolean { + return checks.every(c => c.passed || c.severity !== 'blocking'); +} + +/** + * Get summary of guardrail violations. + */ +export function getGuardrailViolations(checks: GuardrailCheck[]): string[] { + return checks + .filter(c => !c.passed && c.violation) + .map(c => c.violation!); +} + +/** + * Auto-promotion decision with full guardrail evaluation. + */ +export interface AutoPromotionResult { + canPromote: boolean; + winnerVariantId?: string; + violations: string[]; + warnings: string[]; + result: ReturnType; +} + +export function evaluateAutoPromotion( + experiment: ExperimentDoc, + variants: VariantDoc[], + daysRunning: number, + isRevenueImpacting: boolean +): AutoPromotionResult { + const result = generateExperimentResult(experiment, variants, daysRunning); + const checks = runGuardrails(experiment, variants, daysRunning, isRevenueImpacting); + + const violations = checks + .filter(c => !c.passed && c.severity === 'blocking') + .map(c => c.violation!); + + const warnings = checks + .filter(c => !c.passed && c.severity === 'warning') + .map(c => c.violation!); + + const canPromote = + result.status === 'winner_found' && + canAutoPromote(checks) && + experiment.guardrails.autoStopEnabled; + + return { + canPromote, + winnerVariantId: result.winnerVariantId, + violations, + warnings, + result, + }; +} diff --git a/services/platform-service/src/modules/ab-testing/repository.ts b/services/platform-service/src/modules/ab-testing/repository.ts new file mode 100644 index 00000000..5d2a0f6d --- /dev/null +++ b/services/platform-service/src/modules/ab-testing/repository.ts @@ -0,0 +1,546 @@ +/** + * Intelligent A/B Testing — Repository layer. + * Cosmos DB CRUD for experiments, variants, assignments, events, metrics. + */ + +import { getRegisteredContainer } from '@bytelyst/cosmos'; +import crypto from 'node:crypto'; +import type { + ExperimentDoc, + VariantDoc, + ExperimentAssignmentDoc, + ExperimentEventDoc, + ExperimentMetricDoc, + ExperimentSuggestion, + CreateExperimentInput, + UpdateExperimentInput, +} from './types.js'; +import { assignVariant, assignByStrategy, isInExperimentBucket, type StrategyContext } from './bucketing.js'; +import type { TargetingContext, TargetingConfig } from './targeting.js'; +import { matchesTargeting } from './targeting.js'; + +// ───────────────────────────────────────────────────────────────────────────── +// Container Access +// ───────────────────────────────────────────────────────────────────────────── + +function getExperimentContainer() { + return getRegisteredContainer('experiments'); +} + +function getVariantContainer() { + return getRegisteredContainer('ab_testing_variants'); +} + +function getAssignmentContainer() { + return getRegisteredContainer('experiment_assignments'); +} + +function getEventContainer() { + return getRegisteredContainer('ab_testing_events'); +} + +function getMetricContainer() { + return getRegisteredContainer('ab_testing_metrics'); +} + +function getSuggestionsContainer() { + return getRegisteredContainer('experiment_suggestions'); +} + +// ───────────────────────────────────────────────────────────────────────────── +// Experiment CRUD +// ───────────────────────────────────────────────────────────────────────────── + +export async function listExperiments(productId: string): Promise { + const { resources } = await getExperimentContainer() + .items.query({ + query: 'SELECT * FROM c WHERE c.productId = @pid ORDER BY c.createdAt DESC', + parameters: [{ name: '@pid', value: productId }], + }) + .fetchAll(); + return resources; +} + +export async function listRunningExperiments(productId: string): Promise { + const { resources } = await getExperimentContainer() + .items.query({ + query: 'SELECT * FROM c WHERE c.productId = @pid AND c.status = @status', + parameters: [ + { name: '@pid', value: productId }, + { name: '@status', value: 'running' }, + ], + }) + .fetchAll(); + return resources; +} + +export async function getExperiment(id: string): Promise { + try { + const { resource } = await getExperimentContainer().item(id, id).read(); + return resource ?? null; + } catch { + return null; + } +} + +export async function createExperiment( + productId: string, + input: CreateExperimentInput +): Promise { + const now = new Date().toISOString(); + const experimentId = `exp_${crypto.randomUUID()}`; + + // Create variants + const variantIds: string[] = []; + let controlVariantId = ''; + const numVariants = input.variants.length; + const baseAllocation = Math.floor(100 / numVariants); + const remainder = 100 - baseAllocation * numVariants; + + for (let i = 0; i < input.variants.length; i++) { + const v = input.variants[i]; + const variantId = `var_${crypto.randomUUID()}`; + variantIds.push(variantId); + + if (v.isControl) { + controlVariantId = variantId; + } + + const variantDoc: VariantDoc = { + id: variantId, + experimentId, + name: v.name, + description: v.description ?? '', + isControl: v.isControl, + flagConfig: v.flagConfig, + currentAllocationPercent: baseAllocation + (i < remainder ? 1 : 0), + stats: { + participants: 0, + events: 0, + primaryMetricValue: 0, + }, + createdAt: now, + updatedAt: now, + }; + + await getVariantContainer().items.create(variantDoc); + } + + // Default to first variant as control if none specified + if (!controlVariantId) { + controlVariantId = variantIds[0]; + } + + const experiment: ExperimentDoc = { + id: experimentId, + productId, + name: input.name, + description: input.description ?? '', + hypothesis: input.hypothesis, + status: 'draft', + controlVariantId, + variantIds, + allocationStrategy: input.allocationStrategy, + targetPercent: input.targetPercent, + targeting: input.targeting, + primaryMetric: input.primaryMetric, + secondaryMetrics: input.secondaryMetrics, + guardrails: input.guardrails, + startAt: input.startAt, + totalParticipants: 0, + totalEvents: 0, + createdAt: now, + updatedAt: now, + }; + + await getExperimentContainer().items.create(experiment); + return experiment; +} + +export async function updateExperiment( + id: string, + updates: UpdateExperimentInput +): Promise { + const existing = await getExperiment(id); + if (!existing) return null; + + const now = new Date().toISOString(); + const updated: ExperimentDoc = { + ...existing, + ...updates, + updatedAt: now, + }; + + if (updates.status === 'running' && !existing.startedAt) { + updated.startedAt = now; + } + if ((updates.status === 'completed' || updates.status === 'stopped') && !existing.completedAt) { + updated.completedAt = now; + // Set TTL for 2 years + updated.ttl = 2 * 365 * 86400; + } + + await getExperimentContainer().item(id, id).replace(updated); + return updated; +} + +export async function deleteExperiment(id: string): Promise { + try { + // Delete variants first + const { resources: variants } = await getVariantContainer() + .items.query({ query: 'SELECT * FROM c WHERE c.experimentId = @eid', parameters: [{ name: '@eid', value: id }] }) + .fetchAll(); + + for (const v of variants) { + await getVariantContainer().item(v.id, id).delete(); + } + + await getExperimentContainer().item(id, id).delete(); + return true; + } catch { + return false; + } +} + +// ───────────────────────────────────────────────────────────────────────────── +// Variant Operations +// ───────────────────────────────────────────────────────────────────────────── + +export async function getVariant(id: string, experimentId: string): Promise { + try { + const { resource } = await getVariantContainer().item(id, experimentId).read(); + return resource ?? null; + } catch { + return null; + } +} + +export async function listVariants(experimentId: string): Promise { + const { resources } = await getVariantContainer() + .items.query({ + query: 'SELECT * FROM c WHERE c.experimentId = @eid', + parameters: [{ name: '@eid', value: experimentId }], + }) + .fetchAll(); + return resources; +} + +export async function updateVariantStats( + variantId: string, + experimentId: string, + stats: Partial +): Promise { + const variant = await getVariant(variantId, experimentId); + if (!variant) return; + + const updated: VariantDoc = { + ...variant, + stats: { ...variant.stats, ...stats }, + updatedAt: new Date().toISOString(), + }; + + await getVariantContainer().item(variantId, experimentId).replace(updated); +} + +export async function updateVariantAllocation( + variantId: string, + experimentId: string, + newAllocation: number +): Promise { + const variant = await getVariant(variantId, experimentId); + if (!variant) return; + + const updated: VariantDoc = { + ...variant, + currentAllocationPercent: newAllocation, + updatedAt: new Date().toISOString(), + }; + + await getVariantContainer().item(variantId, experimentId).replace(updated); +} + +export async function updateVariantBayesianResults( + variantId: string, + experimentId: string, + results: VariantDoc['bayesianResults'] +): Promise { + const variant = await getVariant(variantId, experimentId); + if (!variant) return; + + const updated: VariantDoc = { + ...variant, + bayesianResults: results, + updatedAt: new Date().toISOString(), + }; + + await getVariantContainer().item(variantId, experimentId).replace(updated); +} + +// ───────────────────────────────────────────────────────────────────────────── +// Assignment Operations +// ───────────────────────────────────────────────────────────────────────────── + +export interface AssignmentResult { + assignment: ExperimentAssignmentDoc; + variant: VariantDoc; + isNew: boolean; +} + +export async function getAssignment( + experimentId: string, + userId: string +): Promise { + // Query by userId partition key + const { resources } = await getAssignmentContainer() + .items.query({ + query: 'SELECT * FROM c WHERE c.experimentId = @eid AND c.userId = @uid', + parameters: [ + { name: '@eid', value: experimentId }, + { name: '@uid', value: userId }, + ], + }) + .fetchAll(); + return resources[0] ?? null; +} + +export async function getOrCreateAssignment( + experiment: ExperimentDoc, + userId: string, + targetingContext: TargetingContext +): Promise { + // Check targeting first + if (!matchesTargeting(targetingContext, experiment.targeting)) { + return null; + } + + // Check traffic percentage + if (!isInExperimentBucket(experiment.id, userId, experiment.targetPercent)) { + return null; + } + + // Check for existing assignment + const existing = await getAssignment(experiment.id, userId); + if (existing) { + const variant = await getVariant(existing.variantId, experiment.id); + if (!variant) return null; + return { assignment: existing, variant, isNew: false }; + } + + // Get all variants + const variants = await listVariants(experiment.id); + if (variants.length === 0) return null; + + const controlVariant = variants.find(v => v.isControl) ?? variants[0]; + + // Assign variant based on strategy + let assignedVariantId: string; + + if (experiment.allocationStrategy === 'random') { + // Use FNV-1a bucketing for random strategy + assignedVariantId = assignVariant( + experiment.id, + userId, + variants.map(v => ({ key: v.id, weight: v.currentAllocationPercent })) + ); + } else { + // Use adaptive strategies + const ctx: StrategyContext = { + variants, + controlVariant, + totalParticipants: experiment.totalParticipants, + explorationRate: 0.1, // Default epsilon for epsilon-greedy + }; + assignedVariantId = assignByStrategy(experiment.allocationStrategy, ctx); + } + + const assignedVariant = variants.find(v => v.id === assignedVariantId) ?? variants[0]; + + // Create assignment record + const now = new Date().toISOString(); + const assignment: ExperimentAssignmentDoc = { + id: `ea_${crypto.randomUUID()}`, + userId, + experimentId: experiment.id, + variantId: assignedVariant.id, + assignedAt: now, + assignmentContext: { + platform: targetingContext.platform ?? 'unknown', + appVersion: targetingContext.appVersion ?? 'unknown', + osVersion: targetingContext.osVersion ?? 'unknown', + deviceModel: targetingContext.deviceModel, + region: targetingContext.region, + }, + eventCount: 0, + }; + + await getAssignmentContainer().items.create(assignment); + + // Update participant count + await updateVariantStats(assignedVariant.id, experiment.id, { + participants: assignedVariant.stats.participants + 1, + }); + + // Update experiment total + const expUpdate: Partial = { + totalParticipants: experiment.totalParticipants + 1, + updatedAt: now, + }; + await getExperimentContainer().item(experiment.id, experiment.id).patch({ + operations: [ + { op: 'incr', path: '/totalParticipants', value: 1 }, + { op: 'set', path: '/updatedAt', value: now }, + ], + }); + + return { assignment, variant: assignedVariant, isNew: true }; +} + +// ───────────────────────────────────────────────────────────────────────────── +// Event Tracking +// ───────────────────────────────────────────────────────────────────────────── + +export async function trackEvent( + experimentId: string, + userId: string, + assignmentId: string, + variantId: string, + metricName: string, + metricType: ExperimentEventDoc['metricType'], + value: number, + converted: boolean, + platform: string, + appVersion: string, + eventMetadata?: Record +): Promise { + const now = new Date().toISOString(); + const event: ExperimentEventDoc = { + id: `ee_${crypto.randomUUID()}`, + experimentId, + timestamp: now, + userId, + variantId, + assignmentId, + metricName, + metricType, + value, + converted, + eventMetadata, + platform, + appVersion, + ttl: 90 * 86400, // 90 days + }; + + await getEventContainer().items.create(event); + + // Update assignment event count + await getAssignmentContainer().item(assignmentId, userId).patch({ + operations: [ + { op: 'incr', path: '/eventCount', value: 1 }, + { op: 'set', path: '/lastEventAt', value: now }, + ], + }); + + // Update experiment total events + await getExperimentContainer().item(experimentId, experimentId).patch({ + operations: [ + { op: 'incr', path: '/totalEvents', value: 1 }, + { op: 'set', path: '/updatedAt', value: now }, + ], + }); +} + +// ───────────────────────────────────────────────────────────────────────────── +// Metrics Aggregation +// ───────────────────────────────────────────────────────────────────────────── + +export async function getOrCreateMetric( + experimentId: string, + metricName: string, + variantId: string +): Promise { + const id = `em_${experimentId}:${metricName}:${variantId}`; + const existing = await getMetricContainer().item(id, experimentId).read(); + if (existing.resource) return existing.resource; + + const now = new Date().toISOString(); + const metric: ExperimentMetricDoc = { + id, + experimentId, + metricName, + variantId, + count: 0, + sum: 0, + mean: 0, + stdDev: 0, + min: 0, + max: 0, + conversions: 0, + conversionRate: 0, + updatedAt: now, + }; + + await getMetricContainer().items.create(metric); + return metric; +} + +export async function updateMetricAggregation( + experimentId: string, + metricName: string, + variantId: string, + value: number, + converted: boolean +): Promise { + const metric = await getOrCreateMetric(experimentId, metricName, variantId); + + const newCount = metric.count + 1; + const newSum = metric.sum + value; + const newMean = newSum / newCount; + + // Welford's online algorithm for variance + const delta = value - metric.mean; + const delta2 = value - newMean; + const newVariance = ((metric.count * metric.stdDev * metric.stdDev) + delta * delta2) / Math.max(1, newCount); + const newStdDev = Math.sqrt(newVariance); + + const updated: ExperimentMetricDoc = { + ...metric, + count: newCount, + sum: newSum, + mean: newMean, + stdDev: newStdDev, + min: metric.count === 0 ? value : Math.min(metric.min, value), + max: metric.count === 0 ? value : Math.max(metric.max, value), + conversions: metric.conversions + (converted ? 1 : 0), + conversionRate: (metric.conversions + (converted ? 1 : 0)) / newCount, + updatedAt: new Date().toISOString(), + }; + + await getMetricContainer().item(metric.id, experimentId).replace(updated); +} + +// ───────────────────────────────────────────────────────────────────────────── +// AI Suggestions +// ───────────────────────────────────────────────────────────────────────────── + +export async function listSuggestions(productId: string): Promise { + const { resources } = await getSuggestionsContainer() + .items.query({ + query: 'SELECT * FROM c WHERE c.productId = @pid ORDER BY c.priority DESC, c.createdAt DESC', + parameters: [{ name: '@pid', value: productId }], + }) + .fetchAll(); + return resources; +} + +export async function createSuggestion( + productId: string, + suggestion: Omit +): Promise { + const doc: ExperimentSuggestion = { + ...suggestion, + id: `es_${crypto.randomUUID()}`, + createdAt: new Date().toISOString(), + }; + await getSuggestionsContainer().items.create({ ...doc, productId }); + return doc; +} diff --git a/services/platform-service/src/modules/ab-testing/statistics.ts b/services/platform-service/src/modules/ab-testing/statistics.ts new file mode 100644 index 00000000..e744dde4 --- /dev/null +++ b/services/platform-service/src/modules/ab-testing/statistics.ts @@ -0,0 +1,653 @@ +/** + * Intelligent A/B Testing — Bayesian Statistics Engine. + * Beta-Binomial for conversions, Normal for continuous metrics. + * Probability calculations, credible intervals, early stopping rules. + */ + +import type { VariantDoc, MetricType, ExperimentResult, ExperimentDoc } from './types.js'; + +// ───────────────────────────────────────────────────────────────────────────── +// Beta Distribution (for conversion metrics) +// ───────────────────────────────────────────────────────────────────────────── + +/** + * Beta distribution parameters. + * Beta(α, β) where α = successes + 1, β = failures + 1 + */ +export interface BetaParams { + alpha: number; + beta: number; +} + +/** + * Compute Beta parameters from variant stats. + * Uses uninformative prior Beta(1, 1) = Uniform(0, 1) + */ +export function betaFromVariant(variant: VariantDoc): BetaParams { + const conversions = variant.stats.conversions ?? 0; + const participants = variant.stats.participants || 1; + const failures = participants - conversions; + + // Posterior: Beta(conversions + 1, failures + 1) + return { + alpha: conversions + 1, + beta: failures + 1, + }; +} + +/** + * Sample from Beta distribution using Gamma ratio method. + * Beta(α, β) ~ Gamma(α) / (Gamma(α) + Gamma(β)) + */ +export function sampleBeta(alpha: number, beta: number): number { + const x = sampleGamma(alpha, 1); + const y = sampleGamma(beta, 1); + return x / (x + y); +} + +/** + * Beta PDF (probability density function). + * Used for analytical probability calculations. + */ +export function betaPdf(x: number, alpha: number, beta: number): number { + if (x <= 0 || x >= 1) return 0; + const B = gamma(alpha) * gamma(beta) / gamma(alpha + beta); + return (Math.pow(x, alpha - 1) * Math.pow(1 - x, beta - 1)) / B; +} + +/** + * Beta CDF (cumulative distribution function). + * Uses regularized incomplete beta function approximation. + */ +export function betaCdf(x: number, alpha: number, beta: number): number { + if (x <= 0) return 0; + if (x >= 1) return 1; + + // Use incomplete beta approximation (numerical integration) + const steps = 100; + const dx = x / steps; + let sum = 0; + for (let i = 0; i < steps; i++) { + const xi = i * dx + dx / 2; + sum += betaPdf(xi, alpha, beta) * dx; + } + return sum; +} + +// ───────────────────────────────────────────────────────────────────────────── +// Normal Distribution (for continuous metrics) +// ───────────────────────────────────────────────────────────────────────────── + +export interface NormalParams { + mean: number; + stdDev: number; +} + +/** + * Compute Normal parameters from variant stats. + * Uses conjugate prior (Normal-Inverse-Gamma). + */ +export function normalFromVariant(variant: VariantDoc): NormalParams { + const mean = variant.stats.primaryMetricValue ?? 0; + const n = variant.stats.participants || 1; + const sampleStd = variant.stats.primaryMetricStdDev ?? 1; + + // Posterior standard deviation (reduced uncertainty with more samples) + const posteriorStdDev = sampleStd / Math.sqrt(n); + + return { mean, stdDev: posteriorStdDev }; +} + +/** + * Sample from Normal distribution (Box-Muller transform). + */ +export function sampleNormal(mean: number, stdDev: number): number { + const u1 = Math.random(); + const u2 = Math.random(); + const z0 = Math.sqrt(-2 * Math.log(u1)) * Math.cos(2 * Math.PI * u2); + return mean + z0 * stdDev; +} + +/** + * Normal PDF. + */ +export function normalPdf(x: number, mean: number, stdDev: number): number { + const z = (x - mean) / stdDev; + return (1 / (stdDev * Math.sqrt(2 * Math.PI))) * Math.exp(-0.5 * z * z); +} + +/** + * Normal CDF (approximation using error function). + */ +export function normalCdf(x: number, mean: number, stdDev: number): number { + const z = (x - mean) / stdDev; + return 0.5 * (1 + erf(z / Math.sqrt(2))); +} + +// ───────────────────────────────────────────────────────────────────────────── +// Gamma Distribution (for count/duration metrics) +// ───────────────────────────────────────────────────────────────────────────── + +export interface GammaParams { + shape: number; + scale: number; +} + +/** + * Compute Gamma parameters from variant stats. + * Uses method of moments estimation. + */ +export function gammaFromVariant(variant: VariantDoc): GammaParams { + const mean = variant.stats.primaryMetricValue ?? 1; + const variance = Math.pow(variant.stats.primaryMetricStdDev ?? 1, 2); + + // Method of moments: shape = mean² / variance, scale = variance / mean + const shape = (mean * mean) / Math.max(variance, 0.001); + const scale = variance / Math.max(mean, 0.001); + + return { shape, scale }; +} + +/** + * Sample from Gamma distribution (Marsaglia-Tsang method). + */ +export function sampleGamma(shape: number, scale: number): number { + if (shape < 1) { + return sampleGamma(1 + shape, scale) * Math.pow(Math.random(), 1 / shape); + } + + const d = shape - 1 / 3; + const c = 1 / Math.sqrt(9 * d); + + while (true) { + let x = sampleStandardNormal(); + let v = 1 + c * x; + if (v <= 0) continue; + + v = v * v * v; + const u = Math.random(); + + if (u < 1 - 0.0331 * x * x * x * x) { + return d * v * scale; + } + + if (Math.log(u) < 0.5 * x * x + d * (1 - v + Math.log(v))) { + return d * v * scale; + } + } +} + +function sampleStandardNormal(): number { + return sampleNormal(0, 1); +} + +// ───────────────────────────────────────────────────────────────────────────── +// Mathematical Utilities +// ───────────────────────────────────────────────────────────────────────────── + +/** + * Error function approximation (Abramowitz & Stegun). + */ +function erf(x: number): number { + const sign = x < 0 ? -1 : 1; + x = Math.abs(x); + + const a1 = 0.254829592; + const a2 = -0.284496736; + const a3 = 1.421413741; + const a4 = -1.453152027; + const a5 = 1.061405429; + const p = 0.3275911; + + const t = 1 / (1 + p * x); + const y = 1 - ((((a5 * t + a4) * t + a3) * t + a2) * t + a1) * t * Math.exp(-x * x); + + return sign * y; +} + +/** + * Gamma function approximation (Lanczos approximation). + * Only valid for positive numbers. + */ +function gamma(z: number): number { + if (z < 0.5) { + return Math.PI / (Math.sin(Math.PI * z) * gamma(1 - z)); + } + + z -= 1; + const g = 7; + const C = [ + 0.99999999999980993, 676.5203681218851, -1259.1392167224028, + 771.32342877765313, -176.61502916214059, 12.507343278686905, + -0.13857109526572012, 9.9843695780195716e-6, 1.5056327351493116e-7, + ]; + + let x = C[0]; + for (let i = 1; i < g + 2; i++) { + x += C[i] / (z + i); + } + + const t = z + g + 0.5; + return Math.sqrt(2 * Math.PI) * Math.pow(t, z + 0.5) * Math.exp(-t) * x; +} + +// ───────────────────────────────────────────────────────────────────────────── +// Probability Calculations +// ───────────────────────────────────────────────────────────────────────────── + +/** + * Calculate probability that variant beats control using Monte Carlo simulation. + * P(variant > control) via sampling from posterior distributions. + */ +export function probabilityVariantBeatsControl( + variant: VariantDoc, + control: VariantDoc, + metricType: MetricType, + samples = 10000 +): number { + let wins = 0; + + for (let i = 0; i < samples; i++) { + const variantSample = sampleFromPosterior(variant, metricType); + const controlSample = sampleFromPosterior(control, metricType); + if (variantSample > controlSample) wins++; + } + + return wins / samples; +} + +/** + * Calculate probability that variant beats all other variants. + */ +export function probabilityVariantBeatsAll( + variant: VariantDoc, + allVariants: VariantDoc[], + metricType: MetricType, + samples = 10000 +): number { + const others = allVariants.filter(v => v.id !== variant.id); + if (others.length === 0) return 1; + + let wins = 0; + + for (let i = 0; i < samples; i++) { + const variantSample = sampleFromPosterior(variant, metricType); + const otherSamples = others.map(v => sampleFromPosterior(v, metricType)); + const maxOther = Math.max(...otherSamples); + if (variantSample > maxOther) wins++; + } + + return wins / samples; +} + +/** + * Calculate expected loss if we choose this variant. + * E[loss] = E[max(0, best_other - this)] + */ +export function expectedLossIfChosen( + variant: VariantDoc, + allVariants: VariantDoc[], + metricType: MetricType, + samples = 10000 +): number { + const others = allVariants.filter(v => v.id !== variant.id); + if (others.length === 0) return 0; + + let totalLoss = 0; + + for (let i = 0; i < samples; i++) { + const variantSample = sampleFromPosterior(variant, metricType); + const otherSamples = others.map(v => sampleFromPosterior(v, metricType)); + const bestOther = Math.max(...otherSamples); + const loss = Math.max(0, bestOther - variantSample); + totalLoss += loss; + } + + return totalLoss / samples; +} + +/** + * Sample from appropriate posterior distribution based on metric type. + */ +function sampleFromPosterior(variant: VariantDoc, metricType: MetricType): number { + switch (metricType) { + case 'conversion': { + const beta = betaFromVariant(variant); + return sampleBeta(beta.alpha, beta.beta); + } + case 'count': + case 'duration': { + const gamma = gammaFromVariant(variant); + return sampleGamma(gamma.shape, gamma.scale); + } + case 'revenue': + case 'custom': + default: { + const normal = normalFromVariant(variant); + return sampleNormal(normal.mean, normal.stdDev); + } + } +} + +// ───────────────────────────────────────────────────────────────────────────── +// Credible Intervals +// ───────────────────────────────────────────────────────────────────────────── + +export interface CredibleInterval { + lower: number; // 2.5th percentile + mean: number; + upper: number; // 97.5th percentile +} + +/** + * Calculate 95% credible interval for variant's metric. + */ +export function calculateCredibleInterval( + variant: VariantDoc, + metricType: MetricType, + samples = 10000 +): CredibleInterval { + const samples_array: number[] = []; + + for (let i = 0; i < samples; i++) { + samples_array.push(sampleFromPosterior(variant, metricType)); + } + + samples_array.sort((a, b) => a - b); + + const lowerIndex = Math.floor(samples * 0.025); + const upperIndex = Math.floor(samples * 0.975); + + return { + lower: samples_array[lowerIndex], + mean: samples_array.reduce((a, b) => a + b, 0) / samples, + upper: samples_array[upperIndex], + }; +} + +/** + * Analytical credible interval for Beta distribution. + * Uses inverse Beta CDF (approximation via binary search). + */ +export function betaCredibleInterval(alpha: number, beta: number): CredibleInterval { + // Binary search for percentiles + const lower = betaQuantile(alpha, beta, 0.025); + const upper = betaQuantile(alpha, beta, 0.975); + const mean = alpha / (alpha + beta); + + return { lower, mean, upper }; +} + +/** + * Find quantile of Beta distribution via binary search. + */ +function betaQuantile(alpha: number, beta: number, p: number): number { + let low = 0; + let high = 1; + let mid = 0.5; + + for (let iter = 0; iter < 50; iter++) { + mid = (low + high) / 2; + const cdf = betaCdf(mid, alpha, beta); + if (cdf < p) { + low = mid; + } else { + high = mid; + } + } + + return mid; +} + +// ───────────────────────────────────────────────────────────────────────────── +// Early Stopping Rules +// ───────────────────────────────────────────────────────────────────────────── + +export interface EarlyStoppingResult { + shouldStop: boolean; + reason?: string; + winnerVariantId?: string; + confidence: number; +} + +/** + * Check if experiment should stop early based on statistical criteria. + */ +export function checkEarlyStopping( + experiment: ExperimentDoc, + variants: VariantDoc[], + daysRunning: number +): EarlyStoppingResult { + // Minimum sample size guardrail + const minSamples = experiment.guardrails.minSampleSizePerVariant; + const allHaveMinSamples = variants.every(v => v.stats.participants >= minSamples); + + if (!allHaveMinSamples) { + return { shouldStop: false, confidence: 0 }; + } + + const controlVariant = variants.find(v => v.isControl); + if (!controlVariant || variants.length < 2) { + return { shouldStop: false, confidence: 0 }; + } + + // Calculate probabilities for each variant + const results = variants.map(variant => ({ + variant, + probBeatsControl: variant.isControl + ? 0 + : probabilityVariantBeatsControl(variant, controlVariant, experiment.primaryMetric.type), + })); + + // Find variant with highest probability + const nonControlResults = results.filter(r => !r.variant.isControl); + const bestResult = nonControlResults.reduce((best, current) => + current.probBeatsControl > best.probBeatsControl ? current : best, + nonControlResults[0] + ); + + const threshold = experiment.guardrails.winnerThreshold / 100; + + // Winner found: variant has > 95% probability of beating control + if (bestResult && bestResult.probBeatsControl >= threshold) { + return { + shouldStop: true, + reason: 'Winner found: variant has > 95% probability of beating control', + winnerVariantId: bestResult.variant.id, + confidence: bestResult.probBeatsControl, + }; + } + + // No winner clear: control has > 95% probability of beating all variants + const controlProbBeatsAll = probabilityVariantBeatsAll( + controlVariant, + variants, + experiment.primaryMetric.type + ); + if (controlProbBeatsAll >= threshold) { + return { + shouldStop: true, + reason: 'No winner: control outperforms all variants with > 95% confidence', + confidence: controlProbBeatsAll, + }; + } + + // Time bound: max duration reached + if (daysRunning >= experiment.guardrails.maxDurationDays) { + return { + shouldStop: true, + reason: 'Max duration reached', + confidence: bestResult?.probBeatsControl ?? 0, + }; + } + + return { + shouldStop: false, + confidence: bestResult?.probBeatsControl ?? 0, + }; +} + +// ───────────────────────────────────────────────────────────────────────────── +// Experiment Results +// ───────────────────────────────────────────────────────────────────────────── + +/** + * Generate comprehensive experiment results. + */ +export function generateExperimentResult( + experiment: ExperimentDoc, + variants: VariantDoc[], + daysRunning: number +): ExperimentResult { + const controlVariant = variants.find(v => v.isControl); + const earlyStop = checkEarlyStopping(experiment, variants, daysRunning); + + const variantResults = variants.map(variant => { + const credibleInterval = calculateCredibleInterval(variant, experiment.primaryMetric.type); + const probBeatsControl = controlVariant + ? probabilityVariantBeatsControl(variant, controlVariant, experiment.primaryMetric.type) + : 0.5; + + // Expected lift relative to control + let expectedLift = 0; + if (controlVariant && controlVariant.stats.primaryMetricValue > 0) { + expectedLift = ((variant.stats.primaryMetricValue - controlVariant.stats.primaryMetricValue) + / controlVariant.stats.primaryMetricValue) * 100; + } + + return { + variantId: variant.id, + variantName: variant.name, + isControl: variant.isControl, + participants: variant.stats.participants, + primaryMetricValue: variant.stats.primaryMetricValue, + probabilityBeatsControl: probBeatsControl, + expectedLiftPercent: expectedLift, + credibleInterval, + }; + }); + + const winnerResult = earlyStop.winnerVariantId + ? variantResults.find(vr => vr.variantId === earlyStop.winnerVariantId) + : undefined; + + // Determine recommended action + let recommendedAction: ExperimentResult['statisticalSummary']['recommendedAction'] = 'continue'; + if (earlyStop.shouldStop) { + if (earlyStop.winnerVariantId) { + recommendedAction = 'ship'; + } else if (earlyStop.reason?.includes('control')) { + recommendedAction = 'rollback'; + } else { + recommendedAction = 'stop'; + } + } + + // Calculate probability that any variant beats control + const probAnyBeatsControl = Math.max(...variantResults.map(vr => vr.probabilityBeatsControl)); + + // Calculate expected loss if we ship the winner + const expectedLoss = earlyStop.winnerVariantId + ? expectedLossIfChosen( + variants.find(v => v.id === earlyStop.winnerVariantId)!, + variants, + experiment.primaryMetric.type + ) + : 0; + + return { + experimentId: experiment.id, + status: earlyStop.shouldStop + ? (earlyStop.winnerVariantId ? 'winner_found' : 'no_winner') + : 'in_progress', + totalParticipants: experiment.totalParticipants, + totalEvents: experiment.totalEvents, + daysRunning, + winnerVariantId: earlyStop.winnerVariantId, + winnerProbability: earlyStop.confidence, + variantResults, + statisticalSummary: { + probabilityAnyBeatsControl: probAnyBeatsControl, + expectedLossIfShipped: expectedLoss, + recommendedAction, + }, + earlyStopped: earlyStop.shouldStop, + stopReason: earlyStop.reason, + generatedAt: new Date().toISOString(), + }; +} + +// ───────────────────────────────────────────────────────────────────────────── +// Statistical Tests +// ───────────────────────────────────────────────────────────────────────────── + +/** + * A/A test: check that two identical variants produce similar results. + * Used for validating the statistical engine. + */ +export function validateAA( + n: number, + trueRate: number, + samples = 1000 +): { passRate: number; bias: number } { + let passes = 0; + let totalError = 0; + + for (let i = 0; i < samples; i++) { + // Simulate two "identical" variants + const aSuccesses = simulateBinomial(n, trueRate); + const bSuccesses = simulateBinomial(n, trueRate); + + const aRate = aSuccesses / n; + const bRate = bSuccesses / n; + + // Check if confidence intervals overlap (simplified) + const aStd = Math.sqrt(aRate * (1 - aRate) / n); + const bStd = Math.sqrt(bRate * (1 - bRate) / n); + + const diff = Math.abs(aRate - bRate); + const pooledStd = Math.sqrt(aStd * aStd + bStd * bStd); + + // Pass if difference is within 2 standard deviations (95% roughly) + if (diff < 2 * pooledStd) passes++; + totalError += diff; + } + + return { + passRate: passes / samples, + bias: totalError / samples, + }; +} + +/** + * Simulate binomial distribution (inverse transform sampling). + */ +function simulateBinomial(n: number, p: number): number { + let successes = 0; + for (let i = 0; i < n; i++) { + if (Math.random() < p) successes++; + } + return successes; +} + +/** + * Power analysis: calculate required sample size for given effect size. + */ +export function calculateSampleSize( + baselineRate: number, + minDetectableEffect: number, // relative change, e.g., 0.05 for 5% + alpha = 0.05, + power = 0.8 +): number { + const zAlpha = 1.96; // ~95% confidence + const zBeta = 0.84; // ~80% power + + const p1 = baselineRate; + const p2 = baselineRate * (1 + minDetectableEffect); + const pAvg = (p1 + p2) / 2; + + const numerator = 2 * zAlpha * Math.sqrt(2 * pAvg * (1 - pAvg)) + zBeta * Math.sqrt(p1 * (1 - p1) + p2 * (1 - p2)); + const denominator = p2 - p1; + + return Math.ceil(Math.pow(numerator / denominator, 2)); +} diff --git a/services/platform-service/src/modules/ab-testing/targeting.ts b/services/platform-service/src/modules/ab-testing/targeting.ts new file mode 100644 index 00000000..ad58606d --- /dev/null +++ b/services/platform-service/src/modules/ab-testing/targeting.ts @@ -0,0 +1,169 @@ +/** + * A/B Testing — Audience targeting and filtering. + * Platform, version, region, segment, and user property matching. + */ + +import type { TargetingConfig } from './types.js'; + +export interface TargetingContext { + platform?: string; // ios, android, web + appVersion?: string; // semver + osVersion?: string; + deviceModel?: string; + region?: string; // country code + userSegments?: string[]; // pro, free, enterprise + userProperties?: Record; +} + +/** + * Check if a user matches the experiment's targeting criteria. + */ +export function matchesTargeting( + context: TargetingContext, + targeting: TargetingConfig +): boolean { + // No targeting = everyone eligible + if (!targeting || Object.keys(targeting).length === 0) { + return true; + } + + // Platform check + if (targeting.platforms && targeting.platforms.length > 0) { + if (!context.platform || !targeting.platforms.includes(context.platform)) { + return false; + } + } + + // App version check (semver range) + if (targeting.appVersions) { + if (!context.appVersion) return false; + if (!matchesVersionRange(context.appVersion, targeting.appVersions)) { + return false; + } + } + + // Region check + if (targeting.regions && targeting.regions.length > 0) { + if (!context.region || !targeting.regions.includes(context.region)) { + return false; + } + } + + // User segment check + if (targeting.userSegments && targeting.userSegments.length > 0) { + const userSegs = context.userSegments ?? []; + const hasMatchingSegment = targeting.userSegments.some(seg => userSegs.includes(seg)); + if (!hasMatchingSegment) { + return false; + } + } + + // User property check + if (targeting.userProperties && Object.keys(targeting.userProperties).length > 0) { + if (!context.userProperties) return false; + for (const [key, value] of Object.entries(targeting.userProperties)) { + if (context.userProperties[key] !== value) { + return false; + } + } + } + + return true; +} + +/** + * Check if version matches the min/max range. + * Supports basic semver comparison. + */ +function matchesVersionRange( + version: string, + range: { min: string; max?: string } +): boolean { + const v = parseSemver(version); + const min = parseSemver(range.min); + + if (!v || !min) return true; // Be permissive on parse failure + + // Check min version + if (compareSemver(v, min) < 0) return false; + + // Check max version if specified + if (range.max) { + const max = parseSemver(range.max); + if (max && compareSemver(v, max) > 0) return false; + } + + return true; +} + +interface Semver { + major: number; + minor: number; + patch: number; +} + +function parseSemver(version: string): Semver | null { + const match = version.match(/^(\d+)(?:\.(\d+))?(?:\.(\d+))?/); + if (!match) return null; + + return { + major: parseInt(match[1], 10), + minor: match[2] ? parseInt(match[2], 10) : 0, + patch: match[3] ? parseInt(match[3], 10) : 0, + }; +} + +function compareSemver(a: Semver, b: Semver): number { + if (a.major !== b.major) return a.major - b.major; + if (a.minor !== b.minor) return a.minor - b.minor; + return a.patch - b.patch; +} + +// ───────────────────────────────────────────────────────────────────────────── +// Exclusion Lists +// ───────────────────────────────────────────────────────────────────────────── + +/** + * Check if user is in an exclusion list (beta users, internal accounts). + */ +export function isExcluded( + userId: string, + exclusionList: string[] +): boolean { + return exclusionList.includes(userId); +} + +/** + * Common exclusion patterns. + */ +export const DEFAULT_EXCLUSIONS = { + // Internal test accounts (patterns) + internalPatterns: [ + /^test@/, + /^admin@/, + /@example\.com$/, + /@test\.com$/, + ], + // Beta user segments to exclude from production experiments + betaSegments: ['beta', 'alpha', 'internal', 'employee'], +}; + +/** + * Check if user matches any exclusion pattern. + */ +export function matchesExclusion( + userId: string, + segments: string[] = [] +): boolean { + // Check patterns + for (const pattern of DEFAULT_EXCLUSIONS.internalPatterns) { + if (pattern.test(userId)) return true; + } + + // Check segments + for (const seg of segments) { + if (DEFAULT_EXCLUSIONS.betaSegments.includes(seg)) return true; + } + + return false; +} diff --git a/services/platform-service/src/modules/ai-diagnostics/embedding-client.ts b/services/platform-service/src/modules/ai-diagnostics/embedding-client.ts new file mode 100644 index 00000000..7ff63d43 --- /dev/null +++ b/services/platform-service/src/modules/ai-diagnostics/embedding-client.ts @@ -0,0 +1,364 @@ +import { config } from '../../lib/config.js'; +import type { FastifyBaseLogger } from 'fastify'; + +// ============================================================================ +// Azure OpenAI Embedding Client +// ============================================================================ + +const EMBEDDING_MODEL = 'text-embedding-3-small'; +const EMBEDDING_DIMENSIONS = 1536; +const MAX_BATCH_SIZE = 100; // Azure OpenAI limit +const MAX_TOKENS_PER_REQUEST = 8192; + +interface EmbeddingResponse { + object: 'list'; + data: Array<{ + object: 'embedding'; + embedding: number[]; + index: number; + }>; + model: string; + usage: { + prompt_tokens: number; + total_tokens: number; + }; +} + +interface EmbeddingResult { + embedding: number[]; + tokenCount: number; + model: string; +} + +interface BatchEmbeddingResult { + embeddings: Array<{ + input: string; + embedding: number[]; + index: number; + tokenCount: number; + }>; + totalTokens: number; + model: string; +} + +/** + * Generates embeddings for error text using Azure OpenAI + */ +export async function generateEmbedding( + text: string, + options: { + model?: string; + dimensions?: number; + } = {} +): Promise { + const model = options.model || EMBEDDING_MODEL; + const apiKey = config.AZURE_OPENAI_KEY; + const endpoint = config.AZURE_OPENAI_ENDPOINT; + + if (!apiKey || !endpoint) { + throw new Error('Azure OpenAI credentials not configured'); + } + + const url = `${endpoint}/openai/deployments/${model}/embeddings?api-version=2024-02-01`; + + try { + const response = await fetch(url, { + method: 'POST', + headers: { + 'Content-Type': 'application/json', + 'api-key': apiKey, + }, + body: JSON.stringify({ + input: text, + model, + dimensions: options.dimensions || EMBEDDING_DIMENSIONS, + }), + }); + + if (!response.ok) { + const error = await response.text(); + throw new Error(`Azure OpenAI embedding failed: ${response.status} ${error}`); + } + + const data = (await response.json()) as EmbeddingResponse; + + if (!data.data?.[0]?.embedding) { + throw new Error('Invalid embedding response from Azure OpenAI'); + } + + return { + embedding: data.data[0].embedding, + tokenCount: data.usage?.prompt_tokens || 0, + model: data.model, + }; + } catch (error) { + console.error('Failed to generate embedding:', error); + throw error; + } +} + +/** + * Generates embeddings for multiple texts in a batch + * Respects Azure OpenAI batch limits + */ +export async function generateEmbeddingsBatch( + texts: string[], + options: { + model?: string; + dimensions?: number; + batchSize?: number; + } = {} +): Promise { + const batchSize = options.batchSize || MAX_BATCH_SIZE; + const model = options.model || EMBEDDING_MODEL; + + if (texts.length === 0) { + return { embeddings: [], totalTokens: 0, model }; + } + + // Process in chunks to respect batch limits + const chunks: string[][] = []; + for (let i = 0; i < texts.length; i += batchSize) { + chunks.push(texts.slice(i, i + batchSize)); + } + + const allEmbeddings: BatchEmbeddingResult['embeddings'] = []; + let totalTokens = 0; + + for (let chunkIndex = 0; chunkIndex < chunks.length; chunkIndex++) { + const chunk = chunks[chunkIndex]; + const apiKey = config.AZURE_OPENAI_KEY; + const endpoint = config.AZURE_OPENAI_ENDPOINT; + + if (!apiKey || !endpoint) { + throw new Error('Azure OpenAI credentials not configured'); + } + + const url = `${endpoint}/openai/deployments/${model}/embeddings?api-version=2024-02-01`; + + try { + const response = await fetch(url, { + method: 'POST', + headers: { + 'Content-Type': 'application/json', + 'api-key': apiKey, + }, + body: JSON.stringify({ + input: chunk, + model, + dimensions: options.dimensions || EMBEDDING_DIMENSIONS, + }), + }); + + if (!response.ok) { + const error = await response.text(); + throw new Error(`Azure OpenAI batch embedding failed: ${response.status} ${error}`); + } + + const data = (await response.json()) as EmbeddingResponse; + + // Map results back to original inputs + const chunkEmbeddings = data.data.map((item) => ({ + input: chunk[item.index], + embedding: item.embedding, + index: chunkIndex * batchSize + item.index, + tokenCount: Math.floor((data.usage?.prompt_tokens || 0) / chunk.length), // Approximate + })); + + allEmbeddings.push(...chunkEmbeddings); + totalTokens += data.usage?.prompt_tokens || 0; + + // Small delay between batches to avoid rate limits + if (chunkIndex < chunks.length - 1) { + await new Promise((resolve) => setTimeout(resolve, 100)); + } + } catch (error) { + console.error('Failed to generate batch embeddings:', error); + throw error; + } + } + + return { + embeddings: allEmbeddings.sort((a, b) => a.index - b.index), + totalTokens, + model, + }; +} + +// ============================================================================ +// Error-Specific Embedding Functions +// ============================================================================ + +/** + * Creates embedding text for an error cluster + * Combines error type, message template, and stack signature + */ +export function createClusterEmbeddingText( + errorType: string, + messageTemplate: string, + stackSignature: string +): string { + const parts = [ + `Error: ${errorType}`, + `Message: ${messageTemplate}`, + ]; + + if (stackSignature) { + // Include top 3 stack frames for context + const topFrames = stackSignature.split('|').slice(0, 3).join(' -> '); + parts.push(`Stack: ${topFrames}`); + } + + return parts.join('\n'); +} + +/** + * Creates embedding text for semantic error search + */ +export function createSearchEmbeddingText(query: string): string { + // Normalize query for better embedding matching + return query + .toLowerCase() + .replace(/[^\w\s]/g, ' ') + .replace(/\s+/g, ' ') + .trim(); +} + +// ============================================================================ +// Vector Utilities +// ============================================================================ + +/** + * Calculates cosine similarity between two vectors + */ +export function cosineSimilarity(a: number[], b: number[]): number { + if (a.length !== b.length) { + throw new Error('Vectors must have same dimensions'); + } + + let dotProduct = 0; + let normA = 0; + let normB = 0; + + for (let i = 0; i < a.length; i++) { + dotProduct += a[i] * b[i]; + normA += a[i] * a[i]; + normB += b[i] * b[i]; + } + + if (normA === 0 || normB === 0) { + return 0; + } + + return dotProduct / (Math.sqrt(normA) * Math.sqrt(normB)); +} + +/** + * Calculates Euclidean distance between two vectors + */ +export function euclideanDistance(a: number[], b: number[]): number { + if (a.length !== b.length) { + throw new Error('Vectors must have same dimensions'); + } + + let sum = 0; + for (let i = 0; i < a.length; i++) { + const diff = a[i] - b[i]; + sum += diff * diff; + } + + return Math.sqrt(sum); +} + +/** + * Normalizes a vector to unit length + */ +export function normalizeVector(vector: number[]): number[] { + const norm = Math.sqrt(vector.reduce((sum, val) => sum + val * val, 0)); + if (norm === 0) return vector; + return vector.map((val) => val / norm); +} + +// ============================================================================ +// Embedding Cache +// ============================================================================ + +interface CacheEntry { + embedding: number[]; + timestamp: number; +} + +class EmbeddingCache { + private cache = new Map(); + private ttlMs: number; + + constructor(ttlHours: number = 24) { + this.ttlMs = ttlHours * 60 * 60 * 1000; + } + + get(text: string): number[] | null { + const key = this.hashText(text); + const entry = this.cache.get(key); + + if (!entry) return null; + + // Check TTL + if (Date.now() - entry.timestamp > this.ttlMs) { + this.cache.delete(key); + return null; + } + + return entry.embedding; + } + + set(text: string, embedding: number[]): void { + const key = this.hashText(text); + this.cache.set(key, { + embedding, + timestamp: Date.now(), + }); + } + + private hashText(text: string): string { + // Simple hash for cache key + let hash = 0; + for (let i = 0; i < text.length; i++) { + const char = text.charCodeAt(i); + hash = ((hash << 5) - hash) + char; + hash = hash & hash; + } + return hash.toString(); + } +} + +// Global cache instance +export const embeddingCache = new EmbeddingCache(24); + +/** + * Generates embedding with caching + */ +export async function generateEmbeddingCached( + text: string, + options: { + model?: string; + dimensions?: number; + } = {} +): Promise { + // Check cache first + const cached = embeddingCache.get(text); + if (cached) { + return { + embedding: cached, + tokenCount: 0, + model: options.model || EMBEDDING_MODEL, + }; + } + + // Generate new embedding + const result = await generateEmbedding(text, options); + + // Cache it + embeddingCache.set(text, result.embedding); + + return result; +} diff --git a/services/platform-service/src/modules/predictive-analytics/feature-extractor.ts b/services/platform-service/src/modules/predictive-analytics/feature-extractor.ts index add0245f..8e27bd09 100644 --- a/services/platform-service/src/modules/predictive-analytics/feature-extractor.ts +++ b/services/platform-service/src/modules/predictive-analytics/feature-extractor.ts @@ -10,150 +10,106 @@ import type { TelemetryEventDoc } from '../telemetry/types.js'; // ============================================================================ export interface UserBehaviorFeatures { - // Recency features daysSinceLastSession: number; daysSinceLastCoreAction: number; hoursSinceLastLogin: number; - - // Frequency features sessionsLast24Hours: number; sessionsLast7Days: number; sessionsLast30Days: number; avgSessionsPerWeek: number; avgSessionsPerDay: number; - - // Session depth avgSessionDurationMinutes: number; totalSessionDurationMinutes: number; actionsPerSession: number; uniqueFeaturesUsed: number; - - // Engagement trends - sessionFrequencyTrend: number; // -1 to 1 (declining to increasing) + sessionFrequencyTrend: number; engagementDepthTrend: number; } export interface EngagementFeatures { - // Feature usage diversity - featureUsageDiversity: number; // 0-1 (normalized unique features / total features) + featureUsageDiversity: number; coreActionCompletionRate: number; - featureAdoptionVelocity: number; // new features tried per week - - // Product-specific engagement - powerUserScore: number; // 0-1 based on advanced feature usage + featureAdoptionVelocity: number; + powerUserScore: number; onboardingCompletionRate: number; firstValueMomentAchieved: boolean; timeToFirstValueHours: number; } export interface PerformanceFeatures { - // Error/stability exposure errorRateLast7Days: number; errorRateLast30Days: number; crashCountLast7Days: number; crashCountLast30Days: number; - - // Performance perception avgLatencyMs: number; slowRequestCount: number; timeoutCount: number; - - // Recovery behavior errorRecoveryRate: number; supportTicketCount: number; } export interface SocialFeatures { - // Sharing/collaboration shareCount: number; inviteCount: number; collaborationScore: number; - - // Network effects teamMemberCount: number; integrationsConnected: number; externalSharesLast30Days: number; } export interface RevenueFeatures { - // Payment history - planTier: number; // 0=free, 1=pro, 2=enterprise + planTier: number; lifetimeValue: number; mrrContribution: number; - - // Plan changes upgradeCount: number; downgradeCount: number; daysSinceLastPayment: number; daysSincePlanChange: number; - - // Support supportTicketCount: number; supportSatisfactionScore: number; escalatedTicketCount: number; } export interface RollingWindowFeatures { - // 7-day rolling averages rollingAvgSessions7d: number; rollingAvgDuration7d: number; rollingAvgActions7d: number; - - // Week-over-week change (acceleration) - wowSessionChange: number; // % change + wowSessionChange: number; wowDurationChange: number; wowActionsChange: number; - - // Cohort comparison (normalized vs similar users) - cohortSessionPercentile: number; // 0-100 + cohortSessionPercentile: number; cohortEngagementPercentile: number; cohortRetentionPercentile: number; } export interface ProductSpecificFeatures { - // NomGap fastCompletionRate?: number; protocolAdherenceScore?: number; streakLength?: number; autophagyEngagementScore?: number; - - // JarvisJr agentDiversityScore?: number; voiceSessionRatio?: number; skillProgressionRate?: number; sessionCompletionRate?: number; - - // ChronoMind timerCompletionRate?: number; cascadeEffectiveness?: number; routineAdherenceScore?: number; urgencyResponseRate?: number; - - // MindLyst brainUsageDiversity?: number; triageAccuracyScore?: number; memoryCaptureFrequency?: number; reflectionCompletionRate?: number; - - // PeakPulse activitySessionFrequency?: number; goalCompletionRate?: number; streakMaintenanceScore?: number; socialSharingCount?: number; - - // LysnrAI dictationFrequency?: number; accuracyRate?: number; hotkeyUsageRate?: number; vocabularyGrowthRate?: number; } -// ============================================================================ -// Time Window Aggregations -// ============================================================================ - export interface TimeWindowFeatures { - // Last 24 hours (recent behavior) recent: { sessionCount: number; totalDuration: number; @@ -161,8 +117,6 @@ export interface TimeWindowFeatures { errorCount: number; uniqueFeatures: string[]; }; - - // Last 7 days (weekly patterns) weekly: { sessionCount: number; totalDuration: number; @@ -171,8 +125,6 @@ export interface TimeWindowFeatures { uniqueFeatures: string[]; daysActive: number; }; - - // Last 30 days (monthly trends) monthly: { sessionCount: number; totalDuration: number; @@ -181,8 +133,6 @@ export interface TimeWindowFeatures { uniqueFeatures: string[]; daysActive: number; }; - - // Life-to-date (all-time totals) lifetime: { totalSessions: number; totalDuration: number; @@ -193,19 +143,11 @@ export interface TimeWindowFeatures { }; } -// ============================================================================ -// Complete Feature Vector -// ============================================================================ - export interface CompleteFeatureVector { userId: string; productId: string; computedAt: Date; - observationWindow: { - start: Date; - end: Date; - }; - + observationWindow: { start: Date; end: Date }; behavior: UserBehaviorFeatures; engagement: EngagementFeatures; performance: PerformanceFeatures; @@ -214,16 +156,10 @@ export interface CompleteFeatureVector { rolling: RollingWindowFeatures; productSpecific: ProductSpecificFeatures; timeWindows: TimeWindowFeatures; - - // Metadata featureSchemaVersion: string; - dataQualityScore: number; // 0-1 based on completeness + dataQualityScore: number; } -// ============================================================================ -// Feature Extraction Functions -// ============================================================================ - const SCHEMA_VERSION = '1.0.0'; export function extractFeaturesFromTelemetry( @@ -235,46 +171,25 @@ export function extractFeaturesFromTelemetry( const observationStart = new Date(referenceDate); observationStart.setDate(observationStart.getDate() - 30); - // Filter events to observation window const windowedEvents = events.filter( - (e) => new Date(e.timestamp) >= observationStart && new Date(e.timestamp) <= referenceDate + (e) => new Date(e.occurredAt) >= observationStart && new Date(e.occurredAt) <= referenceDate ); - // Extract time windows const timeWindows = extractTimeWindows(windowedEvents, referenceDate); - - // Extract behavior features const behavior = extractBehaviorFeatures(windowedEvents, timeWindows, referenceDate); - - // Extract engagement features const engagement = extractEngagementFeatures(windowedEvents, timeWindows); - - // Extract performance features const performance = extractPerformanceFeatures(windowedEvents, timeWindows); - - // Extract social features const social = extractSocialFeatures(windowedEvents); - - // Extract revenue features (from events or external data) const revenue = extractRevenueFeatures(windowedEvents); - - // Extract rolling window features const rolling = extractRollingWindowFeatures(timeWindows); - - // Extract product-specific features const productSpecific = extractProductSpecificFeatures(windowedEvents, productId); - - // Calculate data quality score const dataQualityScore = calculateDataQualityScore(behavior, engagement, performance); return { userId, productId, computedAt: referenceDate, - observationWindow: { - start: observationStart, - end: referenceDate, - }, + observationWindow: { start: observationStart, end: referenceDate }, behavior, engagement, performance, @@ -288,44 +203,55 @@ export function extractFeaturesFromTelemetry( }; } -function extractTimeWindows( - events: TelemetryEventDoc[], - referenceDate: Date -): TimeWindowFeatures { +function extractTimeWindows(events: TelemetryEventDoc[], referenceDate: Date): TimeWindowFeatures { const oneDayAgo = new Date(referenceDate.getTime() - 24 * 60 * 60 * 1000); const sevenDaysAgo = new Date(referenceDate.getTime() - 7 * 24 * 60 * 60 * 1000); const thirtyDaysAgo = new Date(referenceDate.getTime() - 30 * 24 * 60 * 60 * 1000); - const recentEvents = events.filter((e) => new Date(e.timestamp) >= oneDayAgo); - const weeklyEvents = events.filter((e) => new Date(e.timestamp) >= sevenDaysAgo); - const monthlyEvents = events.filter((e) => new Date(e.timestamp) >= thirtyDaysAgo); + const recentEvents = events.filter((e) => new Date(e.occurredAt) >= oneDayAgo); + const weeklyEvents = events.filter((e) => new Date(e.occurredAt) >= sevenDaysAgo); + const monthlyEvents = events.filter((e) => new Date(e.occurredAt) >= thirtyDaysAgo); return { recent: aggregateEvents(recentEvents), - weekly: aggregateEvents(weeklyEvents, true), - monthly: aggregateEvents(monthlyEvents, true), + weekly: aggregateEventsWithDays(weeklyEvents), + monthly: aggregateEventsWithDays(monthlyEvents), lifetime: { totalSessions: countSessions(events), totalDuration: sumDurations(events), totalActions: countActions(events), totalErrors: countErrors(events), allFeaturesUsed: extractUniqueFeatures(events), - accountAgeDays: 30, // Default, should be passed as param + accountAgeDays: 30, }, }; } -function aggregateEvents( - events: TelemetryEventDoc[], - trackDaysActive = false -): { - sessionCount: number; - totalDuration: number; - actionCount: number; - errorCount: number; - uniqueFeatures: string[]; - daysActive?: number; -} { +function aggregateEvents(events: TelemetryEventDoc[]) { + const sessions = new Set(); + const features = new Set(); + let totalDuration = 0; + let actionCount = 0; + let errorCount = 0; + + for (const event of events) { + if (event.sessionId) sessions.add(event.sessionId); + if (event.feature) features.add(event.feature); + if (event.metrics?.duration) totalDuration += event.metrics.duration; + if (event.eventName?.includes('action')) actionCount++; + if (event.eventType === 'error' || event.eventType === 'fatal') errorCount++; + } + + return { + sessionCount: sessions.size, + totalDuration, + actionCount, + errorCount, + uniqueFeatures: Array.from(features), + }; +} + +function aggregateEventsWithDays(events: TelemetryEventDoc[]) { const sessions = new Set(); const features = new Set(); const activeDays = new Set(); @@ -336,14 +262,11 @@ function aggregateEvents( for (const event of events) { if (event.sessionId) sessions.add(event.sessionId); if (event.feature) features.add(event.feature); - if (trackDaysActive) { - const day = event.timestamp.split('T')[0]; - activeDays.add(day); - } - - if (event.eventType === 'action') actionCount++; - if (event.eventType === 'error') errorCount++; - if (event.duration) totalDuration += event.duration; + const day = event.occurredAt.split('T')[0]; + activeDays.add(day); + if (event.metrics?.duration) totalDuration += event.metrics.duration; + if (event.eventName?.includes('action')) actionCount++; + if (event.eventType === 'error' || event.eventType === 'fatal') errorCount++; } return { @@ -352,7 +275,7 @@ function aggregateEvents( actionCount, errorCount, uniqueFeatures: Array.from(features), - daysActive: trackDaysActive ? activeDays.size : undefined, + daysActive: activeDays.size, }; } @@ -364,33 +287,17 @@ function extractBehaviorFeatures( const lastSession = findLastSession(events); const lastCoreAction = findLastCoreAction(events); - const daysSinceLastSession = lastSession - ? daysBetween(lastSession.timestamp, referenceDate) - : 30; - const daysSinceLastCoreAction = lastCoreAction - ? daysBetween(lastCoreAction.timestamp, referenceDate) - : 30; + const daysSinceLastSession = lastSession ? daysBetween(lastSession.occurredAt, referenceDate) : 30; + const daysSinceLastCoreAction = lastCoreAction ? daysBetween(lastCoreAction.occurredAt, referenceDate) : 30; const monthly = timeWindows.monthly; const weekly = timeWindows.weekly; - // Calculate averages - const avgSessionsPerWeek = monthly.daysActive - ? monthly.sessionCount / (monthly.daysActive / 7) - : 0; - const avgSessionsPerDay = monthly.daysActive - ? monthly.sessionCount / monthly.daysActive - : 0; + const avgSessionsPerWeek = monthly.daysActive ? monthly.sessionCount / (monthly.daysActive / 7) : 0; + const avgSessionsPerDay = monthly.daysActive ? monthly.sessionCount / monthly.daysActive : 0; + const avgSessionDurationMinutes = monthly.sessionCount ? monthly.totalDuration / monthly.sessionCount / 60 : 0; + const actionsPerSession = monthly.sessionCount ? monthly.actionCount / monthly.sessionCount : 0; - const avgSessionDurationMinutes = monthly.sessionCount - ? monthly.totalDuration / monthly.sessionCount / 60 - : 0; - - const actionsPerSession = monthly.sessionCount - ? monthly.actionCount / monthly.sessionCount - : 0; - - // Calculate trends const sessionFrequencyTrend = calculateTrend(weekly.sessionCount, monthly.sessionCount / 4); const engagementDepthTrend = calculateTrend( weekly.totalDuration / Math.max(weekly.sessionCount, 1), @@ -421,17 +328,12 @@ function extractEngagementFeatures( ): EngagementFeatures { const monthly = timeWindows.monthly; const allFeatures = extractUniqueFeatures(events); - const totalPossibleFeatures = 20; // Configurable based on product + const totalPossibleFeatures = 20; const featureUsageDiversity = Math.min(allFeatures.length / totalPossibleFeatures, 1); - - // Calculate core action completion (specific events indicate core actions) const coreActionEvents = events.filter((e) => e.eventName?.includes('core_action')); - const coreActionCompletionRate = monthly.actionCount - ? coreActionEvents.length / monthly.actionCount - : 0; + const coreActionCompletionRate = monthly.actionCount ? coreActionEvents.length / monthly.actionCount : 0; - // Power user score based on advanced features const advancedFeatures = allFeatures.filter((f) => ['export', 'integration', 'automation', 'advanced'].some((a) => f.includes(a)) ); @@ -440,7 +342,7 @@ function extractEngagementFeatures( return { featureUsageDiversity, coreActionCompletionRate, - featureAdoptionVelocity: monthly.uniqueFeatures.length / 4, // per week + featureAdoptionVelocity: monthly.uniqueFeatures.length / 4, powerUserScore, onboardingCompletionRate: calculateOnboardingCompletion(events), firstValueMomentAchieved: hasFirstValueMoment(events), @@ -455,29 +357,22 @@ function extractPerformanceFeatures( const monthly = timeWindows.monthly; const weekly = timeWindows.weekly; - const monthlyErrors = countErrors( - events.filter((e) => new Date(e.timestamp) >= new Date(Date.now() - 30 * 24 * 60 * 60 * 1000)) - ); + const monthlyErrors = countErrors(events.filter((e) => new Date(e.occurredAt) >= new Date(Date.now() - 30 * 24 * 60 * 60 * 1000))); const weeklyErrors = weekly.errorCount; - const errorRateLast30Days = monthly.actionCount - ? monthlyErrors / monthly.actionCount - : 0; - const errorRateLast7Days = weekly.actionCount - ? weeklyErrors / weekly.actionCount - : 0; + const errorRateLast30Days = monthly.actionCount ? monthlyErrors / monthly.actionCount : 0; + const errorRateLast7Days = weekly.actionCount ? weeklyErrors / weekly.actionCount : 0; - // Extract latency from events - const latencyEvents = events.filter((e) => e.duration && e.duration < 30000); // Filter outliers + const latencyEvents = events.filter((e) => e.metrics?.duration && e.metrics.duration < 30000); const avgLatencyMs = latencyEvents.length - ? latencyEvents.reduce((sum, e) => sum + (e.duration || 0), 0) / latencyEvents.length + ? latencyEvents.reduce((sum, e) => sum + (e.metrics?.duration || 0), 0) / latencyEvents.length : 0; return { errorRateLast7Days, errorRateLast30Days, - crashCountLast7Days: countCrashes(weeklyEvents(events)), - crashCountLast30Days: countCrashes(monthlyEvents(events)), + crashCountLast7Days: countCrashes(getWeeklyEvents(events)), + crashCountLast30Days: countCrashes(getMonthlyEvents(events)), avgLatencyMs, slowRequestCount: countSlowRequests(events), timeoutCount: countTimeouts(events), @@ -497,14 +392,12 @@ function extractSocialFeatures(events: TelemetryEventDoc[]): SocialFeatures { collaborationScore: calculateCollaborationScore(events), teamMemberCount: extractTeamMemberCount(events), integrationsConnected: integrationEvents.length, - externalSharesLast30Days: shareEvents.filter((e) => e.properties?.external === true).length, + externalSharesLast30Days: shareEvents.filter((e) => e.context?.external === true).length, }; } function extractRevenueFeatures(events: TelemetryEventDoc[]): RevenueFeatures { - const planChangeEvents = events.filter( - (e) => e.eventName?.includes('plan') || e.eventName?.includes('subscription') - ); + const planChangeEvents = events.filter((e) => e.eventName?.includes('plan') || e.eventName?.includes('subscription')); const supportEvents = events.filter((e) => e.eventName?.includes('support')); const upgrades = planChangeEvents.filter((e) => e.eventName?.includes('upgrade')).length; @@ -520,7 +413,7 @@ function extractRevenueFeatures(events: TelemetryEventDoc[]): RevenueFeatures { daysSincePlanChange: extractDaysSincePlanChange(events), supportTicketCount: supportEvents.length, supportSatisfactionScore: calculateSupportSatisfaction(supportEvents), - escalatedTicketCount: supportEvents.filter((e) => e.properties?.escalated).length, + escalatedTicketCount: supportEvents.filter((e) => e.context?.escalated).length, }; } @@ -528,52 +421,31 @@ function extractRollingWindowFeatures(timeWindows: TimeWindowFeatures): RollingW const monthly = timeWindows.monthly; const weekly = timeWindows.weekly; - // 7-day rolling averages const rollingAvgSessions7d = weekly.sessionCount / 7; - const rollingAvgDuration7d = weekly.sessionCount - ? weekly.totalDuration / weekly.sessionCount / 60 - : 0; + const rollingAvgDuration7d = weekly.sessionCount ? weekly.totalDuration / weekly.sessionCount / 60 : 0; const rollingAvgActions7d = weekly.sessionCount ? weekly.actionCount / weekly.sessionCount : 0; - // Week-over-week change (comparing current week to average week in month) const avgWeekInMonth = monthly.sessionCount / 4; const wowSessionChange = avgWeekInMonth ? (weekly.sessionCount - avgWeekInMonth) / avgWeekInMonth : 0; - const avgDurationWeekInMonth = monthly.sessionCount - ? monthly.totalDuration / monthly.sessionCount / 60 / 4 - : 0; + const avgDurationWeekInMonth = monthly.sessionCount ? monthly.totalDuration / monthly.sessionCount / 60 / 4 : 0; const wowDurationChange = avgDurationWeekInMonth ? (rollingAvgDuration7d - avgDurationWeekInMonth) / avgDurationWeekInMonth : 0; - // Cohort percentiles (would require cohort data - using estimates) - const cohortSessionPercentile = estimateCohortPercentile(rollingAvgSessions7d, 'sessions'); - const cohortEngagementPercentile = estimateCohortPercentile( - timeWindows.monthly.uniqueFeatures.length, - 'features' - ); - const cohortRetentionPercentile = estimateCohortPercentile( - monthly.daysActive || 0, - 'retention' - ); - return { rollingAvgSessions7d, rollingAvgDuration7d, rollingAvgActions7d, wowSessionChange, wowDurationChange, - wowActionsChange: wowSessionChange, // Correlated with session change - cohortSessionPercentile, - cohortEngagementPercentile, - cohortRetentionPercentile, + wowActionsChange: wowSessionChange, + cohortSessionPercentile: estimateCohortPercentile(rollingAvgSessions7d, 'sessions'), + cohortEngagementPercentile: estimateCohortPercentile(timeWindows.monthly.uniqueFeatures.length, 'features'), + cohortRetentionPercentile: estimateCohortPercentile(monthly.daysActive || 0, 'retention'), }; } -// ============================================================================ -// Product-Specific Feature Extraction -// ============================================================================ - export function extractProductSpecificFeatures( events: TelemetryEventDoc[], productId: string @@ -600,12 +472,10 @@ function extractNomGapFeatures(events: TelemetryEventDoc[]): ProductSpecificFeat const fastEvents = events.filter((e) => e.feature === 'fasting'); const completedFasts = fastEvents.filter((e) => e.eventName === 'fast_completed'); const totalFasts = fastEvents.filter((e) => e.eventName === 'fast_started').length; - const protocolEvents = events.filter((e) => e.feature === 'protocol'); - const adheredProtocols = protocolEvents.filter((e) => e.properties?.adhered).length; - + const adheredProtocols = protocolEvents.filter((e) => e.context?.adhered).length; const streakEvents = events.filter((e) => e.eventName?.includes('streak')); - const currentStreak = Math.max(...streakEvents.map((e) => e.properties?.streakLength || 0), 0); + const currentStreak = Math.max(...streakEvents.map((e) => (e.context?.streakLength as number) || 0), 0); return { fastCompletionRate: totalFasts ? completedFasts.length / totalFasts : 0, @@ -617,19 +487,16 @@ function extractNomGapFeatures(events: TelemetryEventDoc[]): ProductSpecificFeat function extractJarvisJrFeatures(events: TelemetryEventDoc[]): ProductSpecificFeatures { const agentEvents = events.filter((e) => e.feature === 'agent'); - const uniqueAgents = new Set(agentEvents.map((e) => e.properties?.agentId)).size; - - const voiceEvents = events.filter((e) => e.properties?.mode === 'voice'); - const textEvents = events.filter((e) => e.properties?.mode === 'text'); + const uniqueAgents = new Set(agentEvents.map((e) => e.context?.agentId as string)).size; + const voiceEvents = events.filter((e) => e.context?.mode === 'voice'); + const textEvents = events.filter((e) => e.context?.mode === 'text'); const totalSessions = voiceEvents.length + textEvents.length; - const skillEvents = events.filter((e) => e.eventName?.includes('skill')); - const skillProgression = calculateSkillProgression(skillEvents); return { agentDiversityScore: Math.min(uniqueAgents / 3, 1), voiceSessionRatio: totalSessions ? voiceEvents.length / totalSessions : 0, - skillProgressionRate: skillProgression, + skillProgressionRate: calculateSkillProgression(skillEvents), sessionCompletionRate: calculateSessionCompletionRate(events), }; } @@ -638,12 +505,9 @@ function extractChronoMindFeatures(events: TelemetryEventDoc[]): ProductSpecific const timerEvents = events.filter((e) => e.feature === 'timer'); const completedTimers = timerEvents.filter((e) => e.eventName === 'timer_completed').length; const totalTimers = timerEvents.filter((e) => e.eventName === 'timer_started').length; - const cascadeEvents = events.filter((e) => e.feature === 'cascade'); - const acknowledgedCascades = cascadeEvents.filter((e) => e.properties?.acknowledged).length; - + const acknowledgedCascades = cascadeEvents.filter((e) => e.context?.acknowledged).length; const routineEvents = events.filter((e) => e.feature === 'routine'); - const completedRoutines = routineEvents.filter((e) => e.eventName === 'routine_completed').length; return { timerCompletionRate: totalTimers ? completedTimers / totalTimers : 0, @@ -655,37 +519,31 @@ function extractChronoMindFeatures(events: TelemetryEventDoc[]): ProductSpecific function extractMindLystFeatures(events: TelemetryEventDoc[]): ProductSpecificFeatures { const brainEvents = events.filter((e) => e.feature === 'brain'); - const uniqueBrains = new Set(brainEvents.map((e) => e.properties?.brainId)).size; - + const uniqueBrains = new Set(brainEvents.map((e) => e.context?.brainId as string)).size; const triageEvents = events.filter((e) => e.eventName?.includes('triage')); - const accurateTriages = triageEvents.filter((e) => e.properties?.accurate).length; - + const accurateTriages = triageEvents.filter((e) => e.context?.accurate).length; const memoryEvents = events.filter((e) => e.eventName?.includes('memory_capture')); const reflectionEvents = events.filter((e) => e.eventName?.includes('reflection')); - const completedReflections = reflectionEvents.filter((e) => e.properties?.completed).length; + const completedReflections = reflectionEvents.filter((e) => e.context?.completed).length; return { brainUsageDiversity: Math.min(uniqueBrains / 3, 1), triageAccuracyScore: triageEvents.length ? accurateTriages / triageEvents.length : 0, - memoryCaptureFrequency: memoryEvents.length / 30, // per day - reflectionCompletionRate: reflectionEvents.length - ? completedReflections / reflectionEvents.length - : 0, + memoryCaptureFrequency: memoryEvents.length / 30, + reflectionCompletionRate: reflectionEvents.length ? completedReflections / reflectionEvents.length : 0, }; } function extractPeakPulseFeatures(events: TelemetryEventDoc[]): ProductSpecificFeatures { const sessionEvents = events.filter((e) => e.feature === 'activity_session'); const goalEvents = events.filter((e) => e.feature === 'goal'); - const completedGoals = goalEvents.filter((e) => e.properties?.completed).length; - + const completedGoals = goalEvents.filter((e) => e.context?.completed).length; const streakEvents = events.filter((e) => e.eventName?.includes('streak')); - const currentStreak = Math.max(...streakEvents.map((e) => e.properties?.streakLength || 0), 0); - + const currentStreak = Math.max(...streakEvents.map((e) => (e.context?.streakLength as number) || 0), 0); const shareEvents = events.filter((e) => e.eventName?.includes('share')); return { - activitySessionFrequency: sessionEvents.length / 30, // per day + activitySessionFrequency: sessionEvents.length / 30, goalCompletionRate: goalEvents.length ? completedGoals / goalEvents.length : 0, streakMaintenanceScore: Math.min(currentStreak / 7, 1), socialSharingCount: shareEvents.length, @@ -694,41 +552,33 @@ function extractPeakPulseFeatures(events: TelemetryEventDoc[]): ProductSpecificF function extractLysnrAIFeatures(events: TelemetryEventDoc[]): ProductSpecificFeatures { const dictationEvents = events.filter((e) => e.feature === 'dictation'); - const completedDictations = dictationEvents.filter( - (e) => e.eventName === 'dictation_completed' - ).length; - - const accuracyEvents = dictationEvents.filter((e) => e.properties?.accuracy !== undefined); + const completedDictations = dictationEvents.filter((e) => e.eventName === 'dictation_completed').length; + const accuracyEvents = dictationEvents.filter((e) => e.metrics?.accuracy !== undefined); const avgAccuracy = accuracyEvents.length - ? accuracyEvents.reduce((sum, e) => sum + (e.properties?.accuracy || 0), 0) / - accuracyEvents.length + ? accuracyEvents.reduce((sum, e) => sum + ((e.metrics?.accuracy as number) || 0), 0) / accuracyEvents.length : 0; - const hotkeyEvents = events.filter((e) => e.eventName?.includes('hotkey')); const vocabularyEvents = events.filter((e) => e.eventName?.includes('vocabulary')); return { - dictationFrequency: dictationEvents.length / 30, // per day + dictationFrequency: dictationEvents.length / 30, accuracyRate: avgAccuracy, hotkeyUsageRate: hotkeyEvents.length / Math.max(dictationEvents.length, 1), vocabularyGrowthRate: calculateVocabularyGrowth(vocabularyEvents), }; } -// ============================================================================ // Helper Functions -// ============================================================================ - function findLastSession(events: TelemetryEventDoc[]): TelemetryEventDoc | undefined { return events - .filter((e) => e.eventType === 'session_start' || e.sessionId) - .sort((a, b) => new Date(b.timestamp).getTime() - new Date(a.timestamp).getTime())[0]; + .filter((e) => e.eventName?.includes('session_start') || e.sessionId) + .sort((a, b) => new Date(b.occurredAt).getTime() - new Date(a.occurredAt).getTime())[0]; } function findLastCoreAction(events: TelemetryEventDoc[]): TelemetryEventDoc | undefined { return events - .filter((e) => e.properties?.isCoreAction === true || e.eventName?.includes('core')) - .sort((a, b) => new Date(b.timestamp).getTime() - new Date(a.timestamp).getTime())[0]; + .filter((e) => e.context?.isCoreAction === true || e.eventName?.includes('core')) + .sort((a, b) => new Date(b.occurredAt).getTime() - new Date(a.occurredAt).getTime())[0]; } function countSessions(events: TelemetryEventDoc[]): number { @@ -736,24 +586,23 @@ function countSessions(events: TelemetryEventDoc[]): number { } function sumDurations(events: TelemetryEventDoc[]): number { - return events.reduce((sum, e) => sum + (e.duration || 0), 0); + return events.reduce((sum, e) => sum + (e.metrics?.duration || 0), 0); } function countActions(events: TelemetryEventDoc[]): number { - return events.filter((e) => e.eventType === 'action').length; + return events.filter((e) => e.eventName?.includes('action')).length; } function countErrors(events: TelemetryEventDoc[]): number { - return events.filter((e) => e.eventType === 'error' || e.eventName?.includes('error')).length; + return events.filter((e) => e.eventType === 'error' || e.eventType === 'fatal').length; } function extractUniqueFeatures(events: TelemetryEventDoc[]): string[] { - return Array.from(new Set(events.map((e) => e.feature).filter(Boolean) as string[]); + return Array.from(new Set(events.map((e) => e.feature).filter(Boolean) as string[])); } function daysBetween(timestamp: string, reference: Date): number { - const diff = reference.getTime() - new Date(timestamp).getTime(); - return Math.floor(diff / (1000 * 60 * 60 * 24)); + return Math.floor((reference.getTime() - new Date(timestamp).getTime()) / (1000 * 60 * 60 * 24)); } function calculateTrend(current: number, baseline: number): number { @@ -768,33 +617,18 @@ function calculateDataQualityScore( ): number { let score = 0; let factors = 0; - - if (behavior.sessionsLast30Days > 0) { - score += Math.min(behavior.sessionsLast30Days / 10, 1); - factors++; - } - if (engagement.uniqueFeaturesUsed > 0) { - score += Math.min(engagement.uniqueFeaturesUsed / 5, 1); - factors++; - } - if (performance.errorRateLast30Days >= 0) { - score += 1 - performance.errorRateLast30Days; - factors++; - } - + if (behavior.sessionsLast30Days > 0) { score += Math.min(behavior.sessionsLast30Days / 10, 1); factors++; } + if (engagement.featureUsageDiversity > 0) { score += Math.min(engagement.featureUsageDiversity * 5, 1); factors++; } + if (performance.errorRateLast30Days >= 0) { score += 1 - performance.errorRateLast30Days; factors++; } return factors > 0 ? score / factors : 0; } -// Placeholder implementations for product-specific helpers function calculateAutophagyEngagement(events: TelemetryEventDoc[]): number { - const autophagyEvents = events.filter((e) => e.properties?.stage === 'autophagy'); - return Math.min(autophagyEvents.length / 10, 1); + return Math.min(events.filter((e) => e.context?.stage === 'autophagy').length / 10, 1); } function calculateSkillProgression(events: TelemetryEventDoc[]): number { - if (events.length === 0) return 0; - const progressed = events.filter((e) => e.properties?.progressed).length; - return progressed / events.length; + return events.length ? events.filter((e) => e.context?.progressed).length / events.length : 0; } function calculateSessionCompletionRate(events: TelemetryEventDoc[]): number { @@ -804,61 +638,50 @@ function calculateSessionCompletionRate(events: TelemetryEventDoc[]): number { } function calculateRoutineAdherence(events: TelemetryEventDoc[]): number { - if (events.length === 0) return 0; - const onTime = events.filter((e) => e.properties?.onTime).length; - return onTime / events.length; + return events.length ? events.filter((e) => e.context?.onTime).length / events.length : 0; } function calculateUrgencyResponse(events: TelemetryEventDoc[]): number { - const urgent = events.filter((e) => e.properties?.urgent === true); - if (urgent.length === 0) return 0; - const responded = urgent.filter((e) => e.properties?.responded).length; - return responded / urgent.length; + const urgent = events.filter((e) => e.context?.urgent === true); + return urgent.length ? urgent.filter((e) => e.context?.responded).length / urgent.length : 0; } function calculateVocabularyGrowth(events: TelemetryEventDoc[]): number { - const wordsAdded = events.reduce((sum, e) => sum + (e.properties?.wordsAdded || 0), 0); - return wordsAdded / 30; // per day + return events.reduce((sum, e) => sum + ((e.metrics?.wordsAdded as number) || 0), 0) / 30; } function calculateOnboardingCompletion(events: TelemetryEventDoc[]): number { - const onboardingSteps = events.filter((e) => e.eventName?.includes('onboarding')); - const completed = onboardingSteps.filter((e) => e.properties?.completed).length; - const totalSteps = 5; // Configurable - return Math.min(completed / totalSteps, 1); + const steps = events.filter((e) => e.eventName?.includes('onboarding')); + return Math.min(steps.filter((e) => e.context?.completed).length / 5, 1); } function hasFirstValueMoment(events: TelemetryEventDoc[]): boolean { - return events.some((e) => e.eventName?.includes('first_value') || e.properties?.ahaMoment); + return events.some((e) => e.eventName?.includes('first_value') || e.context?.ahaMoment); } function calculateTimeToFirstValue(events: TelemetryEventDoc[]): number { - const firstSession = events.find((e) => e.eventType === 'session_start'); + const firstSession = events.find((e) => e.eventName?.includes('session_start')); const firstValue = events.find((e) => e.eventName?.includes('first_value')); - if (!firstSession || !firstValue) return 0; - return ( - (new Date(firstValue.timestamp).getTime() - new Date(firstSession.timestamp).getTime()) / - (1000 * 60 * 60) - ); + return firstSession && firstValue + ? (new Date(firstValue.occurredAt).getTime() - new Date(firstSession.occurredAt).getTime()) / (1000 * 60 * 60) + : 0; } function countCrashes(events: TelemetryEventDoc[]): number { - return events.filter((e) => e.eventName?.includes('crash') || e.properties?.crash).length; + return events.filter((e) => e.eventName?.includes('crash') || e.context?.crash).length; } function countSlowRequests(events: TelemetryEventDoc[]): number { - return events.filter((e) => e.duration && e.duration > 5000).length; + return events.filter((e) => e.metrics?.duration && e.metrics.duration > 5000).length; } function countTimeouts(events: TelemetryEventDoc[]): number { - return events.filter((e) => e.properties?.timeout || e.eventName?.includes('timeout')).length; + return events.filter((e) => e.context?.timeout || e.eventName?.includes('timeout')).length; } function calculateErrorRecoveryRate(events: TelemetryEventDoc[]): number { - const errors = events.filter((e) => e.eventType === 'error'); - if (errors.length === 0) return 1; - const recovered = errors.filter((e) => e.properties?.recovered).length; - return recovered / errors.length; + const errors = events.filter((e) => e.eventType === 'error' || e.eventType === 'fatal'); + return errors.length ? errors.filter((e) => e.context?.recovered).length / errors.length : 1; } function countSupportTickets(events: TelemetryEventDoc[]): number { @@ -866,67 +689,58 @@ function countSupportTickets(events: TelemetryEventDoc[]): number { } function calculateCollaborationScore(events: TelemetryEventDoc[]): number { - const collabEvents = events.filter((e) => e.properties?.collaborative === true); - return Math.min(collabEvents.length / 10, 1); + return Math.min(events.filter((e) => e.context?.collaborative === true).length / 10, 1); } function extractTeamMemberCount(events: TelemetryEventDoc[]): number { - const teamEvents = events.filter((e) => e.properties?.teamSize !== undefined); - return teamEvents.length > 0 ? Math.max(...teamEvents.map((e) => e.properties?.teamSize || 0)) : 0; + const teamEvents = events.filter((e) => e.context?.teamSize !== undefined); + return teamEvents.length ? Math.max(...teamEvents.map((e) => (e.context?.teamSize as number) || 0)) : 0; } function extractPlanTier(events: TelemetryEventDoc[]): number { - const planEvent = events.find((e) => e.properties?.planTier !== undefined); - return planEvent?.properties?.planTier || 0; + const planEvent = events.find((e) => e.context?.planTier !== undefined); + return (planEvent?.context?.planTier as number) || 0; } function extractLifetimeValue(events: TelemetryEventDoc[]): number { - return events.reduce((sum, e) => sum + (e.properties?.revenue || 0), 0); + return events.reduce((sum, e) => sum + ((e.metrics?.revenue as number) || 0), 0); } function extractMrrContribution(events: TelemetryEventDoc[]): number { - const mrrEvent = events.find((e) => e.properties?.mrr !== undefined); - return mrrEvent?.properties?.mrr || 0; + const mrrEvent = events.find((e) => e.metrics?.mrr !== undefined); + return (mrrEvent?.metrics?.mrr as number) || 0; } function extractDaysSincePayment(events: TelemetryEventDoc[]): number { const paymentEvent = events .filter((e) => e.eventName?.includes('payment')) - .sort((a, b) => new Date(b.timestamp).getTime() - new Date(a.timestamp).getTime())[0]; - return paymentEvent ? daysBetween(paymentEvent.timestamp, new Date()) : 30; + .sort((a, b) => new Date(b.occurredAt).getTime() - new Date(a.occurredAt).getTime())[0]; + return paymentEvent ? daysBetween(paymentEvent.occurredAt, new Date()) : 30; } function extractDaysSincePlanChange(events: TelemetryEventDoc[]): number { const planChange = events .filter((e) => e.eventName?.includes('plan_change')) - .sort((a, b) => new Date(b.timestamp).getTime() - new Date(a.timestamp).getTime())[0]; - return planChange ? daysBetween(planChange.timestamp, new Date()) : 90; + .sort((a, b) => new Date(b.occurredAt).getTime() - new Date(a.occurredAt).getTime())[0]; + return planChange ? daysBetween(planChange.occurredAt, new Date()) : 90; } function calculateSupportSatisfaction(events: TelemetryEventDoc[]): number { - const ratedEvents = events.filter((e) => e.properties?.satisfaction !== undefined); - if (ratedEvents.length === 0) return 0; - const sum = ratedEvents.reduce((acc, e) => acc + (e.properties?.satisfaction || 0), 0); - return sum / ratedEvents.length; + const rated = events.filter((e) => e.context?.satisfaction !== undefined); + return rated.length ? rated.reduce((acc, e) => acc + ((e.context?.satisfaction as number) || 0), 0) / rated.length : 0; } function estimateCohortPercentile(value: number, metric: string): number { - // Simplified estimation - in production, this would query cohort data - const baselines: Record = { - sessions: 2, - features: 5, - retention: 15, - }; - const baseline = baselines[metric] || 1; - return Math.min(Math.round((value / baseline) * 50), 100); + const baselines: Record = { sessions: 2, features: 5, retention: 15 }; + return Math.min(Math.round((value / (baselines[metric] || 1)) * 50), 100); } -function weeklyEvents(events: TelemetryEventDoc[]): TelemetryEventDoc[] { +function getWeeklyEvents(events: TelemetryEventDoc[]): TelemetryEventDoc[] { const weekAgo = new Date(Date.now() - 7 * 24 * 60 * 60 * 1000); - return events.filter((e) => new Date(e.timestamp) >= weekAgo); + return events.filter((e) => new Date(e.occurredAt) >= weekAgo); } -function monthlyEvents(events: TelemetryEventDoc[]): TelemetryEventDoc[] { +function getMonthlyEvents(events: TelemetryEventDoc[]): TelemetryEventDoc[] { const monthAgo = new Date(Date.now() - 30 * 24 * 60 * 60 * 1000); - return events.filter((e) => new Date(e.timestamp) >= monthAgo); + return events.filter((e) => new Date(e.occurredAt) >= monthAgo); }