feat(ai-diagnostics): implement error normalization and fingerprinting [1.1.3]
This commit is contained in:
parent
afe816690b
commit
8cdddd7c23
@ -501,8 +501,8 @@ interface ExperimentEventDoc {
|
||||
|
||||
| Phase | Task | Status | Commit |
|
||||
| ----- | ----------------------------- | ------ | ------ |
|
||||
| 1.1 | Experiment types & schemas | ⬜ | — |
|
||||
| 1.1 | Cosmos containers | ⬜ | — |
|
||||
| 1.1 | Experiment types & schemas | ✅ | a9b2247 |
|
||||
| 1.1 | Cosmos containers | ✅ | a9b2247 |
|
||||
| 1.2 | Deterministic bucketing | ⬜ | — |
|
||||
| 1.2 | Assignment strategies | ⬜ | — |
|
||||
| 1.2 | Audience targeting | ⬜ | — |
|
||||
|
||||
254
services/platform-service/src/modules/ab-testing/bucketing.ts
Normal file
254
services/platform-service/src/modules/ab-testing/bucketing.ts
Normal file
@ -0,0 +1,254 @@
|
||||
/**
|
||||
* A/B Testing — Deterministic bucketing and assignment strategies.
|
||||
* FNV-1a hashing for sticky assignments, Thompson sampling, UCB, epsilon-greedy.
|
||||
*/
|
||||
|
||||
import type { AllocationStrategy, ExperimentVariant, VariantDoc } from './types.js';
|
||||
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
// FNV-1a Hash (consistent with feature flags module)
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
|
||||
/**
 * Computes the 32-bit FNV-1a hash of a string.
 *
 * Every UTF-16 code unit (`charCodeAt`) is XOR-ed into the running state, which
 * is then multiplied by the FNV prime modulo 2^32.
 *
 * The multiplication goes through `Math.imul`: a plain `hash * 0x01000193` can
 * exceed 2^53, where a double rounds away the low bits, so the result would no
 * longer be the real FNV-1a value (e.g. for "abc").
 *
 * NOTE(review): this hash is presumably meant to agree with the feature-flags
 * module — confirm that module also uses exact 32-bit wrap-around.
 *
 * @param str - Text to hash.
 * @returns The hash as an unsigned 32-bit integer.
 */
export function fnv1a(str: string): number {
  let hash = 0x811c9dc5; // FNV-1a 32-bit offset basis
  for (let i = 0; i < str.length; i++) {
    hash ^= str.charCodeAt(i);
    hash = Math.imul(hash, 0x01000193); // FNV prime, exact modulo 2^32
  }
  // Math.imul yields a signed int32; reinterpret as unsigned.
  return hash >>> 0;
}
|
||||
|
||||
/**
 * Picks a variant for a user deterministically.
 *
 * The pair `experimentId:userId` is hashed with FNV-1a and reduced to a bucket
 * in 0..99. Variants are walked in order while their weights are summed; the
 * first variant whose running total exceeds the bucket wins. If no running
 * total exceeds the bucket, the last variant is returned.
 *
 * @param experimentId - Experiment identifier mixed into the hash.
 * @param userId - User identifier mixed into the hash.
 * @param variants - Candidate variants with their weights, compared against a 0..99 bucket.
 * @returns The `key` of the selected variant.
 */
export function assignVariant(
  experimentId: string,
  userId: string,
  variants: Array<{ key: string; weight: number }>
): string {
  const bucket = fnv1a(`${experimentId}:${userId}`) % 100;

  let runningWeight = 0;
  for (let i = 0; i < variants.length; i += 1) {
    runningWeight += variants[i].weight;
    if (bucket < runningWeight) {
      return variants[i].key;
    }
  }

  // Nothing claimed the bucket: fall back to the final variant.
  return variants[variants.length - 1].key;
}
|
||||
|
||||
/**
 * Traffic filter: decides whether a user falls inside the experiment's traffic share.
 *
 * Hashes `experimentId:bucket:userId` (a different key than the one used for
 * variant assignment) into a bucket in 0..99 and compares it with `trafficPercent`.
 *
 * @param experimentId - Experiment identifier mixed into the hash.
 * @param userId - User identifier mixed into the hash.
 * @param trafficPercent - Upper bound (exclusive) for admitted buckets.
 * @returns `true` when the user's bucket is below `trafficPercent`.
 */
export function isInExperimentBucket(experimentId: string, userId: string, trafficPercent: number): boolean {
  const hashKey = `${experimentId}:bucket:${userId}`;
  const userBucket = fnv1a(hashKey) % 100;
  return userBucket < trafficPercent;
}
|
||||
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
// Assignment Strategies
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
|
||||
/** Inputs shared by the assignment strategies in this module. */
export interface StrategyContext {
  /** Candidate variants the strategy chooses between. */
  variants: VariantDoc[];
  /** Control variant; forwarded to posterior sampling by Thompson sampling. */
  controlVariant: VariantDoc;
  /** Total participant count; used by the UCB exploration term. */
  totalParticipants: number;
  /** For epsilon-greedy: probability of exploring instead of exploiting. */
  explorationRate?: number;
}
|
||||
|
||||
/**
 * Weighted random pick driven by each variant's `currentAllocationPercent`.
 *
 * Draws a number in [0, 100) and walks the variants in order, summing their
 * allocation percentages; the first variant whose running total exceeds the
 * draw is chosen. If none does, the last variant is returned.
 *
 * @param ctx - Strategy context; only `variants` is read.
 * @returns The `id` of the chosen variant.
 */
export function randomAssignment(ctx: StrategyContext): string {
  const { variants } = ctx;
  const roll = Math.random() * 100;

  let allocatedSoFar = 0;
  for (let i = 0; i < variants.length; i += 1) {
    allocatedSoFar += variants[i].currentAllocationPercent;
    if (roll < allocatedSoFar) {
      return variants[i].id;
    }
  }

  return variants[variants.length - 1].id;
}
|
||||
|
||||
/**
 * Thompson sampling over the context's variants.
 *
 * Draws one posterior sample per variant (via `sampleFromPosterior`) and
 * returns the variant with the strictly largest draw. The first variant is the
 * starting choice, so it is kept when no draw compares greater.
 *
 * @param ctx - Strategy context; `variants` and `controlVariant` are read.
 * @returns The `id` of the variant with the highest sampled value.
 */
export function thompsonSampling(ctx: StrategyContext): string {
  const { variants, controlVariant } = ctx;

  let winnerId = variants[0].id;
  let winnerDraw = -Infinity;

  for (let i = 0; i < variants.length; i += 1) {
    const candidate = variants[i];
    const draw = sampleFromPosterior(candidate, controlVariant);
    if (draw > winnerDraw) {
      winnerDraw = draw;
      winnerId = candidate.id;
    }
  }

  return winnerId;
}
|
||||
|
||||
/**
 * Draws one sample from a variant's posterior.
 *
 * - When both `betaAlpha` and `betaBeta` are present, samples Beta(alpha, beta).
 * - Otherwise samples a Normal centred on the conversion rate (falling back to
 *   the primary metric value) with standard error `stdDev / sqrt(participants)`;
 *   `stdDev` defaults to 0.1 and a zero/missing participant count is treated as 1.
 *
 * @param variant - Variant whose `stats` drive the sample.
 * @param controlVariant - Accepted for signature compatibility; not read here.
 * @returns A single random draw.
 */
function sampleFromPosterior(variant: VariantDoc, controlVariant: VariantDoc): number {
  const { stats } = variant;

  // Conversion-style metrics carry Beta parameters.
  if (stats.betaAlpha !== undefined && stats.betaBeta !== undefined) {
    return sampleBeta(stats.betaAlpha, stats.betaBeta);
  }

  // Continuous metrics: Normal approximation around the observed mean.
  const mean = stats.conversionRate ?? stats.primaryMetricValue;
  const sampleSize = stats.participants || 1;
  const spread = stats.primaryMetricStdDev ?? 0.1;
  return sampleNormal(mean, spread / Math.sqrt(sampleSize));
}
|
||||
|
||||
/**
 * Epsilon-greedy selection.
 *
 * With probability `explorationRate` (0.1 when not provided) a variant is
 * picked uniformly at random; otherwise the best-performing variant according
 * to `getBestVariant` is returned.
 *
 * @param ctx - Strategy context; `variants` and `explorationRate` are read.
 * @returns The `id` of the selected variant.
 */
export function epsilonGreedy(ctx: StrategyContext): string {
  const { variants } = ctx;
  const epsilon = ctx.explorationRate === undefined ? 0.1 : ctx.explorationRate;

  const exploit = !(Math.random() < epsilon);
  if (exploit) {
    return getBestVariant(variants);
  }

  // Explore: uniform pick across all variants.
  const pick = Math.floor(Math.random() * variants.length);
  return variants[pick].id;
}
|
||||
|
||||
/**
 * Upper Confidence Bound (UCB1) selection.
 *
 * Each variant is scored as `rate + sqrt(2 * ln(total) / n)`, where `rate` is
 * the conversion rate (or the primary metric value when absent), `n` is the
 * variant's participant count (zero/missing treated as 1) and `total` is
 * `totalParticipants` clamped to at least 1. The strictly highest score wins;
 * the first variant is the starting choice.
 *
 * @param ctx - Strategy context; `variants` and `totalParticipants` are read.
 * @returns The `id` of the variant with the highest upper bound.
 */
export function ucbAssignment(ctx: StrategyContext): string {
  const { variants, totalParticipants } = ctx;

  // Numerator of the exploration bonus is the same for every variant.
  const logTerm = 2 * Math.log(Math.max(totalParticipants, 1));

  let leaderId = variants[0].id;
  let leaderScore = -Infinity;

  for (let i = 0; i < variants.length; i += 1) {
    const { id, stats } = variants[i];
    const mean = stats.conversionRate ?? stats.primaryMetricValue;
    const pulls = stats.participants || 1;
    const upperBound = mean + Math.sqrt(logTerm / pulls);

    if (upperBound > leaderScore) {
      leaderScore = upperBound;
      leaderId = id;
    }
  }

  return leaderId;
}
|
||||
|
||||
/**
 * Finds the variant with the strictly highest conversion rate, using the
 * primary metric value when no conversion rate is recorded. The first variant
 * is the starting choice and is kept on ties.
 *
 * @param variants - Variants to compare.
 * @returns The `id` of the best-performing variant.
 */
function getBestVariant(variants: VariantDoc[]): string {
  let topId = variants[0].id;
  let topRate = -Infinity;

  for (let i = 0; i < variants.length; i += 1) {
    const { id, stats } = variants[i];
    const observed = stats.conversionRate ?? stats.primaryMetricValue;
    if (observed > topRate) {
      topRate = observed;
      topId = id;
    }
  }

  return topId;
}
|
||||
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
// Distribution Sampling (for Thompson Sampling)
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
|
||||
/**
 * Draws from Beta(alpha, beta) as a ratio of Gamma draws:
 * with X ~ Gamma(alpha, 1) and Y ~ Gamma(beta, 1), X / (X + Y) ~ Beta(alpha, beta).
 *
 * @param alpha - First shape parameter.
 * @param beta - Second shape parameter.
 * @returns The ratio X / (X + Y).
 */
function sampleBeta(alpha: number, beta: number): number {
  const gammaAlpha = sampleGamma(alpha, 1);
  const gammaBeta = sampleGamma(beta, 1);
  const total = gammaAlpha + gammaBeta;
  return gammaAlpha / total;
}
|
||||
|
||||
/**
 * Draws from Normal(mean, stdDev) using the Box-Muller transform.
 *
 * `Math.random()` returns values in [0, 1), so it can return exactly 0. Feeding
 * that into `Math.log` gives -Infinity and a non-finite sample. The first
 * uniform is therefore taken as `1 - Math.random()`, which lies in (0, 1].
 *
 * @param mean - Centre of the distribution.
 * @param stdDev - Standard deviation (scale applied to the standard draw).
 * @returns A single random draw.
 */
function sampleNormal(mean: number, stdDev: number): number {
  const u1 = 1 - Math.random(); // (0, 1] — keeps Math.log finite
  const u2 = Math.random();
  const z0 = Math.sqrt(-2 * Math.log(u1)) * Math.cos(2 * Math.PI * u2);
  return mean + z0 * stdDev;
}
|
||||
|
||||
/**
 * Draws from Gamma(shape, scale) with the Marsaglia-Tsang method.
 *
 * For `shape < 1` the draw is taken from Gamma(shape + 1) and multiplied by
 * `U^(1 / shape)` with a fresh uniform `U`. Otherwise candidates are generated
 * from a standard normal and rejected until one passes either the quick
 * polynomial test or the logarithmic test.
 *
 * @param shape - Shape parameter.
 * @param scale - Scale parameter multiplied into the accepted value.
 * @returns A single random draw.
 */
function sampleGamma(shape: number, scale: number): number {
  if (shape < 1) {
    // The recursive draw must happen before the uniform is drawn.
    const boosted = sampleGamma(1 + shape, scale);
    return boosted * Math.pow(Math.random(), 1 / shape);
  }

  const d = shape - 1 / 3;
  const c = 1 / Math.sqrt(9 * d);

  for (;;) {
    const x = sampleStandardNormal();
    const base = 1 + c * x;
    if (base <= 0) continue;

    const cubed = base * base * base;
    const u = Math.random();

    // Quick acceptance first; the logarithmic test is only evaluated when it fails.
    if (
      u < 1 - 0.0331 * x * x * x * x ||
      Math.log(u) < 0.5 * x * x + d * (1 - cubed + Math.log(cubed))
    ) {
      return d * cubed * scale;
    }
  }
}
|
||||
|
||||
/** Draws from the standard normal distribution (mean 0, standard deviation 1). */
function sampleStandardNormal(): number {
  const standardDraw = sampleNormal(0, 1);
  return standardDraw;
}
|
||||
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
// Strategy Router
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
|
||||
/**
 * Routes an assignment request to the strategy named by `strategy`.
 *
 * Unrecognised strategy values are handled like `'random'`.
 *
 * @param strategy - Which allocation strategy to run.
 * @param ctx - Context handed unchanged to the chosen strategy.
 * @returns The `id` of the variant chosen by that strategy.
 */
export function assignByStrategy(
  strategy: AllocationStrategy,
  ctx: StrategyContext
): string {
  switch (strategy) {
    case 'thompson':
      return thompsonSampling(ctx);
    case 'epsilon_greedy':
      return epsilonGreedy(ctx);
    case 'ucb':
      return ucbAssignment(ctx);
    case 'random':
    default:
      return randomAssignment(ctx);
  }
}
|
||||
@ -0,0 +1,478 @@
|
||||
import { createHash } from 'crypto';
|
||||
import type { ErrorFingerprint, ErrorClusterDoc, ErrorEvent } from './types.js';
|
||||
|
||||
// ============================================================================
|
||||
// Error Normalization Service
|
||||
// ============================================================================
|
||||
|
||||
/**
 * Normalizes an error message by replacing variable parts with placeholders so
 * that messages differing only in IDs, numbers, etc. compare equal.
 *
 * Replacements, in the order they are applied:
 * - http/https URLs → <URL>
 * - UUIDs → <UUID>
 * - 24-character hex IDs (e.g. MongoDB ObjectIds) → <ID>
 * - Email addresses → <EMAIL>
 * - IPv4 addresses → <IP> (IPv6 is not handled)
 * - ISO 8601 timestamps and simple numeric dates → <DATE>
 * - `user` followed by digits or 8+ hex characters → <USER_ID>
 * - Numbers with four or more digits → <NUM>
 * - File paths → <PATH>/filename (directory part dropped)
 *
 * Order matters: URLs are replaced first because later steps would otherwise
 * rewrite numbers, IPs or IDs inside a URL, and the `<`/`>` of the inserted
 * placeholders stop the URL pattern from matching the whole URL.
 *
 * @param message - Raw error message.
 * @returns The message with variable parts replaced by placeholders.
 */
export function normalizeErrorMessage(message: string): string {
  let normalized = message;

  // URLs (http/https) — must run before any step that can touch URL contents.
  normalized = normalized.replace(/https?:\/\/[^\s<>"{}|\\^`[]+/g, '<URL>');

  // UUIDs (v4 and similar)
  normalized = normalized.replace(
    /[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}/gi,
    '<UUID>'
  );

  // MongoDB ObjectIds (24 hex chars)
  normalized = normalized.replace(/\b[0-9a-f]{24}\b/gi, '<ID>');

  // Email addresses
  normalized = normalized.replace(
    /[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}/g,
    '<EMAIL>'
  );

  // IPv4 addresses
  normalized = normalized.replace(
    /\b(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\b/g,
    '<IP>'
  );

  // ISO 8601 timestamps; any number of fractional-second digits is accepted so
  // microsecond/nanosecond precision does not leave stray digits behind.
  normalized = normalized.replace(
    /\d{4}-\d{2}-\d{2}[T ]\d{2}:\d{2}:\d{2}(?:\.\d+)?(?:Z|[+-]\d{2}:\d{2})?/g,
    '<DATE>'
  );

  // Simple dates (MM/DD/YYYY or DD/MM/YYYY)
  normalized = normalized.replace(/\b\d{1,2}[\/\-]\d{1,2}[\/\-]\d{2,4}\b/g, '<DATE>');

  // User IDs (various patterns)
  normalized = normalized.replace(/\buser[_-]?\d+\b/gi, '<USER_ID>');
  normalized = normalized.replace(/\buser[_-]?[0-9a-f]{8,}\b/gi, '<USER_ID>');

  // Numbers with 4+ digits (likely IDs or counts); shorter numbers are kept.
  normalized = normalized.replace(/\b\d{4,}\b/g, '<NUM>');

  // File paths (keep filename, remove path)
  normalized = normalized.replace(/(?:[/\\][\w.-]+)+\/[\w.-]+\.[\w]+/g, (match) => {
    const parts = match.split(/[/\\]/);
    return `<PATH>/${parts[parts.length - 1]}`;
  });

  return normalized;
}
|
||||
|
||||
// ============================================================================
|
||||
// Stack Trace Normalization
|
||||
// ============================================================================
|
||||
|
||||
/** One frame extracted from a textual stack trace. */
interface ParsedStackFrame {
  /** Function or method name as it appears in the trace. */
  function: string;
  /** File (or path) the frame points at. */
  file: string;
  /** Line number reported for the frame. */
  line: number;
  /** Column number, when the trace format provides one. */
  column?: number;
}
|
||||
|
||||
/**
 * Parses a multi-line stack trace into structured frames.
 *
 * Each non-empty line is tried against these formats, in order, and the first
 * match produces a frame; lines matching none are ignored:
 * - JavaScript/TypeScript: `at fn (file:line:col)`, `at file:line:col`,
 *   `at async fn (file:line:col)`
 * - Python: `File "path", line N, in fn`
 * - Swift: `Module function file:line column:col`
 * - Java/Kotlin (Android): `at com.pkg.Class.method(File.java:123)`
 *
 * The JavaScript pattern accepts function names containing spaces
 * (`new Foo`, `fn [as alias]`) and file paths starting with a Windows drive
 * letter (`C:\...`); previously such names leaked into `file` and drive-letter
 * paths were not recognised as JavaScript frames.
 *
 * NOTE(review): the Swift pattern is permissive — any line with three
 * whitespace-separated tokens where the third ends in `:digits` matches
 * (e.g. `connect ECONNREFUSED 127.0.0.1:5432`). Consider tightening it once
 * the real Swift trace format is confirmed.
 *
 * @param stackTrace - Raw stack trace text, frames separated by newlines.
 * @returns Frames in the order they appear in the trace.
 */
export function parseStackTrace(stackTrace: string): ParsedStackFrame[] {
  const frames: ParsedStackFrame[] = [];

  for (const rawLine of stackTrace.split('\n')) {
    const trimmed = rawLine.trim();
    if (!trimmed) continue;

    // JavaScript/TypeScript. The function group is lazy so it stops at the
    // first " (" and the file group tolerates a leading drive letter.
    const jsMatch = trimmed.match(
      /at\s+(?:async\s+)?(?:(.+?)\s+\()?((?:[A-Za-z]:)?[^:)]+):(\d+):(\d+)?\)?/
    );
    if (jsMatch) {
      frames.push({
        function: jsMatch[1] || '<anonymous>',
        file: jsMatch[2],
        line: parseInt(jsMatch[3], 10),
        column: jsMatch[4] ? parseInt(jsMatch[4], 10) : undefined,
      });
      continue;
    }

    // Python: File "path", line N, in functionName
    const pyMatch = trimmed.match(/File\s+"([^"]+)"[,\s]+line\s+(\d+)[,\s]+in\s+(\w+)/);
    if (pyMatch) {
      frames.push({
        function: pyMatch[3],
        file: pyMatch[1],
        line: parseInt(pyMatch[2], 10),
      });
      continue;
    }

    // Swift: Module function file:line column:col
    const swiftMatch = trimmed.match(/(\S+)\s+(\S+)\s+(\S+):(\d+)(?:\s+column:(\d+))?/);
    if (swiftMatch && !trimmed.startsWith('Stack')) {
      frames.push({
        function: swiftMatch[2],
        file: swiftMatch[3],
        line: parseInt(swiftMatch[4], 10),
        column: swiftMatch[5] ? parseInt(swiftMatch[5], 10) : undefined,
      });
      continue;
    }

    // Java/Kotlin: at com.package.Class.method(File.java:123)
    const javaMatch = trimmed.match(/at\s+([\w.$]+)\(([^)]+)\.(\w+):(\d+)\)/);
    if (javaMatch) {
      frames.push({
        // Keep only the method name, dropping package and class.
        function: javaMatch[1].split('.').pop() || '<unknown>',
        file: `${javaMatch[2]}.${javaMatch[3]}`,
        line: parseInt(javaMatch[4], 10),
      });
      continue;
    }
  }

  return frames;
}
|
||||
|
||||
/**
 * Builds a stable signature string from parsed stack frames.
 *
 * Only the first `maxFrames` frames are used. For each frame:
 * - the file loses a trailing `:digits` and has UUIDs / 24-hex IDs replaced by
 *   `<UUID>` / `<ID>`;
 * - the function name loses a leading `async `, `Generator.` or `bound `, a
 *   trailing `[...]` annotation, and has the first `_` + 8-or-more word
 *   characters replaced by `_<ID>`.
 * Frames are rendered as `function@file` and joined with `|`.
 *
 * @param frames - Parsed frames, most recent call first as produced by the parser.
 * @param maxFrames - How many leading frames contribute to the signature.
 * @returns The joined signature, or an empty string when there are no frames.
 */
export function normalizeStackFrames(
  frames: ParsedStackFrame[],
  maxFrames: number = 10
): string {
  const signatureParts: string[] = [];

  for (const { file, function: fnName } of frames.slice(0, maxFrames)) {
    let cleanFile = file.replace(/:\d+$/, '');
    cleanFile = cleanFile.replace(
      /[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}/gi,
      '<UUID>'
    );
    cleanFile = cleanFile.replace(/\b[0-9a-f]{24}\b/gi, '<ID>');

    let cleanFunction = fnName.replace(/^(async\s+|Generator\.|bound\s+)/, '');
    cleanFunction = cleanFunction.replace(/\s*\[.*\]$/, '');
    cleanFunction = cleanFunction.replace(/_\w{8,}/, '_<ID>');

    signatureParts.push(`${cleanFunction}@${cleanFile}`);
  }

  return signatureParts.join('|');
}
|
||||
|
||||
// ============================================================================
|
||||
// Fingerprint Generation
|
||||
// ============================================================================
|
||||
|
||||
/** Raw error data from which a fingerprint is generated. */
export interface FingerprintInput {
  /** Error type/class name, e.g. as reported by the runtime. */
  errorType: string;
  /** Raw error message. */
  message: string;
  /** Raw stack trace text, when one is available. */
  stackTrace?: string;
}
|
||||
|
||||
/** Normalized error components together with the hash derived from them. */
export interface FingerprintResult {
  /** Hash identifying the normalized error. */
  hash: string;
  /** Error type after normalization. */
  normalizedType: string;
  /** Error message after normalization. */
  normalizedMessage: string;
  /** Signature built from the normalized stack frames. */
  stackSignature: string;
  /** Where the error originated, when a suitable stack frame was found. */
  sourceLocation?: {
    file: string;
    function: string;
    line: number;
  };
}
|
||||
|
||||
/**
 * Generates a SHA-256 fingerprint from normalized error data.
 *
 * - The type loses a trailing `Error` and then a trailing `Exception`.
 * - The message is passed through `normalizeErrorMessage`.
 * - When a stack trace is present and yields frames, its signature comes from
 *   `normalizeStackFrames`.
 * The hash is the hex SHA-256 of `type::message::stackSignature`.
 *
 * `sourceLocation` is taken from the first frame that has a file outside
 * `node_modules`. Earlier only the very first frame was inspected, so a trace
 * that starts inside a dependency produced no source location at all.
 *
 * @param input - Raw error type, message and optional stack trace.
 * @returns The hash together with the normalized components it was built from.
 */
export function generateFingerprint(input: FingerprintInput): FingerprintResult {
  const normalizedType = input.errorType
    .replace(/Error$/, '') // Remove Error suffix
    .replace(/Exception$/, '') // Remove Exception suffix
    .trim();

  const normalizedMessage = normalizeErrorMessage(input.message);

  let stackSignature = '';
  let sourceLocation: FingerprintResult['sourceLocation'] | undefined;

  if (input.stackTrace) {
    const frames = parseStackTrace(input.stackTrace);
    if (frames.length > 0) {
      stackSignature = normalizeStackFrames(frames);

      // First frame that is not inside a dependency.
      for (const frame of frames) {
        if (frame.file && !frame.file.includes('node_modules')) {
          sourceLocation = {
            file: frame.file,
            function: frame.function,
            line: frame.line,
          };
          break;
        }
      }
    }
  }

  // Hash the normalized components, separated so they cannot run together.
  const hashInput = [normalizedType, normalizedMessage, stackSignature].join('::');
  const hash = createHash('sha256').update(hashInput).digest('hex');

  return {
    hash,
    normalizedType,
    normalizedMessage,
    stackSignature,
    sourceLocation,
  };
}
|
||||
|
||||
// ============================================================================
|
||||
// Similarity Scoring
|
||||
// ============================================================================
|
||||
|
||||
/**
 * Computes the Levenshtein (edit) distance between two strings: the minimum
 * number of single-character insertions, deletions and substitutions needed
 * to turn one into the other.
 *
 * Only the previous and current rows of the dynamic-programming table are kept.
 *
 * @param a - First string.
 * @param b - Second string.
 * @returns The edit distance; equals the other string's length when one is empty.
 */
export function levenshteinDistance(a: string, b: string): number {
  // Row 0: distance from the empty prefix of `b` to each prefix of `a`.
  let previousRow: number[] = [];
  for (let col = 0; col <= a.length; col++) {
    previousRow.push(col);
  }

  for (let row = 1; row <= b.length; row++) {
    const currentRow: number[] = [row];
    const bChar = b.charAt(row - 1);

    for (let col = 1; col <= a.length; col++) {
      if (bChar === a.charAt(col - 1)) {
        currentRow[col] = previousRow[col - 1];
      } else {
        const cheapest = Math.min(
          previousRow[col - 1], // substitution
          currentRow[col - 1], // insertion
          previousRow[col] // deletion
        );
        currentRow[col] = cheapest + 1;
      }
    }

    previousRow = currentRow;
  }

  return previousRow[a.length];
}
|
||||
|
||||
/**
 * Scores how alike two fingerprints are, from 0 to 1.
 *
 * Weighted components:
 * - type equality: 0.4 (all or nothing);
 * - message similarity: 0.3 × (1 − editDistance / longerLength);
 * - stack-signature similarity: 0.3 × the same ratio, counted only when both
 *   fingerprints have a non-empty stack signature.
 * The sum is divided by the total weight of the components that were counted.
 *
 * @param a - First fingerprint.
 * @param b - Second fingerprint.
 * @returns Similarity in the range 0..1.
 */
export function calculateFingerprintSimilarity(
  a: FingerprintResult,
  b: FingerprintResult
): number {
  // 1 − normalized edit distance; two empty strings count as identical.
  const textSimilarity = (left: string, right: string): number => {
    const distance = levenshteinDistance(left, right);
    const longest = Math.max(left.length, right.length);
    return longest > 0 ? 1 - distance / longest : 1;
  };

  // Type match (high weight)
  let score = a.normalizedType === b.normalizedType ? 0.4 : 0;
  let weight = 0.4;

  // Message similarity (medium weight)
  score += 0.3 * textSimilarity(a.normalizedMessage, b.normalizedMessage);
  weight += 0.3;

  // Stack signature similarity (medium weight), only when both sides have one
  if (a.stackSignature && b.stackSignature) {
    score += 0.3 * textSimilarity(a.stackSignature, b.stackSignature);
    weight += 0.3;
  }

  return weight > 0 ? score / weight : 0;
}
|
||||
|
||||
// ============================================================================
|
||||
// Error Event Processing
|
||||
// ============================================================================
|
||||
|
||||
/** Outcome of processing one error event. */
export interface ProcessedError {
  /** Fingerprint generated for the event. */
  fingerprint: FingerprintResult;
  /** Cluster the event belongs to (existing or newly generated). */
  clusterId: string;
  /** `true` when no existing fingerprint was close enough and a new cluster ID was created. */
  isNewCluster: boolean;
  /** Device/app context copied from the event. */
  context: {
    platform?: string;
    osVersion?: string;
    appVersion?: string;
    deviceModel?: string;
    screen?: string;
  };
}
|
||||
|
||||
/**
 * Turns an error event into a fingerprint and decides which cluster it joins.
 *
 * The event's fingerprint is compared with every existing fingerprint; the one
 * with the highest similarity of at least 0.85 is the match and its `id`
 * becomes the cluster ID. Without a match (or when the match has an empty
 * `id`) a fresh `ec_`-prefixed ID is generated.
 *
 * @param errorEvent - Incoming error event.
 * @param existingFingerprints - Known fingerprints to match against; defaults to none.
 * @returns Fingerprint, cluster ID, new-cluster flag and the event's context fields.
 */
export function processErrorEvent(
  errorEvent: ErrorEvent,
  existingFingerprints: Map<string, ErrorFingerprint> = new Map()
): ProcessedError {
  const { errorType, message, stackTrace } = errorEvent;
  const fingerprint = generateFingerprint({ errorType, message, stackTrace });

  // Scan for the closest existing fingerprint at or above the threshold.
  const nearMatchThreshold = 0.85;
  let closest: ErrorFingerprint | null = null;
  let closestScore = 0;

  for (const candidate of existingFingerprints.values()) {
    const candidateScore = calculateFingerprintSimilarity(fingerprint, {
      hash: candidate.fingerprintHash,
      normalizedType: candidate.errorType,
      normalizedMessage: candidate.messageTemplate,
      stackSignature: candidate.stackSignature,
    });

    if (candidateScore > closestScore && candidateScore >= nearMatchThreshold) {
      closestScore = candidateScore;
      closest = candidate;
    }
  }

  const { platform, osVersion, appVersion, deviceModel, screen } = errorEvent;

  return {
    fingerprint,
    clusterId: closest?.id || `ec_${generateClusterId()}`,
    isNewCluster: closest === null,
    context: { platform, osVersion, appVersion, deviceModel, screen },
  };
}
|
||||
|
||||
/**
 * Builds a cluster ID suffix from the current time and a random component,
 * both rendered in base 36 and joined with an underscore.
 *
 * @returns A string of the form `<time>_<random>`.
 */
function generateClusterId(): string {
  const timePart = Date.now().toString(36);
  // Skip the leading "0." and keep at most nine characters.
  const randomPart = Math.random().toString(36).slice(2, 11);
  return [timePart, randomPart].join('_');
}
|
||||
|
||||
// ============================================================================
|
||||
// Cluster Update Logic
|
||||
// ============================================================================
|
||||
|
||||
/**
 * Returns a copy of a cluster document updated for one more error occurrence.
 *
 * - `lastSeenAt` and `updatedAt` are set to the current time (ISO string).
 * - `occurrenceCount` is incremented.
 * - `uniqueUsers` is incremented when it is currently falsy or the event
 *   carries a `userId`. This is a simplification: no set of users is tracked,
 *   so repeat events from the same user are counted again.
 * - A `resolved` cluster is switched back to `active`.
 * - `commonContext` tallies are updated from the event, starting from empty
 *   lists when the cluster has none.
 *
 * @param cluster - Current cluster document; not mutated.
 * @param errorEvent - The new occurrence.
 * @param _fingerprint - Unused.
 * @returns The updated cluster document.
 */
export function updateClusterWithError(
  cluster: ErrorClusterDoc,
  errorEvent: ErrorEvent,
  _fingerprint: FingerprintResult
): ErrorClusterDoc {
  const timestamp = new Date().toISOString();

  // Simplified - real impl would track user set
  const countsAsNewUser = !cluster.uniqueUsers || errorEvent.userId !== undefined;
  const uniqueUsers = countsAsNewUser ? cluster.uniqueUsers + 1 : cluster.uniqueUsers;

  const previousContext = cluster.commonContext || {
    osVersions: [],
    appVersions: [],
    deviceModels: [],
    screenContexts: [],
  };

  return {
    ...cluster,
    lastSeenAt: timestamp,
    occurrenceCount: cluster.occurrenceCount + 1,
    uniqueUsers,
    // A new occurrence reopens a resolved cluster.
    status: cluster.status === 'resolved' ? 'active' : cluster.status,
    updatedAt: timestamp,
    commonContext: updateCommonContext(previousContext, errorEvent),
  };
}
|
||||
|
||||
/**
 * Adds one event to the per-cluster context tallies.
 *
 * Each of OS version, app version, device model and screen is counted under
 * its own list; a missing or empty value is counted as `'unknown'`.
 *
 * @param context - Current tallies; not mutated.
 * @param errorEvent - Event whose context fields are tallied.
 * @returns New tallies including this event.
 */
function updateCommonContext(
  context: NonNullable<ErrorClusterDoc['commonContext']>,
  errorEvent: ErrorEvent
): NonNullable<ErrorClusterDoc['commonContext']> {
  const { osVersion, appVersion, deviceModel, screen } = errorEvent;

  const osVersions = incrementCount(context.osVersions, osVersion || 'unknown', 'version');
  const appVersions = incrementCount(context.appVersions, appVersion || 'unknown', 'version');
  const deviceModels = incrementCount(context.deviceModels, deviceModel || 'unknown', 'model');
  const screenContexts = incrementCount(context.screenContexts, screen || 'unknown', 'screen');

  return { osVersions, appVersions, deviceModels, screenContexts };
}
|
||||
|
||||
/**
 * Increments the tally for `key` in a list of counted items, without mutating the input.
 *
 * Every item whose `keyField` equals `key` is copied with `count + 1`. When no
 * item matches, a new `{ [keyField]: key, count: 1 }` entry is appended.
 *
 * @param items - Current tallies.
 * @param key - Value to count.
 * @param keyField - Name of the property that holds the counted value.
 * @returns A new array with the updated tallies.
 */
function incrementCount<T extends { count: number }>(
  items: Array<T>,
  key: string,
  keyField: keyof T & string
): Array<T> {
  const updated: T[] = [];
  let matched = false;

  for (const item of items) {
    if ((item as unknown as Record<string, string>)[keyField] === key) {
      matched = true;
      updated.push({ ...item, count: item.count + 1 } as T);
    } else {
      updated.push(item);
    }
  }

  if (!matched) {
    updated.push({ [keyField]: key, count: 1 } as unknown as T);
  }
  return updated;
}
|
||||
@ -0,0 +1,932 @@
|
||||
/**
|
||||
* Feature extraction pipeline for churn prediction and health scoring
|
||||
* [1.1] Telemetry Feature Extraction
|
||||
*/
|
||||
|
||||
import type { TelemetryEventDoc } from '../telemetry/types.js';
|
||||
|
||||
// ============================================================================
|
||||
// Feature Definitions
|
||||
// ============================================================================
|
||||
|
||||
/** Behavioural features: recency, frequency, session depth and engagement trends. */
export interface UserBehaviorFeatures {
  // --- Recency ---
  daysSinceLastSession: number;
  daysSinceLastCoreAction: number;
  hoursSinceLastLogin: number;

  // --- Frequency ---
  sessionsLast24Hours: number;
  sessionsLast7Days: number;
  sessionsLast30Days: number;
  avgSessionsPerWeek: number;
  avgSessionsPerDay: number;

  // --- Session depth ---
  avgSessionDurationMinutes: number;
  totalSessionDurationMinutes: number;
  actionsPerSession: number;
  uniqueFeaturesUsed: number;

  // --- Engagement trends ---
  /** -1 to 1 (declining to increasing). */
  sessionFrequencyTrend: number;
  engagementDepthTrend: number;
}
|
||||
|
||||
/** Engagement features: breadth of feature usage and product-specific engagement. */
export interface EngagementFeatures {
  // --- Feature usage diversity ---
  /** 0-1 (normalized unique features / total features). */
  featureUsageDiversity: number;
  coreActionCompletionRate: number;
  /** New features tried per week. */
  featureAdoptionVelocity: number;

  // --- Product-specific engagement ---
  /** 0-1 based on advanced feature usage. */
  powerUserScore: number;
  onboardingCompletionRate: number;
  firstValueMomentAchieved: boolean;
  timeToFirstValueHours: number;
}
|
||||
|
||||
/** Performance features: exposure to errors, perceived speed and recovery behaviour. */
export interface PerformanceFeatures {
  // --- Error/stability exposure ---
  errorRateLast7Days: number;
  errorRateLast30Days: number;
  crashCountLast7Days: number;
  crashCountLast30Days: number;

  // --- Performance perception ---
  avgLatencyMs: number;
  slowRequestCount: number;
  timeoutCount: number;

  // --- Recovery behavior ---
  errorRecoveryRate: number;
  supportTicketCount: number;
}
|
||||
|
||||
/** Social features: sharing/collaboration activity and network effects. */
export interface SocialFeatures {
  // --- Sharing/collaboration ---
  shareCount: number;
  inviteCount: number;
  collaborationScore: number;

  // --- Network effects ---
  teamMemberCount: number;
  integrationsConnected: number;
  externalSharesLast30Days: number;
}
|
||||
|
||||
/** Revenue features: payment history, plan changes and support interactions. */
export interface RevenueFeatures {
  // --- Payment history ---
  /** 0=free, 1=pro, 2=enterprise. */
  planTier: number;
  lifetimeValue: number;
  mrrContribution: number;

  // --- Plan changes ---
  upgradeCount: number;
  downgradeCount: number;
  daysSinceLastPayment: number;
  daysSincePlanChange: number;

  // --- Support ---
  supportTicketCount: number;
  supportSatisfactionScore: number;
  escalatedTicketCount: number;
}
|
||||
|
||||
/** Rolling-window features: 7-day averages, week-over-week change and cohort percentiles. */
export interface RollingWindowFeatures {
  // --- 7-day rolling averages ---
  rollingAvgSessions7d: number;
  rollingAvgDuration7d: number;
  rollingAvgActions7d: number;

  // --- Week-over-week change (acceleration) ---
  /** % change. */
  wowSessionChange: number;
  wowDurationChange: number;
  wowActionsChange: number;

  // --- Cohort comparison (normalized vs similar users) ---
  /** 0-100. */
  cohortSessionPercentile: number;
  cohortEngagementPercentile: number;
  cohortRetentionPercentile: number;
}
|
||||
|
||||
/**
 * Optional per-product features. Fields are grouped by the product they belong
 * to; every field is optional, so a vector only carries the fields that apply.
 */
export interface ProductSpecificFeatures {
  // --- NomGap ---
  fastCompletionRate?: number;
  protocolAdherenceScore?: number;
  streakLength?: number;
  autophagyEngagementScore?: number;

  // --- JarvisJr ---
  agentDiversityScore?: number;
  voiceSessionRatio?: number;
  skillProgressionRate?: number;
  sessionCompletionRate?: number;

  // --- ChronoMind ---
  timerCompletionRate?: number;
  cascadeEffectiveness?: number;
  routineAdherenceScore?: number;
  urgencyResponseRate?: number;

  // --- MindLyst ---
  brainUsageDiversity?: number;
  triageAccuracyScore?: number;
  memoryCaptureFrequency?: number;
  reflectionCompletionRate?: number;

  // --- PeakPulse ---
  activitySessionFrequency?: number;
  goalCompletionRate?: number;
  streakMaintenanceScore?: number;
  socialSharingCount?: number;

  // --- LysnrAI ---
  dictationFrequency?: number;
  accuracyRate?: number;
  hotkeyUsageRate?: number;
  vocabularyGrowthRate?: number;
}
|
||||
|
||||
// ============================================================================
|
||||
// Time Window Aggregations
|
||||
// ============================================================================
|
||||
|
||||
/** Activity aggregates over four time windows: 24 hours, 7 days, 30 days and lifetime. */
export interface TimeWindowFeatures {
  /** Last 24 hours (recent behavior). */
  recent: {
    sessionCount: number;
    totalDuration: number;
    actionCount: number;
    errorCount: number;
    uniqueFeatures: string[];
  };

  /** Last 7 days (weekly patterns). */
  weekly: {
    sessionCount: number;
    totalDuration: number;
    actionCount: number;
    errorCount: number;
    uniqueFeatures: string[];
    daysActive: number;
  };

  /** Last 30 days (monthly trends). */
  monthly: {
    sessionCount: number;
    totalDuration: number;
    actionCount: number;
    errorCount: number;
    uniqueFeatures: string[];
    daysActive: number;
  };

  /** Life-to-date (all-time totals). */
  lifetime: {
    totalSessions: number;
    totalDuration: number;
    totalActions: number;
    totalErrors: number;
    allFeaturesUsed: string[];
    accountAgeDays: number;
  };
}
|
||||
|
||||
// ============================================================================
|
||||
// Complete Feature Vector
|
||||
// ============================================================================
|
||||
|
||||
/** The full set of features computed for one user of one product. */
export interface CompleteFeatureVector {
  userId: string;
  productId: string;
  /** When this vector was computed. */
  computedAt: Date;
  /** Time span of the events that were considered. */
  observationWindow: {
    start: Date;
    end: Date;
  };

  // --- Feature groups ---
  behavior: UserBehaviorFeatures;
  engagement: EngagementFeatures;
  performance: PerformanceFeatures;
  social: SocialFeatures;
  revenue: RevenueFeatures;
  rolling: RollingWindowFeatures;
  productSpecific: ProductSpecificFeatures;
  timeWindows: TimeWindowFeatures;

  // --- Metadata ---
  featureSchemaVersion: string;
  /** 0-1 based on completeness. */
  dataQualityScore: number;
}
|
||||
|
||||
// ============================================================================
|
||||
// Feature Extraction Functions
|
||||
// ============================================================================
|
||||
|
||||
/** Version tag written to `featureSchemaVersion` on every vector built by `extractFeaturesFromTelemetry`. */
const SCHEMA_VERSION = '1.0.0';
|
||||
|
||||
/**
 * Builds the complete feature vector for one user/product from raw telemetry.
 *
 * The observation window is the 30 calendar days ending at `referenceDate`.
 * Every feature group is computed from events inside that window, except the
 * time-window aggregates: those receive every event up to `referenceDate` so
 * that `timeWindows.lifetime` reflects all supplied history rather than just
 * the last 30 days. The 24 h / 7 d / 30 d windows are unaffected because
 * `extractTimeWindows` applies its own lower bounds.
 *
 * @param userId - Identifier copied onto the result.
 * @param productId - Product key; also selects the product-specific extractor.
 * @param events - Telemetry events; ordering is not assumed.
 * @param referenceDate - End of the observation window (defaults to now).
 * @returns The assembled feature vector.
 */
export function extractFeaturesFromTelemetry(
  userId: string,
  productId: string,
  events: TelemetryEventDoc[],
  referenceDate: Date = new Date()
): CompleteFeatureVector {
  const observationStart = new Date(referenceDate);
  observationStart.setDate(observationStart.getDate() - 30);

  const windowStartMs = observationStart.getTime();
  const windowEndMs = referenceDate.getTime();

  // Single pass, parsing each timestamp once.
  const eventsToDate: TelemetryEventDoc[] = [];
  const windowedEvents: TelemetryEventDoc[] = [];
  for (const event of events) {
    const eventMs = new Date(event.timestamp).getTime();
    // Negated comparison also drops unparseable timestamps (NaN) and future events.
    if (!(eventMs <= windowEndMs)) continue;
    eventsToDate.push(event);
    if (eventMs >= windowStartMs) windowedEvents.push(event);
  }

  // Time windows see the full history so lifetime totals are not truncated.
  const timeWindows = extractTimeWindows(eventsToDate, referenceDate);

  const behavior = extractBehaviorFeatures(windowedEvents, timeWindows, referenceDate);
  const engagement = extractEngagementFeatures(windowedEvents, timeWindows);
  const performance = extractPerformanceFeatures(windowedEvents, timeWindows);
  const social = extractSocialFeatures(windowedEvents);
  // Revenue features come from events only; no external data source is consulted here.
  const revenue = extractRevenueFeatures(windowedEvents);
  const rolling = extractRollingWindowFeatures(timeWindows);
  const productSpecific = extractProductSpecificFeatures(windowedEvents, productId);

  const dataQualityScore = calculateDataQualityScore(behavior, engagement, performance);

  return {
    userId,
    productId,
    computedAt: referenceDate,
    observationWindow: {
      start: observationStart,
      end: referenceDate,
    },
    behavior,
    engagement,
    performance,
    social,
    revenue,
    rolling,
    productSpecific,
    timeWindows,
    featureSchemaVersion: SCHEMA_VERSION,
    dataQualityScore,
  };
}
|
||||
|
||||
/**
 * Aggregates events into 24-hour, 7-day and 30-day windows ending at
 * `referenceDate`, plus totals over every event supplied.
 *
 * Only a lower bound is applied per window; callers are expected to have
 * removed events later than `referenceDate` if that matters. Events whose
 * timestamp cannot be parsed fall into no window but still count toward the
 * lifetime totals.
 *
 * @param events - Events to aggregate.
 * @param referenceDate - End of every window.
 * @param accountAgeDays - Account age reported in `lifetime`; defaults to 30
 *   because this function has no way to derive it from the events.
 * @returns Window aggregates and lifetime totals.
 */
function extractTimeWindows(
  events: TelemetryEventDoc[],
  referenceDate: Date,
  accountAgeDays = 30
): TimeWindowFeatures {
  const DAY_MS = 24 * 60 * 60 * 1000;
  const referenceMs = referenceDate.getTime();
  const oneDayAgoMs = referenceMs - DAY_MS;
  const sevenDaysAgoMs = referenceMs - 7 * DAY_MS;
  const thirtyDaysAgoMs = referenceMs - 30 * DAY_MS;

  // Bucket in one pass so each timestamp is parsed once.
  const last24h: TelemetryEventDoc[] = [];
  const last7d: TelemetryEventDoc[] = [];
  const last30d: TelemetryEventDoc[] = [];
  for (const event of events) {
    const eventMs = new Date(event.timestamp).getTime();
    if (eventMs >= thirtyDaysAgoMs) last30d.push(event);
    if (eventMs >= sevenDaysAgoMs) last7d.push(event);
    if (eventMs >= oneDayAgoMs) last24h.push(event);
  }

  const weekly = aggregateEvents(last7d, true);
  const monthly = aggregateEvents(last30d, true);

  return {
    recent: aggregateEvents(last24h),
    // aggregateEvents types daysActive as optional; the interface requires a number.
    weekly: { ...weekly, daysActive: weekly.daysActive ?? 0 },
    monthly: { ...monthly, daysActive: monthly.daysActive ?? 0 },
    lifetime: {
      totalSessions: countSessions(events),
      totalDuration: sumDurations(events),
      totalActions: countActions(events),
      totalErrors: countErrors(events),
      allFeaturesUsed: extractUniqueFeatures(events),
      accountAgeDays,
    },
  };
}
|
||||
|
||||
/**
 * Reduces a list of events to the counters used by the time-window features.
 *
 * @param events - Events belonging to one window.
 * @param trackDaysActive - When true, also counts distinct calendar days,
 *   taken as the part of each timestamp string before the `T`.
 * @returns Distinct session count, summed duration, action and error counts,
 *   distinct feature names, and `daysActive` (undefined unless tracked).
 */
function aggregateEvents(
  events: TelemetryEventDoc[],
  trackDaysActive = false
): {
  sessionCount: number;
  totalDuration: number;
  actionCount: number;
  errorCount: number;
  uniqueFeatures: string[];
  daysActive?: number;
} {
  const sessionIds = new Set<string>();
  const featureNames = new Set<string>();
  const dayKeys = new Set<string>();
  const tally = { duration: 0, actions: 0, errors: 0 };

  events.forEach((evt) => {
    if (evt.sessionId) sessionIds.add(evt.sessionId);
    if (evt.feature) featureNames.add(evt.feature);
    if (trackDaysActive) dayKeys.add(evt.timestamp.split('T')[0]);

    switch (evt.eventType) {
      case 'action':
        tally.actions += 1;
        break;
      case 'error':
        tally.errors += 1;
        break;
      default:
        break;
    }

    if (evt.duration) tally.duration += evt.duration;
  });

  return {
    sessionCount: sessionIds.size,
    totalDuration: tally.duration,
    actionCount: tally.actions,
    errorCount: tally.errors,
    uniqueFeatures: Array.from(featureNames),
    daysActive: trackDaysActive ? dayKeys.size : undefined,
  };
}
|
||||
|
||||
/**
 * Derives recency, frequency, duration and trend features for a user.
 *
 * @param events - Events inside the observation window.
 * @param timeWindows - Pre-computed window aggregates.
 * @param referenceDate - Point in time that recency is measured against.
 * @returns The behavior feature group.
 */
function extractBehaviorFeatures(
  events: TelemetryEventDoc[],
  timeWindows: TimeWindowFeatures,
  referenceDate: Date
): UserBehaviorFeatures {
  // Recency reported when no matching event exists in the window.
  const NO_ACTIVITY_DAYS = 30;
  const HOUR_MS = 60 * 60 * 1000;

  const lastSession = findLastSession(events);
  const lastCoreAction = findLastCoreAction(events);

  const daysSinceLastSession = lastSession
    ? daysBetween(lastSession.timestamp, referenceDate)
    : NO_ACTIVITY_DAYS;
  const daysSinceLastCoreAction = lastCoreAction
    ? daysBetween(lastCoreAction.timestamp, referenceDate)
    : NO_ACTIVITY_DAYS;

  // Computed from the timestamp directly; deriving it from whole days would
  // only ever yield multiples of 24.
  const hoursSinceLastLogin = lastSession
    ? Math.floor((referenceDate.getTime() - new Date(lastSession.timestamp).getTime()) / HOUR_MS)
    : NO_ACTIVITY_DAYS * 24;

  const monthly = timeWindows.monthly;
  const weekly = timeWindows.weekly;

  // NOTE(review): both averages are normalised by *active* days, not calendar
  // days — confirm this is the intended definition.
  const avgSessionsPerWeek = monthly.daysActive
    ? monthly.sessionCount / (monthly.daysActive / 7)
    : 0;
  const avgSessionsPerDay = monthly.daysActive
    ? monthly.sessionCount / monthly.daysActive
    : 0;

  // `/ 60` assumes totalDuration is in seconds — TODO confirm the unit.
  const avgSessionDurationMinutes = monthly.sessionCount
    ? monthly.totalDuration / monthly.sessionCount / 60
    : 0;

  const actionsPerSession = monthly.sessionCount
    ? monthly.actionCount / monthly.sessionCount
    : 0;

  // This week's session count against the average week of the month.
  const sessionFrequencyTrend = calculateTrend(weekly.sessionCount, monthly.sessionCount / 4);

  // Average duration per session, week vs. month. Both sides guard against
  // division by zero with a floor of 1 so the baseline stays a true average.
  const engagementDepthTrend = calculateTrend(
    weekly.totalDuration / Math.max(weekly.sessionCount, 1),
    monthly.totalDuration / Math.max(monthly.sessionCount, 1)
  );

  return {
    daysSinceLastSession,
    daysSinceLastCoreAction,
    hoursSinceLastLogin,
    sessionsLast24Hours: timeWindows.recent.sessionCount,
    sessionsLast7Days: weekly.sessionCount,
    sessionsLast30Days: monthly.sessionCount,
    avgSessionsPerWeek,
    avgSessionsPerDay,
    avgSessionDurationMinutes,
    totalSessionDurationMinutes: monthly.totalDuration / 60,
    actionsPerSession,
    uniqueFeaturesUsed: monthly.uniqueFeatures.length,
    sessionFrequencyTrend,
    engagementDepthTrend,
  };
}
|
||||
|
||||
/**
 * Derives breadth-of-use and onboarding features.
 *
 * @param events - Events inside the observation window.
 * @param timeWindows - Pre-computed window aggregates (the monthly window is used).
 * @returns The engagement feature group.
 */
function extractEngagementFeatures(
  events: TelemetryEventDoc[],
  timeWindows: TimeWindowFeatures
): EngagementFeatures {
  const TOTAL_POSSIBLE_FEATURES = 20; // Configurable based on product
  const ADVANCED_MARKERS = ['export', 'integration', 'automation', 'advanced'];
  const { monthly } = timeWindows;

  const usedFeatures = extractUniqueFeatures(events);

  // Events whose name mentions "core_action", relative to monthly action volume.
  const coreActionCount = events.filter((evt) => evt.eventName?.includes('core_action')).length;
  let coreActionCompletionRate = 0;
  if (monthly.actionCount) {
    coreActionCompletionRate = coreActionCount / monthly.actionCount;
  }

  // Power-user score: three or more advanced-looking features saturates at 1.
  let advancedCount = 0;
  for (const name of usedFeatures) {
    if (ADVANCED_MARKERS.some((marker) => name.includes(marker))) advancedCount += 1;
  }

  return {
    featureUsageDiversity: Math.min(usedFeatures.length / TOTAL_POSSIBLE_FEATURES, 1),
    coreActionCompletionRate,
    featureAdoptionVelocity: monthly.uniqueFeatures.length / 4, // per week
    powerUserScore: Math.min(advancedCount / 3, 1),
    onboardingCompletionRate: calculateOnboardingCompletion(events),
    firstValueMomentAchieved: hasFirstValueMoment(events),
    timeToFirstValueHours: calculateTimeToFirstValue(events),
  };
}
|
||||
|
||||
/**
 * Derives error, crash and latency features.
 *
 * The 7-day and 30-day sub-windows are measured back from `referenceDate`
 * rather than from the wall clock, so historical vectors can be recomputed
 * deterministically. Omitting the argument keeps the previous "relative to
 * now" behaviour.
 *
 * @param events - Events inside the observation window.
 * @param timeWindows - Pre-computed window aggregates.
 * @param referenceDate - End of the 7-day / 30-day sub-windows (defaults to now).
 * @returns The performance feature group.
 */
function extractPerformanceFeatures(
  events: TelemetryEventDoc[],
  timeWindows: TimeWindowFeatures,
  referenceDate: Date = new Date()
): PerformanceFeatures {
  const DAY_MS = 24 * 60 * 60 * 1000;
  const monthly = timeWindows.monthly;
  const weekly = timeWindows.weekly;

  const referenceMs = referenceDate.getTime();
  const weekStartMs = referenceMs - 7 * DAY_MS;
  const monthStartMs = referenceMs - 30 * DAY_MS;

  // Partition once; each timestamp is parsed a single time.
  const lastWeek: TelemetryEventDoc[] = [];
  const lastMonth: TelemetryEventDoc[] = [];
  for (const event of events) {
    const eventMs = new Date(event.timestamp).getTime();
    if (eventMs >= monthStartMs) lastMonth.push(event);
    if (eventMs >= weekStartMs) lastWeek.push(event);
  }

  // NOTE(review): the monthly count uses countErrors (type OR name match)
  // while the weekly count comes from the window aggregate (type match only),
  // so the two rates are not strictly comparable. Kept as-is.
  const monthlyErrors = countErrors(lastMonth);
  const weeklyErrors = weekly.errorCount;

  const errorRateLast30Days = monthly.actionCount
    ? monthlyErrors / monthly.actionCount
    : 0;
  const errorRateLast7Days = weekly.actionCount
    ? weeklyErrors / weekly.actionCount
    : 0;

  // Average over events that carry a duration below 30000 (outlier cut-off).
  const latencyEvents = events.filter((e) => e.duration && e.duration < 30000);
  const avgLatencyMs = latencyEvents.length
    ? latencyEvents.reduce((sum, e) => sum + (e.duration || 0), 0) / latencyEvents.length
    : 0;

  return {
    errorRateLast7Days,
    errorRateLast30Days,
    crashCountLast7Days: countCrashes(lastWeek),
    crashCountLast30Days: countCrashes(lastMonth),
    avgLatencyMs,
    slowRequestCount: countSlowRequests(events),
    timeoutCount: countTimeouts(events),
    errorRecoveryRate: calculateErrorRecoveryRate(events),
    supportTicketCount: countSupportTickets(events),
  };
}
|
||||
|
||||
/**
 * Derives sharing, invitation and collaboration features from event names
 * and properties.
 *
 * @param events - Events inside the observation window.
 * @returns The social feature group.
 */
function extractSocialFeatures(events: TelemetryEventDoc[]): SocialFeatures {
  const withNameContaining = (needle: string): TelemetryEventDoc[] =>
    events.filter((evt) => evt.eventName?.includes(needle));

  const shares = withNameContaining('share');
  const invites = withNameContaining('invite');
  const integrations = withNameContaining('integration');

  let externalShares = 0;
  for (const share of shares) {
    if (share.properties?.external === true) externalShares += 1;
  }

  return {
    shareCount: shares.length,
    inviteCount: invites.length,
    collaborationScore: calculateCollaborationScore(events),
    teamMemberCount: extractTeamMemberCount(events),
    integrationsConnected: integrations.length,
    externalSharesLast30Days: externalShares,
  };
}
|
||||
|
||||
/**
 * Derives plan, payment and support features from event names and properties.
 *
 * @param events - Events inside the observation window.
 * @returns The revenue feature group.
 */
function extractRevenueFeatures(events: TelemetryEventDoc[]): RevenueFeatures {
  const nameHas = (evt: TelemetryEventDoc, needle: string) => evt.eventName?.includes(needle);

  const planChanges = events.filter((evt) => nameHas(evt, 'plan') || nameHas(evt, 'subscription'));
  const supportEvents = events.filter((evt) => nameHas(evt, 'support'));

  // Upgrades and downgrades are counted only among plan/subscription events.
  let upgradeCount = 0;
  let downgradeCount = 0;
  for (const change of planChanges) {
    if (nameHas(change, 'upgrade')) upgradeCount += 1;
    if (nameHas(change, 'downgrade')) downgradeCount += 1;
  }

  let escalatedTicketCount = 0;
  for (const ticket of supportEvents) {
    if (ticket.properties?.escalated) escalatedTicketCount += 1;
  }

  return {
    planTier: extractPlanTier(events),
    lifetimeValue: extractLifetimeValue(events),
    mrrContribution: extractMrrContribution(events),
    upgradeCount,
    downgradeCount,
    daysSinceLastPayment: extractDaysSincePayment(events),
    daysSincePlanChange: extractDaysSincePlanChange(events),
    supportTicketCount: supportEvents.length,
    supportSatisfactionScore: calculateSupportSatisfaction(supportEvents),
    escalatedTicketCount,
  };
}
|
||||
|
||||
/**
 * Derives 7-day rolling averages, week-over-week changes and estimated
 * cohort percentiles from the window aggregates.
 *
 * @param timeWindows - Pre-computed window aggregates.
 * @returns The rolling-window feature group.
 */
function extractRollingWindowFeatures(timeWindows: TimeWindowFeatures): RollingWindowFeatures {
  const monthly = timeWindows.monthly;
  const weekly = timeWindows.weekly;

  // 7-day rolling averages: sessions per day, minutes per session, actions per session.
  const rollingAvgSessions7d = weekly.sessionCount / 7;
  const rollingAvgDuration7d = weekly.sessionCount
    ? weekly.totalDuration / weekly.sessionCount / 60
    : 0;
  const rollingAvgActions7d = weekly.sessionCount ? weekly.actionCount / weekly.sessionCount : 0;

  // Week-over-week session change: this week against the average week of the month.
  const avgWeekInMonth = monthly.sessionCount / 4;
  const wowSessionChange = avgWeekInMonth ? (weekly.sessionCount - avgWeekInMonth) / avgWeekInMonth : 0;

  // Baseline for duration is the monthly per-session average. It is already
  // independent of window length, so it must not be divided by the number of
  // weeks; doing so made a perfectly steady user show a +300% change.
  const avgSessionDurationInMonth = monthly.sessionCount
    ? monthly.totalDuration / monthly.sessionCount / 60
    : 0;
  const wowDurationChange = avgSessionDurationInMonth
    ? (rollingAvgDuration7d - avgSessionDurationInMonth) / avgSessionDurationInMonth
    : 0;

  // Cohort percentiles (would require cohort data - using estimates)
  const cohortSessionPercentile = estimateCohortPercentile(rollingAvgSessions7d, 'sessions');
  const cohortEngagementPercentile = estimateCohortPercentile(
    monthly.uniqueFeatures.length,
    'features'
  );
  const cohortRetentionPercentile = estimateCohortPercentile(
    monthly.daysActive || 0,
    'retention'
  );

  return {
    rollingAvgSessions7d,
    rollingAvgDuration7d,
    rollingAvgActions7d,
    wowSessionChange,
    wowDurationChange,
    wowActionsChange: wowSessionChange, // Approximated by the session change
    cohortSessionPercentile,
    cohortEngagementPercentile,
    cohortRetentionPercentile,
  };
}
|
||||
|
||||
// ============================================================================
|
||||
// Product-Specific Feature Extraction
|
||||
// ============================================================================
|
||||
|
||||
/**
 * Routes to the extractor registered for `productId`.
 *
 * @param events - Events inside the observation window.
 * @param productId - Product key; matched exactly.
 * @returns The product's feature set, or an empty object for unknown products.
 */
export function extractProductSpecificFeatures(
  events: TelemetryEventDoc[],
  productId: string
): ProductSpecificFeatures {
  const extractorsByProduct = new Map<string, (evts: TelemetryEventDoc[]) => ProductSpecificFeatures>([
    ['nomgap', extractNomGapFeatures],
    ['jarvisjr', extractJarvisJrFeatures],
    ['chronomind', extractChronoMindFeatures],
    ['mindlyst', extractMindLystFeatures],
    ['peakpulse', extractPeakPulseFeatures],
    ['lysnrai', extractLysnrAIFeatures],
  ]);

  const extractor = extractorsByProduct.get(productId);
  if (extractor === undefined) {
    return {};
  }
  return extractor(events);
}
|
||||
|
||||
/**
 * NomGap features: fasting completion, protocol adherence, streak length and
 * autophagy engagement.
 *
 * @param events - Events inside the observation window.
 * @returns NomGap-specific features.
 */
function extractNomGapFeatures(events: TelemetryEventDoc[]): ProductSpecificFeatures {
  let fastsStarted = 0;
  let fastsCompleted = 0;
  let protocolTotal = 0;
  let protocolAdhered = 0;
  let longestStreak = 0;

  for (const evt of events) {
    if (evt.feature === 'fasting') {
      if (evt.eventName === 'fast_completed') fastsCompleted += 1;
      if (evt.eventName === 'fast_started') fastsStarted += 1;
    }

    if (evt.feature === 'protocol') {
      protocolTotal += 1;
      if (evt.properties?.adhered) protocolAdhered += 1;
    }

    // Largest streak length reported by any streak-named event (floor of 0).
    if (evt.eventName?.includes('streak')) {
      longestStreak = Math.max(longestStreak, evt.properties?.streakLength || 0);
    }
  }

  return {
    fastCompletionRate: fastsStarted ? fastsCompleted / fastsStarted : 0,
    protocolAdherenceScore: protocolTotal ? protocolAdhered / protocolTotal : 0,
    streakLength: longestStreak,
    autophagyEngagementScore: calculateAutophagyEngagement(events),
  };
}
|
||||
|
||||
/**
 * JarvisJr features: agent diversity, voice/text mix, skill progression and
 * session completion.
 *
 * @param events - Events inside the observation window.
 * @returns JarvisJr-specific features.
 */
function extractJarvisJrFeatures(events: TelemetryEventDoc[]): ProductSpecificFeatures {
  // Distinct agent ids. Agent events that carry no id are ignored so a
  // missing property is not counted as an extra agent.
  const agentIds = new Set<unknown>();
  for (const evt of events) {
    if (evt.feature !== 'agent') continue;
    const agentId = evt.properties?.agentId;
    if (agentId !== undefined && agentId !== null) agentIds.add(agentId);
  }

  const voiceEvents = events.filter((e) => e.properties?.mode === 'voice');
  const textEvents = events.filter((e) => e.properties?.mode === 'text');
  const totalModeEvents = voiceEvents.length + textEvents.length;

  const skillEvents = events.filter((e) => e.eventName?.includes('skill'));

  return {
    agentDiversityScore: Math.min(agentIds.size / 3, 1), // saturates at three agents
    voiceSessionRatio: totalModeEvents ? voiceEvents.length / totalModeEvents : 0,
    skillProgressionRate: calculateSkillProgression(skillEvents),
    sessionCompletionRate: calculateSessionCompletionRate(events),
  };
}
|
||||
|
||||
/**
 * ChronoMind features: timer completion, cascade acknowledgement, routine
 * adherence and urgency response.
 *
 * @param events - Events inside the observation window.
 * @returns ChronoMind-specific features.
 */
function extractChronoMindFeatures(events: TelemetryEventDoc[]): ProductSpecificFeatures {
  const timerEvents = events.filter((e) => e.feature === 'timer');
  const completedTimers = timerEvents.filter((e) => e.eventName === 'timer_completed').length;
  const totalTimers = timerEvents.filter((e) => e.eventName === 'timer_started').length;

  const cascadeEvents = events.filter((e) => e.feature === 'cascade');
  const acknowledgedCascades = cascadeEvents.filter((e) => e.properties?.acknowledged).length;

  // Routine adherence is derived by the helper from the routine events themselves.
  const routineEvents = events.filter((e) => e.feature === 'routine');

  return {
    timerCompletionRate: totalTimers ? completedTimers / totalTimers : 0,
    cascadeEffectiveness: cascadeEvents.length ? acknowledgedCascades / cascadeEvents.length : 0,
    routineAdherenceScore: calculateRoutineAdherence(routineEvents),
    urgencyResponseRate: calculateUrgencyResponse(events),
  };
}
|
||||
|
||||
/**
 * MindLyst features: brain diversity, triage accuracy, memory-capture
 * frequency and reflection completion.
 *
 * @param events - Events inside the observation window.
 * @returns MindLyst-specific features.
 */
function extractMindLystFeatures(events: TelemetryEventDoc[]): ProductSpecificFeatures {
  // Distinct brain ids. Brain events without an id are ignored so a missing
  // property is not counted as an extra brain.
  const brainIds = new Set<unknown>();
  for (const evt of events) {
    if (evt.feature !== 'brain') continue;
    const brainId = evt.properties?.brainId;
    if (brainId !== undefined && brainId !== null) brainIds.add(brainId);
  }

  const triageEvents = events.filter((e) => e.eventName?.includes('triage'));
  const accurateTriages = triageEvents.filter((e) => e.properties?.accurate).length;

  const memoryEvents = events.filter((e) => e.eventName?.includes('memory_capture'));
  const reflectionEvents = events.filter((e) => e.eventName?.includes('reflection'));
  const completedReflections = reflectionEvents.filter((e) => e.properties?.completed).length;

  return {
    brainUsageDiversity: Math.min(brainIds.size / 3, 1), // saturates at three brains
    triageAccuracyScore: triageEvents.length ? accurateTriages / triageEvents.length : 0,
    memoryCaptureFrequency: memoryEvents.length / 30, // per day
    reflectionCompletionRate: reflectionEvents.length
      ? completedReflections / reflectionEvents.length
      : 0,
  };
}
|
||||
|
||||
/**
 * PeakPulse features: activity frequency, goal completion, streak
 * maintenance and sharing volume.
 *
 * @param events - Events inside the observation window.
 * @returns PeakPulse-specific features.
 */
function extractPeakPulseFeatures(events: TelemetryEventDoc[]): ProductSpecificFeatures {
  let activitySessions = 0;
  let goalsSeen = 0;
  let goalsCompleted = 0;
  let longestStreak = 0;
  let shares = 0;

  for (const evt of events) {
    if (evt.feature === 'activity_session') activitySessions += 1;

    if (evt.feature === 'goal') {
      goalsSeen += 1;
      if (evt.properties?.completed) goalsCompleted += 1;
    }

    if (evt.eventName?.includes('streak')) {
      longestStreak = Math.max(longestStreak, evt.properties?.streakLength || 0);
    }

    if (evt.eventName?.includes('share')) shares += 1;
  }

  return {
    activitySessionFrequency: activitySessions / 30, // per day
    goalCompletionRate: goalsSeen ? goalsCompleted / goalsSeen : 0,
    streakMaintenanceScore: Math.min(longestStreak / 7, 1),
    socialSharingCount: shares,
  };
}
|
||||
|
||||
/**
 * LysnrAI features: dictation frequency, mean reported accuracy, hotkey
 * usage relative to dictations, and vocabulary growth.
 *
 * @param events - Events inside the observation window.
 * @returns LysnrAI-specific features.
 */
function extractLysnrAIFeatures(events: TelemetryEventDoc[]): ProductSpecificFeatures {
  const dictationEvents = events.filter((e) => e.feature === 'dictation');

  // Mean accuracy over dictation events that report one.
  const accuracyEvents = dictationEvents.filter((e) => e.properties?.accuracy !== undefined);
  const avgAccuracy = accuracyEvents.length
    ? accuracyEvents.reduce((sum, e) => sum + (e.properties?.accuracy || 0), 0) /
      accuracyEvents.length
    : 0;

  const hotkeyEvents = events.filter((e) => e.eventName?.includes('hotkey'));
  const vocabularyEvents = events.filter((e) => e.eventName?.includes('vocabulary'));

  return {
    dictationFrequency: dictationEvents.length / 30, // per day
    accuracyRate: avgAccuracy,
    // Floor of 1 avoids dividing by zero when there are no dictations.
    hotkeyUsageRate: hotkeyEvents.length / Math.max(dictationEvents.length, 1),
    vocabularyGrowthRate: calculateVocabularyGrowth(vocabularyEvents),
  };
}
|
||||
|
||||
// ============================================================================
|
||||
// Helper Functions
|
||||
// ============================================================================
|
||||
|
||||
/**
 * Returns the most recent session-related event: one whose type is
 * `session_start` or that carries a session id.
 *
 * Uses a single pass instead of sorting. Events whose timestamp cannot be
 * parsed are skipped, and on equal timestamps the earliest-listed event wins.
 *
 * @param events - Events to search; ordering is not assumed.
 * @returns The latest matching event, or undefined when none qualifies.
 */
function findLastSession(events: TelemetryEventDoc[]): TelemetryEventDoc | undefined {
  let latest: TelemetryEventDoc | undefined;
  let latestMs = -Infinity;

  for (const event of events) {
    if (!(event.eventType === 'session_start' || event.sessionId)) continue;
    const eventMs = new Date(event.timestamp).getTime();
    // NaN never compares greater, so invalid timestamps are ignored.
    if (eventMs > latestMs) {
      latest = event;
      latestMs = eventMs;
    }
  }

  return latest;
}
|
||||
|
||||
/**
 * Returns the most recent core-action event: one flagged
 * `properties.isCoreAction === true` or whose name contains "core".
 *
 * Uses a single pass instead of sorting. Events whose timestamp cannot be
 * parsed are skipped, and on equal timestamps the earliest-listed event wins.
 *
 * @param events - Events to search; ordering is not assumed.
 * @returns The latest matching event, or undefined when none qualifies.
 */
function findLastCoreAction(events: TelemetryEventDoc[]): TelemetryEventDoc | undefined {
  let latest: TelemetryEventDoc | undefined;
  let latestMs = -Infinity;

  for (const event of events) {
    if (!(event.properties?.isCoreAction === true || event.eventName?.includes('core'))) continue;
    const eventMs = new Date(event.timestamp).getTime();
    // NaN never compares greater, so invalid timestamps are ignored.
    if (eventMs > latestMs) {
      latest = event;
      latestMs = eventMs;
    }
  }

  return latest;
}
|
||||
|
||||
/** Counts distinct truthy session ids across the events. */
function countSessions(events: TelemetryEventDoc[]): number {
  const seen = new Set<unknown>();
  for (const evt of events) {
    if (evt.sessionId) seen.add(evt.sessionId);
  }
  return seen.size;
}
|
||||
|
||||
/** Adds up `duration` over all events, treating a missing or falsy duration as 0. */
function sumDurations(events: TelemetryEventDoc[]): number {
  let total = 0;
  for (const evt of events) {
    total = total + (evt.duration || 0);
  }
  return total;
}
|
||||
|
||||
/** Counts events whose `eventType` is exactly `'action'`. */
function countActions(events: TelemetryEventDoc[]): number {
  return events.reduce((count, evt) => (evt.eventType === 'action' ? count + 1 : count), 0);
}
|
||||
|
||||
/** Counts events that are typed `'error'` or whose name contains "error". */
function countErrors(events: TelemetryEventDoc[]): number {
  let errorTotal = 0;
  for (const evt of events) {
    if (evt.eventType === 'error' || evt.eventName?.includes('error')) {
      errorTotal += 1;
    }
  }
  return errorTotal;
}
|
||||
|
||||
/**
 * Lists the distinct, non-empty feature names seen in the events, in
 * first-seen order.
 *
 * @param events - Events to scan.
 * @returns De-duplicated feature names.
 */
function extractUniqueFeatures(events: TelemetryEventDoc[]): string[] {
  // The original expression was missing a closing parenthesis and did not parse.
  const names = new Set<string>();
  for (const evt of events) {
    if (evt.feature) names.add(evt.feature);
  }
  return Array.from(names);
}
|
||||
|
||||
/**
 * Whole days (rounded down) from `timestamp` to `reference`; negative when
 * the timestamp lies after the reference.
 */
function daysBetween(timestamp: string, reference: Date): number {
  const MS_PER_DAY = 1000 * 60 * 60 * 24;
  const fromMs = new Date(timestamp).getTime();
  const elapsedMs = reference.getTime() - fromMs;
  return Math.floor(elapsedMs / MS_PER_DAY);
}
|
||||
|
||||
/**
 * Relative change of `current` against `baseline`, limited to [-1, 1].
 * With a zero baseline the result is 1 for any positive `current`, else 0.
 */
function calculateTrend(current: number, baseline: number): number {
  if (baseline !== 0) {
    const relativeChange = (current - baseline) / baseline;
    if (relativeChange > 1) return 1;
    if (relativeChange < -1) return -1;
    return relativeChange;
  }
  return current > 0 ? 1 : 0;
}
|
||||
|
||||
/**
 * Scores how much usable signal a feature vector contains, from 0 to 1.
 *
 * Averages up to three factors, each in [0, 1]:
 *  - session volume (saturates at 10 sessions in 30 days),
 *  - feature breadth (saturates at 5 distinct features),
 *  - error-free rate (1 minus the 30-day error rate).
 * The first two are skipped when their input is zero.
 *
 * @param behavior - Behavior features; supplies session and feature counts.
 * @param engagement - Unused by the computation; kept for signature compatibility.
 * @param performance - Performance features; supplies the 30-day error rate.
 * @returns Mean of the contributing factors, or 0 when none contribute.
 */
function calculateDataQualityScore(
  behavior: UserBehaviorFeatures,
  engagement: EngagementFeatures,
  performance: PerformanceFeatures
): number {
  let score = 0;
  let factors = 0;

  if (behavior.sessionsLast30Days > 0) {
    score += Math.min(behavior.sessionsLast30Days / 10, 1);
    factors++;
  }

  // The distinct-feature count is produced on the behavior group; reading it
  // from the engagement group always yielded undefined, so this factor never
  // contributed.
  if (behavior.uniqueFeaturesUsed > 0) {
    score += Math.min(behavior.uniqueFeaturesUsed / 5, 1);
    factors++;
  }

  if (performance.errorRateLast30Days >= 0) {
    // The error rate is errors / actions and can exceed 1; clamp so the
    // factor cannot go negative and drag the score below 0.
    score += Math.max(0, 1 - performance.errorRateLast30Days);
    factors++;
  }

  // NOTE(review): with no sessions and no features the error factor alone
  // yields a score of 1 — confirm whether an empty history should score 0.
  return factors > 0 ? score / factors : 0;
}
|
||||
|
||||
// Placeholder implementations for product-specific helpers
|
||||
/** Share of a 10-event target reached by events with `properties.stage === 'autophagy'`, capped at 1. */
function calculateAutophagyEngagement(events: TelemetryEventDoc[]): number {
  let autophagyCount = 0;
  for (const evt of events) {
    if (evt.properties?.stage === 'autophagy') autophagyCount += 1;
  }
  return Math.min(autophagyCount / 10, 1);
}
|
||||
|
||||
/** Fraction of the given events with a truthy `properties.progressed`; 0 for an empty list. */
function calculateSkillProgression(events: TelemetryEventDoc[]): number {
  const total = events.length;
  if (total === 0) {
    return 0;
  }
  let progressedCount = 0;
  for (const evt of events) {
    if (evt.properties?.progressed) progressedCount += 1;
  }
  return progressedCount / total;
}
|
||||
|
||||
/**
 * Ratio of events whose name contains "completed" to events whose name
 * contains "started"; 0 when nothing was started.
 */
function calculateSessionCompletionRate(events: TelemetryEventDoc[]): number {
  let startedCount = 0;
  let completedCount = 0;
  for (const evt of events) {
    if (evt.eventName?.includes('started')) startedCount += 1;
    if (evt.eventName?.includes('completed')) completedCount += 1;
  }
  if (!startedCount) {
    return 0;
  }
  return completedCount / startedCount;
}
|
||||
|
||||
/** Fraction of the given events with a truthy `properties.onTime`; 0 for an empty list. */
function calculateRoutineAdherence(events: TelemetryEventDoc[]): number {
  const total = events.length;
  if (total === 0) {
    return 0;
  }
  const onTimeCount = events.reduce((n, evt) => (evt.properties?.onTime ? n + 1 : n), 0);
  return onTimeCount / total;
}
|
||||
|
||||
/**
 * Among events flagged `properties.urgent === true`, the fraction with a
 * truthy `properties.responded`; 0 when there are no urgent events.
 */
function calculateUrgencyResponse(events: TelemetryEventDoc[]): number {
  let urgentCount = 0;
  let respondedCount = 0;
  for (const evt of events) {
    if (evt.properties?.urgent !== true) continue;
    urgentCount += 1;
    if (evt.properties?.responded) respondedCount += 1;
  }
  return urgentCount === 0 ? 0 : respondedCount / urgentCount;
}
|
||||
|
||||
/** Sum of `properties.wordsAdded` over the events, divided by 30 (per day). */
function calculateVocabularyGrowth(events: TelemetryEventDoc[]): number {
  let wordsAdded = 0;
  for (const evt of events) {
    wordsAdded = wordsAdded + (evt.properties?.wordsAdded || 0);
  }
  return wordsAdded / 30; // per day
}
|
||||
|
||||
/**
 * Completed onboarding events (name contains "onboarding", truthy
 * `properties.completed`) as a share of 5 expected steps, capped at 1.
 */
function calculateOnboardingCompletion(events: TelemetryEventDoc[]): number {
  const TOTAL_STEPS = 5; // Configurable
  let completedSteps = 0;
  for (const evt of events) {
    if (evt.eventName?.includes('onboarding') && evt.properties?.completed) {
      completedSteps += 1;
    }
  }
  return Math.min(completedSteps / TOTAL_STEPS, 1);
}
|
||||
|
||||
/** True when any event's name contains "first_value" or has a truthy `properties.ahaMoment`. */
function hasFirstValueMoment(events: TelemetryEventDoc[]): boolean {
  for (const evt of events) {
    if (evt.eventName?.includes('first_value') || evt.properties?.ahaMoment) {
      return true;
    }
  }
  return false;
}
|
||||
|
||||
/**
 * Hours between the earliest `session_start` event and the earliest event
 * whose name contains "first_value".
 *
 * Both events are chosen by timestamp rather than by position in the array,
 * so the result does not depend on how the caller ordered the events. Events
 * with unparseable timestamps are ignored.
 *
 * @param events - Events to scan; ordering is not assumed.
 * @returns Elapsed hours, or 0 when either event is missing.
 */
function calculateTimeToFirstValue(events: TelemetryEventDoc[]): number {
  const HOUR_MS = 1000 * 60 * 60;
  let firstSessionMs = Infinity;
  let firstValueMs = Infinity;

  for (const event of events) {
    const isSessionStart = event.eventType === 'session_start';
    const isFirstValue = Boolean(event.eventName?.includes('first_value'));
    if (!isSessionStart && !isFirstValue) continue;

    const eventMs = new Date(event.timestamp).getTime();
    // NaN never compares smaller, so invalid timestamps are ignored.
    if (isSessionStart && eventMs < firstSessionMs) firstSessionMs = eventMs;
    if (isFirstValue && eventMs < firstValueMs) firstValueMs = eventMs;
  }

  if (firstSessionMs === Infinity || firstValueMs === Infinity) return 0;
  return (firstValueMs - firstSessionMs) / HOUR_MS;
}
|
||||
|
||||
/** Counts events whose name contains "crash" or that have a truthy `properties.crash`. */
function countCrashes(events: TelemetryEventDoc[]): number {
  let crashTotal = 0;
  for (const evt of events) {
    if (evt.eventName?.includes('crash') || evt.properties?.crash) crashTotal += 1;
  }
  return crashTotal;
}
|
||||
|
||||
/** Counts events whose `duration` is strictly greater than 5000. */
function countSlowRequests(events: TelemetryEventDoc[]): number {
  const SLOW_THRESHOLD = 5000;
  return events.reduce(
    (slow, evt) => (evt.duration && evt.duration > SLOW_THRESHOLD ? slow + 1 : slow),
    0
  );
}
|
||||
|
||||
/** Counts events with a truthy `properties.timeout` or a name containing "timeout". */
function countTimeouts(events: TelemetryEventDoc[]): number {
  let timeoutTotal = 0;
  for (const evt of events) {
    if (evt.properties?.timeout || evt.eventName?.includes('timeout')) timeoutTotal += 1;
  }
  return timeoutTotal;
}
|
||||
|
||||
/**
 * Fraction of `'error'`-typed events with a truthy `properties.recovered`;
 * 1 when there are no error events at all.
 */
function calculateErrorRecoveryRate(events: TelemetryEventDoc[]): number {
  let errorCount = 0;
  let recoveredCount = 0;
  for (const evt of events) {
    if (evt.eventType !== 'error') continue;
    errorCount += 1;
    if (evt.properties?.recovered) recoveredCount += 1;
  }
  return errorCount === 0 ? 1 : recoveredCount / errorCount;
}
|
||||
|
||||
/** Counts events whose name contains "support_ticket". */
function countSupportTickets(events: TelemetryEventDoc[]): number {
  let ticketTotal = 0;
  for (const evt of events) {
    if (evt.eventName?.includes('support_ticket')) ticketTotal += 1;
  }
  return ticketTotal;
}
|
||||
|
||||
/** Share of a 10-event target reached by events with `properties.collaborative === true`, capped at 1. */
function calculateCollaborationScore(events: TelemetryEventDoc[]): number {
  let collaborativeCount = 0;
  for (const evt of events) {
    if (evt.properties?.collaborative === true) collaborativeCount += 1;
  }
  return Math.min(collaborativeCount / 10, 1);
}
|
||||
|
||||
/**
 * Largest `properties.teamSize` reported by any event (falsy sizes count as
 * 0); 0 when no event reports a team size.
 */
function extractTeamMemberCount(events: TelemetryEventDoc[]): number {
  let anyReported = false;
  let largest = -Infinity;
  for (const evt of events) {
    if (evt.properties?.teamSize === undefined) continue;
    anyReported = true;
    largest = Math.max(largest, evt.properties?.teamSize || 0);
  }
  return anyReported ? largest : 0;
}
|
||||
|
||||
/**
 * Plan tier from the first event (in array order) that defines
 * `properties.planTier`; 0 when none does or the value is falsy.
 */
function extractPlanTier(events: TelemetryEventDoc[]): number {
  for (const evt of events) {
    if (evt.properties?.planTier !== undefined) {
      return evt.properties?.planTier || 0;
    }
  }
  return 0;
}
|
||||
|
||||
/** Sum of `properties.revenue` over all events, treating missing or falsy values as 0. */
function extractLifetimeValue(events: TelemetryEventDoc[]): number {
  let revenueTotal = 0;
  for (const evt of events) {
    revenueTotal = revenueTotal + (evt.properties?.revenue || 0);
  }
  return revenueTotal;
}
|
||||
|
||||
/**
 * MRR from the first event (in array order) that defines `properties.mrr`;
 * 0 when none does or the value is falsy.
 */
function extractMrrContribution(events: TelemetryEventDoc[]): number {
  for (const evt of events) {
    if (evt.properties?.mrr !== undefined) {
      return evt.properties?.mrr || 0;
    }
  }
  return 0;
}
|
||||
|
||||
/**
 * Whole days since the most recent event whose name contains "payment".
 *
 * @param events - Events to scan; ordering is not assumed.
 * @param referenceDate - Point in time to measure from. Defaults to now,
 *   which matches the previous behaviour; pass the vector's reference date
 *   to get reproducible results for historical computations.
 * @returns Days since the last payment event, or 30 when there is none.
 */
function extractDaysSincePayment(
  events: TelemetryEventDoc[],
  referenceDate: Date = new Date()
): number {
  const NO_PAYMENT_DAYS = 30;
  let latest: TelemetryEventDoc | undefined;
  let latestMs = -Infinity;

  for (const event of events) {
    if (!event.eventName?.includes('payment')) continue;
    const eventMs = new Date(event.timestamp).getTime();
    // NaN never compares greater, so invalid timestamps are ignored.
    if (eventMs > latestMs) {
      latest = event;
      latestMs = eventMs;
    }
  }

  return latest ? daysBetween(latest.timestamp, referenceDate) : NO_PAYMENT_DAYS;
}
|
||||
|
||||
/**
 * Whole days since the most recent event whose name contains "plan_change".
 *
 * @param events - Events to scan; ordering is not assumed.
 * @param referenceDate - Point in time to measure from. Defaults to now,
 *   which matches the previous behaviour; pass the vector's reference date
 *   to get reproducible results for historical computations.
 * @returns Days since the last plan change, or 90 when there is none.
 */
function extractDaysSincePlanChange(
  events: TelemetryEventDoc[],
  referenceDate: Date = new Date()
): number {
  const NO_PLAN_CHANGE_DAYS = 90;
  let latest: TelemetryEventDoc | undefined;
  let latestMs = -Infinity;

  for (const event of events) {
    if (!event.eventName?.includes('plan_change')) continue;
    const eventMs = new Date(event.timestamp).getTime();
    // NaN never compares greater, so invalid timestamps are ignored.
    if (eventMs > latestMs) {
      latest = event;
      latestMs = eventMs;
    }
  }

  return latest ? daysBetween(latest.timestamp, referenceDate) : NO_PLAN_CHANGE_DAYS;
}
|
||||
|
||||
/**
 * Mean `properties.satisfaction` over events that define it (falsy ratings
 * count as 0); 0 when no event is rated.
 */
function calculateSupportSatisfaction(events: TelemetryEventDoc[]): number {
  let ratedCount = 0;
  let ratingTotal = 0;
  for (const evt of events) {
    if (evt.properties?.satisfaction === undefined) continue;
    ratedCount += 1;
    ratingTotal = ratingTotal + (evt.properties?.satisfaction || 0);
  }
  if (ratedCount === 0) {
    return 0;
  }
  return ratingTotal / ratedCount;
}
|
||||
|
||||
/**
 * Rough percentile estimate: a value equal to the metric's baseline maps to
 * 50, scaling linearly and capped at 100. Unknown metrics use a baseline of 1.
 *
 * Simplified estimation - in production, this would query cohort data.
 */
function estimateCohortPercentile(value: number, metric: string): number {
  const baselineByMetric: Record<string, number> = {
    sessions: 2,
    features: 5,
    retention: 15,
  };

  const ratio = value / (baselineByMetric[metric] || 1);
  const percentile = Math.round(ratio * 50);
  return percentile > 100 ? 100 : percentile;
}
|
||||
|
||||
/**
 * Keeps the events whose timestamp is at or after seven days before
 * `referenceDate`. No upper bound is applied.
 *
 * @param events - Events to filter.
 * @param referenceDate - End of the window. Defaults to now, which matches
 *   the previous behaviour.
 * @returns Events from the last seven days, in their original order.
 */
function weeklyEvents(
  events: TelemetryEventDoc[],
  referenceDate: Date = new Date()
): TelemetryEventDoc[] {
  const weekAgoMs = referenceDate.getTime() - 7 * 24 * 60 * 60 * 1000;
  return events.filter((e) => new Date(e.timestamp).getTime() >= weekAgoMs);
}
|
||||
|
||||
/**
 * Keeps the events whose timestamp is at or after thirty days before
 * `referenceDate`. No upper bound is applied.
 *
 * @param events - Events to filter.
 * @param referenceDate - End of the window. Defaults to now, which matches
 *   the previous behaviour.
 * @returns Events from the last thirty days, in their original order.
 */
function monthlyEvents(
  events: TelemetryEventDoc[],
  referenceDate: Date = new Date()
): TelemetryEventDoc[] {
  const monthAgoMs = referenceDate.getTime() - 30 * 24 * 60 * 60 * 1000;
  return events.filter((e) => new Date(e.timestamp).getTime() >= monthAgoMs);
}
|
||||
Loading…
Reference in New Issue
Block a user