feat(ai-diagnostics): add HDBSCAN clustering algorithm [1.2.3]
This commit is contained in:
parent
783067e17d
commit
8951ab2c92
@ -503,19 +503,19 @@ interface ExperimentEventDoc {
|
||||
| ----- | ----------------------------- | ------ | ------ |
|
||||
| 1.1 | Experiment types & schemas | ✅ | a9b2247 |
|
||||
| 1.1 | Cosmos containers | ✅ | a9b2247 |
|
||||
| 1.2 | Deterministic bucketing | ⬜ | — |
|
||||
| 1.2 | Assignment strategies | ⬜ | — |
|
||||
| 1.2 | Audience targeting | ⬜ | — |
|
||||
| 1.3 | Metric definitions | ⬜ | — |
|
||||
| 1.3 | Event ingestion | ⬜ | — |
|
||||
| 2.1 | Bayesian inference engine | ⬜ | — |
|
||||
| 2.1 | Probability calculations | ⬜ | — |
|
||||
| 2.1 | Credible intervals | ⬜ | — |
|
||||
| 2.2 | Early stopping rules | ⬜ | — |
|
||||
| 2.2 | Auto-promotion | ⬜ | — |
|
||||
| 2.2 | Guardrails | ⬜ | — |
|
||||
| 2.3 | Thompson sampling | ⬜ | — |
|
||||
| 2.3 | Exploration vs exploitation | ⬜ | — |
|
||||
| 1.2 | Deterministic bucketing | ✅ | 783067e |
|
||||
| 1.2 | Assignment strategies | ✅ | 783067e |
|
||||
| 1.2 | Audience targeting | ✅ | 783067e |
|
||||
| 1.3 | Metric definitions | ✅ | 783067e |
|
||||
| 1.3 | Event ingestion | ✅ | 783067e |
|
||||
| 2.1 | Bayesian inference engine | ✅ | 783067e |
|
||||
| 2.1 | Probability calculations | ✅ | 783067e |
|
||||
| 2.1 | Credible intervals | ✅ | 783067e |
|
||||
| 2.2 | Early stopping rules | ✅ | 783067e |
|
||||
| 2.2 | Auto-promotion | ✅ | 783067e |
|
||||
| 2.2 | Guardrails | ✅ | 783067e |
|
||||
| 2.3 | Thompson sampling | ✅ | 783067e |
|
||||
| 2.3 | Exploration vs exploitation | ✅ | 783067e |
|
||||
| 2.3 | Regret minimization | ⬜ | — |
|
||||
| 3.1 | Pattern detection | ⬜ | — |
|
||||
| 3.1 | Anomaly detection | ⬜ | — |
|
||||
|
||||
@ -0,0 +1,647 @@
|
||||
import type { ErrorClusterDoc } from './types.js';
|
||||
|
||||
// ============================================================================
|
||||
// HDBSCAN (Hierarchical Density-Based Spatial Clustering) Implementation
|
||||
// ============================================================================
|
||||
|
||||
interface DataPoint {
|
||||
id: string;
|
||||
embedding: number[];
|
||||
metadata: {
|
||||
errorType: string;
|
||||
messageTemplate: string;
|
||||
stackSignature: string;
|
||||
productId: string;
|
||||
firstSeenAt: string;
|
||||
};
|
||||
}
|
||||
|
||||
interface Cluster {
|
||||
id: string;
|
||||
points: DataPoint[];
|
||||
centroid: number[];
|
||||
stability: number;
|
||||
density: number;
|
||||
}
|
||||
|
||||
interface HDBSCANOptions {
|
||||
minClusterSize: number;
|
||||
minSamples: number;
|
||||
metric: 'euclidean' | 'cosine' | 'manhattan';
|
||||
alpha: number;
|
||||
}
|
||||
|
||||
const DEFAULT_OPTIONS: HDBSCANOptions = {
|
||||
minClusterSize: 3,
|
||||
minSamples: 2,
|
||||
metric: 'cosine',
|
||||
alpha: 1.0,
|
||||
};
|
||||
|
||||
// ============================================================================
|
||||
// Distance Metrics
|
||||
// ============================================================================
|
||||
|
||||
function calculateDistance(a: number[], b: number[], metric: string): number {
|
||||
switch (metric) {
|
||||
case 'euclidean':
|
||||
return euclideanDistance(a, b);
|
||||
case 'cosine':
|
||||
return 1 - cosineSimilarity(a, b); // Convert similarity to distance
|
||||
case 'manhattan':
|
||||
return manhattanDistance(a, b);
|
||||
default:
|
||||
return euclideanDistance(a, b);
|
||||
}
|
||||
}
|
||||
|
||||
function euclideanDistance(a: number[], b: number[]): number {
|
||||
let sum = 0;
|
||||
for (let i = 0; i < a.length; i++) {
|
||||
const diff = a[i] - b[i];
|
||||
sum += diff * diff;
|
||||
}
|
||||
return Math.sqrt(sum);
|
||||
}
|
||||
|
||||
function cosineSimilarity(a: number[], b: number[]): number {
|
||||
if (a.length !== b.length) return 0;
|
||||
|
||||
let dotProduct = 0;
|
||||
let normA = 0;
|
||||
let normB = 0;
|
||||
|
||||
for (let i = 0; i < a.length; i++) {
|
||||
dotProduct += a[i] * b[i];
|
||||
normA += a[i] * a[i];
|
||||
normB += b[i] * b[i];
|
||||
}
|
||||
|
||||
if (normA === 0 || normB === 0) return 0;
|
||||
|
||||
return dotProduct / (Math.sqrt(normA) * Math.sqrt(normB));
|
||||
}
|
||||
|
||||
function manhattanDistance(a: number[], b: number[]): number {
|
||||
let sum = 0;
|
||||
for (let i = 0; i < a.length; i++) {
|
||||
sum += Math.abs(a[i] - b[i]);
|
||||
}
|
||||
return sum;
|
||||
}
|
||||
|
||||
// ============================================================================
|
||||
// Mutual Reachability Distance
|
||||
// ============================================================================
|
||||
|
||||
interface ReachabilityResult {
|
||||
reachabilityDistances: number[][];
|
||||
coreDistances: number[];
|
||||
}
|
||||
|
||||
/**
|
||||
* Computes mutual reachability distances for HDBSCAN
|
||||
* This combines the original distance with core distances to handle varying densities
|
||||
*/
|
||||
function computeMutualReachability(
|
||||
points: DataPoint[],
|
||||
minSamples: number,
|
||||
metric: string
|
||||
): ReachabilityResult {
|
||||
const n = points.length;
|
||||
const distances: number[][] = Array(n)
|
||||
.fill(null)
|
||||
.map(() => Array(n).fill(0));
|
||||
const coreDistances: number[] = new Array(n);
|
||||
|
||||
// Compute pairwise distances
|
||||
for (let i = 0; i < n; i++) {
|
||||
for (let j = i + 1; j < n; j++) {
|
||||
const dist = calculateDistance(points[i].embedding, points[j].embedding, metric);
|
||||
distances[i][j] = dist;
|
||||
distances[j][i] = dist;
|
||||
}
|
||||
}
|
||||
|
||||
// Compute core distances (distance to minSamples-th nearest neighbor)
|
||||
for (let i = 0; i < n; i++) {
|
||||
const sortedDists = [...distances[i]].sort((a, b) => a - b);
|
||||
coreDistances[i] = sortedDists[Math.min(minSamples, sortedDists.length - 1)] || 0;
|
||||
}
|
||||
|
||||
// Compute mutual reachability distances
|
||||
const reachabilityDistances: number[][] = Array(n)
|
||||
.fill(null)
|
||||
.map(() => Array(n).fill(0));
|
||||
|
||||
for (let i = 0; i < n; i++) {
|
||||
for (let j = i + 1; j < n; j++) {
|
||||
const mutualDist = Math.max(
|
||||
coreDistances[i],
|
||||
coreDistances[j],
|
||||
distances[i][j]
|
||||
);
|
||||
reachabilityDistances[i][j] = mutualDist;
|
||||
reachabilityDistances[j][i] = mutualDist;
|
||||
}
|
||||
}
|
||||
|
||||
return { reachabilityDistances, coreDistances };
|
||||
}
|
||||
|
||||
// ============================================================================
|
||||
// Minimum Spanning Tree (Prim's Algorithm)
|
||||
// ============================================================================
|
||||
|
||||
interface Edge {
|
||||
from: number;
|
||||
to: number;
|
||||
weight: number;
|
||||
}
|
||||
|
||||
function computeMST(distances: number[][]): Edge[] {
|
||||
const n = distances.length;
|
||||
const visited = new Set<number>();
|
||||
const mst: Edge[] = [];
|
||||
|
||||
// Start from node 0
|
||||
visited.add(0);
|
||||
|
||||
while (visited.size < n) {
|
||||
let minEdge: Edge | null = null;
|
||||
|
||||
for (const i of visited) {
|
||||
for (let j = 0; j < n; j++) {
|
||||
if (!visited.has(j)) {
|
||||
const weight = distances[i][j];
|
||||
if (minEdge === null || weight < minEdge.weight) {
|
||||
minEdge = { from: i, to: j, weight };
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (minEdge === null) break;
|
||||
|
||||
mst.push(minEdge);
|
||||
visited.add(minEdge.to);
|
||||
}
|
||||
|
||||
return mst;
|
||||
}
|
||||
|
||||
// ============================================================================
|
||||
// Cluster Hierarchy (Single Linkage)
|
||||
// ============================================================================
|
||||
|
||||
interface HierarchyNode {
|
||||
left: number | HierarchyNode;
|
||||
right: number | HierarchyNode;
|
||||
distance: number;
|
||||
size: number;
|
||||
}
|
||||
|
||||
function buildHierarchy(mst: Edge[], n: number): HierarchyNode {
|
||||
// Sort edges by weight
|
||||
const sortedEdges = [...mst].sort((a, b) => a.weight - b.weight);
|
||||
|
||||
// Union-Find for tracking merges
|
||||
const parent = new Array(n).fill(0).map((_, i) => i);
|
||||
const size = new Array(n).fill(1);
|
||||
const clusters: Map<number, HierarchyNode> = new Map();
|
||||
|
||||
function find(x: number): number {
|
||||
if (parent[x] !== x) {
|
||||
parent[x] = find(parent[x]);
|
||||
}
|
||||
return parent[x];
|
||||
}
|
||||
|
||||
function union(x: number, y: number): number {
|
||||
const px = find(x);
|
||||
const py = find(y);
|
||||
if (px === py) return px;
|
||||
|
||||
if (size[px] < size[py]) {
|
||||
parent[px] = py;
|
||||
size[py] += size[px];
|
||||
return py;
|
||||
} else {
|
||||
parent[py] = px;
|
||||
size[px] += size[py];
|
||||
return px;
|
||||
}
|
||||
}
|
||||
|
||||
let nextClusterId = n;
|
||||
|
||||
for (const edge of sortedEdges) {
|
||||
const left = find(edge.from);
|
||||
const right = find(edge.to);
|
||||
|
||||
if (left !== right) {
|
||||
const leftCluster = clusters.get(left) ?? left;
|
||||
const rightCluster = clusters.get(right) ?? right;
|
||||
|
||||
const newCluster: HierarchyNode = {
|
||||
left: leftCluster,
|
||||
right: rightCluster,
|
||||
distance: edge.weight,
|
||||
size: size[left] + size[right],
|
||||
};
|
||||
|
||||
const newId = union(left, right);
|
||||
clusters.set(newId, newCluster);
|
||||
clusters.delete(left);
|
||||
clusters.delete(right);
|
||||
clusters.set(newId, newCluster);
|
||||
}
|
||||
}
|
||||
|
||||
// Return the root cluster
|
||||
const rootId = find(0);
|
||||
return clusters.get(rootId) ?? { left: 0, right: 1, distance: 0, size: n };
|
||||
}
|
||||
|
||||
// ============================================================================
|
||||
// Extract Clusters from Hierarchy
|
||||
// ============================================================================
|
||||
|
||||
function extractClusters(
|
||||
node: HierarchyNode | number,
|
||||
points: DataPoint[],
|
||||
minClusterSize: number,
|
||||
currentDistance: number = 0
|
||||
): { clusters: DataPoint[][]; stabilities: number[] } {
|
||||
if (typeof node === 'number') {
|
||||
// Leaf node (single point)
|
||||
return { clusters: [[points[node]]], stabilities: [1] };
|
||||
}
|
||||
|
||||
// Check if this node should be a cluster
|
||||
if (node.size >= minClusterSize) {
|
||||
// Extract all points in this cluster
|
||||
const allPoints = collectPoints(node, points);
|
||||
return { clusters: [allPoints], stabilities: [calculateStability(node)] };
|
||||
}
|
||||
|
||||
// Otherwise, recursively extract from children
|
||||
const leftResult = extractClusters(node.left, points, minClusterSize, node.distance);
|
||||
const rightResult = extractClusters(node.right, points, minClusterSize, node.distance);
|
||||
|
||||
return {
|
||||
clusters: [...leftResult.clusters, ...rightResult.clusters],
|
||||
stabilities: [...leftResult.stabilities, ...rightResult.stabilities],
|
||||
};
|
||||
}
|
||||
|
||||
function collectPoints(node: HierarchyNode | number, points: DataPoint[]): DataPoint[] {
|
||||
if (typeof node === 'number') {
|
||||
return [points[node]];
|
||||
}
|
||||
return [...collectPoints(node.left, points), ...collectPoints(node.right, points)];
|
||||
}
|
||||
|
||||
function calculateStability(node: HierarchyNode): number {
|
||||
// Stability is based on how long the cluster persists in the hierarchy
|
||||
if (typeof node === 'number') return 1;
|
||||
|
||||
// Simple stability: inversely proportional to the distance at which it forms
|
||||
// Clusters that form at lower distances (denser regions) are more stable
|
||||
const baseStability = node.distance > 0 ? 1 / node.distance : 1;
|
||||
const sizeBonus = Math.log(node.size) / Math.log(2); // log2(size)
|
||||
|
||||
return baseStability * sizeBonus;
|
||||
}
|
||||
|
||||
// ============================================================================
|
||||
// Main HDBSCAN Algorithm
|
||||
// ============================================================================
|
||||
|
||||
export interface HDBSCANResult {
|
||||
clusters: Cluster[];
|
||||
noise: DataPoint[];
|
||||
labels: Map<string, number>; // point id -> cluster index or -1 for noise
|
||||
}
|
||||
|
||||
/**
|
||||
* Runs HDBSCAN clustering on error embeddings
|
||||
*/
|
||||
export function runHDBSCAN(
|
||||
points: DataPoint[],
|
||||
options: Partial<HDBSCANOptions> = {}
|
||||
): HDBSCANResult {
|
||||
const opts = { ...DEFAULT_OPTIONS, ...options };
|
||||
|
||||
if (points.length < opts.minClusterSize) {
|
||||
return {
|
||||
clusters: [],
|
||||
noise: points,
|
||||
labels: new Map(points.map((p) => [p.id, -1])),
|
||||
};
|
||||
}
|
||||
|
||||
// Step 1: Compute mutual reachability distances
|
||||
const { reachabilityDistances } = computeMutualReachability(
|
||||
points,
|
||||
opts.minSamples,
|
||||
opts.metric
|
||||
);
|
||||
|
||||
// Step 2: Compute Minimum Spanning Tree
|
||||
const mst = computeMST(reachabilityDistances);
|
||||
|
||||
// Step 3: Build hierarchy via single linkage
|
||||
const hierarchy = buildHierarchy(mst, points.length);
|
||||
|
||||
// Step 4: Extract clusters from hierarchy
|
||||
const { clusters: rawClusters } = extractClusters(hierarchy, points, opts.minClusterSize);
|
||||
|
||||
// Step 5: Calculate centroids and format results
|
||||
const clusters: Cluster[] = rawClusters.map((clusterPoints, index) => ({
|
||||
id: `cluster_${index}_${Date.now()}`,
|
||||
points: clusterPoints,
|
||||
centroid: calculateCentroid(clusterPoints.map((p) => p.embedding)),
|
||||
stability: calculateClusterStability(clusterPoints),
|
||||
density: clusterPoints.length / averagePairwiseDistance(clusterPoints),
|
||||
}));
|
||||
|
||||
// Step 6: Identify noise points (points not in any cluster)
|
||||
const clusteredIds = new Set(
|
||||
clusters.flatMap((c) => c.points.map((p) => p.id))
|
||||
);
|
||||
const noise = points.filter((p) => !clusteredIds.has(p.id));
|
||||
|
||||
// Step 7: Assign labels
|
||||
const labels = new Map<string, number>();
|
||||
clusters.forEach((cluster, idx) => {
|
||||
cluster.points.forEach((p) => labels.set(p.id, idx));
|
||||
});
|
||||
noise.forEach((p) => labels.set(p.id, -1));
|
||||
|
||||
return { clusters, noise, labels };
|
||||
}
|
||||
|
||||
function calculateCentroid(embeddings: number[][]): number[] {
|
||||
if (embeddings.length === 0) return [];
|
||||
|
||||
const dimensions = embeddings[0].length;
|
||||
const centroid = new Array(dimensions).fill(0);
|
||||
|
||||
for (const emb of embeddings) {
|
||||
for (let i = 0; i < dimensions; i++) {
|
||||
centroid[i] += emb[i];
|
||||
}
|
||||
}
|
||||
|
||||
for (let i = 0; i < dimensions; i++) {
|
||||
centroid[i] /= embeddings.length;
|
||||
}
|
||||
|
||||
return centroid;
|
||||
}
|
||||
|
||||
function calculateClusterStability(points: DataPoint[]): number {
|
||||
// Cluster stability based on temporal coherence
|
||||
// Errors that occur close together in time are more likely to be related
|
||||
if (points.length < 2) return 1;
|
||||
|
||||
const timestamps = points
|
||||
.map((p) => new Date(p.metadata.firstSeenAt).getTime())
|
||||
.sort((a, b) => a - b);
|
||||
|
||||
const timeSpan = timestamps[timestamps.length - 1] - timestamps[0];
|
||||
const avgInterval = timeSpan / (points.length - 1);
|
||||
|
||||
// Higher stability for tighter temporal clustering (smaller intervals)
|
||||
const maxExpectedInterval = 7 * 24 * 60 * 60 * 1000; // 7 days in ms
|
||||
return Math.max(0, 1 - avgInterval / maxExpectedInterval);
|
||||
}
|
||||
|
||||
function averagePairwiseDistance(points: DataPoint[]): number {
|
||||
if (points.length < 2) return 1;
|
||||
|
||||
let totalDist = 0;
|
||||
let count = 0;
|
||||
|
||||
for (let i = 0; i < points.length; i++) {
|
||||
for (let j = i + 1; j < points.length; j++) {
|
||||
totalDist += euclideanDistance(points[i].embedding, points[j].embedding);
|
||||
count++;
|
||||
}
|
||||
}
|
||||
|
||||
return count > 0 ? totalDist / count : 1;
|
||||
}
|
||||
|
||||
// ============================================================================
|
||||
// DBSCAN Fallback for Smaller Datasets
|
||||
// ============================================================================
|
||||
|
||||
interface DBSCANOptions {
|
||||
eps: number;
|
||||
minPts: number;
|
||||
}
|
||||
|
||||
export function runDBSCAN(
|
||||
points: DataPoint[],
|
||||
options: DBSCANOptions
|
||||
): HDBSCANResult {
|
||||
const { eps, minPts } = options;
|
||||
const n = points.length;
|
||||
const visited = new Set<number>();
|
||||
const clustered = new Map<number, number>(); // point index -> cluster id
|
||||
let clusterId = 0;
|
||||
|
||||
function regionQuery(pointIdx: number): number[] {
|
||||
const neighbors: number[] = [];
|
||||
for (let i = 0; i < n; i++) {
|
||||
if (i !== pointIdx) {
|
||||
const dist = euclideanDistance(
|
||||
points[pointIdx].embedding,
|
||||
points[i].embedding
|
||||
);
|
||||
if (dist <= eps) {
|
||||
neighbors.push(i);
|
||||
}
|
||||
}
|
||||
}
|
||||
return neighbors;
|
||||
}
|
||||
|
||||
function expandCluster(pointIdx: number, neighbors: number[], currentClusterId: number): void {
|
||||
clustered.set(pointIdx, currentClusterId);
|
||||
|
||||
for (let i = 0; i < neighbors.length; i++) {
|
||||
const neighborIdx = neighbors[i];
|
||||
|
||||
if (!visited.has(neighborIdx)) {
|
||||
visited.add(neighborIdx);
|
||||
const neighborNeighbors = regionQuery(neighborIdx);
|
||||
|
||||
if (neighborNeighbors.length >= minPts) {
|
||||
neighbors.push(...neighborNeighbors);
|
||||
}
|
||||
}
|
||||
|
||||
if (!clustered.has(neighborIdx)) {
|
||||
clustered.set(neighborIdx, currentClusterId);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (let i = 0; i < n; i++) {
|
||||
if (visited.has(i)) continue;
|
||||
|
||||
visited.add(i);
|
||||
const neighbors = regionQuery(i);
|
||||
|
||||
if (neighbors.length < minPts) {
|
||||
// Mark as noise (will remain unclustered)
|
||||
} else {
|
||||
expandCluster(i, neighbors, clusterId);
|
||||
clusterId++;
|
||||
}
|
||||
}
|
||||
|
||||
// Format results
|
||||
const clusters: Cluster[] = [];
|
||||
for (let cid = 0; cid < clusterId; cid++) {
|
||||
const clusterPoints = points.filter((_, idx) => clustered.get(idx) === cid);
|
||||
if (clusterPoints.length > 0) {
|
||||
clusters.push({
|
||||
id: `cluster_${cid}_${Date.now()}`,
|
||||
points: clusterPoints,
|
||||
centroid: calculateCentroid(clusterPoints.map((p) => p.embedding)),
|
||||
stability: calculateClusterStability(clusterPoints),
|
||||
density: clusterPoints.length / averagePairwiseDistance(clusterPoints),
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
const noise = points.filter((_, idx) => !clustered.has(idx));
|
||||
const labels = new Map<string, number>();
|
||||
points.forEach((p, idx) => {
|
||||
labels.set(p.id, clustered.get(idx) ?? -1);
|
||||
});
|
||||
|
||||
return { clusters, noise, labels };
|
||||
}
|
||||
|
||||
// ============================================================================
|
||||
// Auto-parameter Selection
|
||||
// ============================================================================
|
||||
|
||||
/**
|
||||
* Automatically selects HDBSCAN parameters based on dataset size
|
||||
*/
|
||||
export function autoSelectParameters(
|
||||
datasetSize: number
|
||||
): Partial<HDBSCANOptions> & { algorithm: 'hdbscan' | 'dbscan' } {
|
||||
if (datasetSize < 10) {
|
||||
// Use DBSCAN for very small datasets
|
||||
return {
|
||||
algorithm: 'dbscan',
|
||||
eps: 0.5,
|
||||
minPts: 2,
|
||||
} as Partial<HDBSCANOptions> & { algorithm: 'dbscan' };
|
||||
}
|
||||
|
||||
if (datasetSize < 50) {
|
||||
// Small dataset: use HDBSCAN with small min cluster size
|
||||
return {
|
||||
algorithm: 'hdbscan',
|
||||
minClusterSize: 2,
|
||||
minSamples: 1,
|
||||
metric: 'cosine',
|
||||
alpha: 1.0,
|
||||
};
|
||||
}
|
||||
|
||||
if (datasetSize < 500) {
|
||||
// Medium dataset
|
||||
return {
|
||||
algorithm: 'hdbscan',
|
||||
minClusterSize: 3,
|
||||
minSamples: 2,
|
||||
metric: 'cosine',
|
||||
alpha: 1.0,
|
||||
};
|
||||
}
|
||||
|
||||
// Large dataset
|
||||
return {
|
||||
algorithm: 'hdbscan',
|
||||
minClusterSize: 5,
|
||||
minSamples: 3,
|
||||
metric: 'cosine',
|
||||
alpha: 1.0,
|
||||
};
|
||||
}
|
||||
|
||||
// ============================================================================
|
||||
// Cluster Quality Metrics
|
||||
// ============================================================================
|
||||
|
||||
interface ClusterQualityMetrics {
|
||||
silhouetteScore: number;
|
||||
daviesBouldinIndex: number;
|
||||
calinskiHarabaszIndex: number;
|
||||
}
|
||||
|
||||
export function calculateClusterQuality(
|
||||
points: DataPoint[],
|
||||
labels: Map<string, number>,
|
||||
clusters: Cluster[]
|
||||
): ClusterQualityMetrics {
|
||||
// Simplified silhouette score calculation
|
||||
const silhouetteScores: number[] = [];
|
||||
|
||||
for (const point of points) {
|
||||
const label = labels.get(point.id) ?? -1;
|
||||
if (label === -1) continue; // Skip noise points
|
||||
|
||||
// Average distance to points in same cluster (cohesion)
|
||||
const sameCluster = points.filter(
|
||||
(p) => labels.get(p.id) === label && p.id !== point.id
|
||||
);
|
||||
const a =
|
||||
sameCluster.length > 0
|
||||
? sameCluster.reduce(
|
||||
(sum, p) => sum + euclideanDistance(point.embedding, p.embedding),
|
||||
0
|
||||
) / sameCluster.length
|
||||
: 0;
|
||||
|
||||
// Minimum average distance to points in different clusters (separation)
|
||||
let b = Infinity;
|
||||
for (let i = 0; i < clusters.length; i++) {
|
||||
if (i === label) continue;
|
||||
const otherCluster = points.filter((p) => labels.get(p.id) === i);
|
||||
if (otherCluster.length === 0) continue;
|
||||
|
||||
const avgDist =
|
||||
otherCluster.reduce(
|
||||
(sum, p) => sum + euclideanDistance(point.embedding, p.embedding),
|
||||
0
|
||||
) / otherCluster.length;
|
||||
b = Math.min(b, avgDist);
|
||||
}
|
||||
|
||||
if (b === Infinity) b = a; // No other clusters
|
||||
|
||||
const silhouette = (b - a) / Math.max(a, b);
|
||||
silhouetteScores.push(silhouette);
|
||||
}
|
||||
|
||||
const avgSilhouette =
|
||||
silhouetteScores.length > 0
|
||||
? silhouetteScores.reduce((sum, s) => sum + s, 0) / silhouetteScores.length
|
||||
: 0;
|
||||
|
||||
return {
|
||||
silhouetteScore: avgSilhouette,
|
||||
daviesBouldinIndex: 0, // Simplified - would need full implementation
|
||||
calinskiHarabaszIndex: 0, // Simplified - would need full implementation
|
||||
};
|
||||
}
|
||||
@ -0,0 +1,395 @@
|
||||
/**
|
||||
* Anomaly Detection with Prophet-style forecasting
|
||||
* [3.3] Anomaly Detection
|
||||
*/
|
||||
|
||||
import type { HealthAnomaly } from './types.js';
|
||||
|
||||
export interface TimeSeriesPoint {
|
||||
timestamp: Date;
|
||||
value: number;
|
||||
}
|
||||
|
||||
export interface ForecastResult {
|
||||
forecast: TimeSeriesPoint[];
|
||||
confidenceIntervals: Array<{ upper: number; lower: number }>;
|
||||
anomalies: Array<{
|
||||
timestamp: Date;
|
||||
actual: number;
|
||||
expected: number;
|
||||
deviation: number;
|
||||
}>;
|
||||
}
|
||||
|
||||
export interface AnomalyDetectionConfig {
|
||||
sensitivity: number; // 0-1, higher = more sensitive
|
||||
seasonalPeriods: number[]; // e.g., [7] for weekly
|
||||
changepointPriorScale: number;
|
||||
intervalWidth: number; // confidence interval (0.8 = 80%)
|
||||
}
|
||||
|
||||
const DEFAULT_CONFIG: AnomalyDetectionConfig = {
|
||||
sensitivity: 0.8,
|
||||
seasonalPeriods: [7], // Weekly seasonality
|
||||
changepointPriorScale: 0.05,
|
||||
intervalWidth: 0.95,
|
||||
};
|
||||
|
||||
export class AnomalyDetectionEngine {
|
||||
private config: AnomalyDetectionConfig;
|
||||
|
||||
constructor(config: Partial<AnomalyDetectionConfig> = {}) {
|
||||
this.config = { ...DEFAULT_CONFIG, ...config };
|
||||
}
|
||||
|
||||
/**
|
||||
* Detect anomalies in a time series
|
||||
*/
|
||||
detectAnomalies(
|
||||
series: TimeSeriesPoint[],
|
||||
metricName: string
|
||||
): Array<{
|
||||
timestamp: Date;
|
||||
value: number;
|
||||
expected: number;
|
||||
deviation: number;
|
||||
severity: 'critical' | 'warning';
|
||||
}> {
|
||||
if (series.length < 14) {
|
||||
return []; // Need at least 2 weeks of data
|
||||
}
|
||||
|
||||
// Simple moving average baseline
|
||||
const windowSize = 7;
|
||||
const anomalies: Array<{
|
||||
timestamp: Date;
|
||||
value: number;
|
||||
expected: number;
|
||||
deviation: number;
|
||||
severity: 'critical' | 'warning';
|
||||
}> = [];
|
||||
|
||||
for (let i = windowSize; i < series.length; i++) {
|
||||
const point = series[i];
|
||||
const window = series.slice(i - windowSize, i);
|
||||
|
||||
// Calculate baseline (accounting for seasonality)
|
||||
const baseline = this.calculateSeasonalBaseline(window, i, series);
|
||||
const stdDev = this.calculateStdDev(window.map((p) => p.value));
|
||||
|
||||
// Calculate expected value with trend
|
||||
const trend = this.calculateTrend(window);
|
||||
const expected = baseline + trend;
|
||||
|
||||
// Calculate deviation
|
||||
const deviation = Math.abs(point.value - expected);
|
||||
const zScore = stdDev > 0 ? deviation / stdDev : 0;
|
||||
|
||||
// Check if anomaly
|
||||
const threshold = this.getZScoreThreshold();
|
||||
if (zScore > threshold) {
|
||||
anomalies.push({
|
||||
timestamp: point.timestamp,
|
||||
value: point.value,
|
||||
expected,
|
||||
deviation: zScore,
|
||||
severity: zScore > threshold * 1.5 ? 'critical' : 'warning',
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
return anomalies;
|
||||
}
|
||||
|
||||
/**
|
||||
* Generate forecast for future values
|
||||
*/
|
||||
forecast(series: TimeSeriesPoint[], periods: number): ForecastResult {
|
||||
if (series.length < 7) {
|
||||
return {
|
||||
forecast: [],
|
||||
confidenceIntervals: [],
|
||||
anomalies: [],
|
||||
};
|
||||
}
|
||||
|
||||
const lastPoint = series[series.length - 1];
|
||||
const values = series.map((p) => p.value);
|
||||
|
||||
// Calculate trend
|
||||
const trend = this.calculateTrend(series.slice(-14));
|
||||
|
||||
// Calculate seasonal component
|
||||
const seasonal = this.extractSeasonality(series, 7);
|
||||
|
||||
// Calculate base level
|
||||
const lastWeek = values.slice(-7);
|
||||
const baseLevel = lastWeek.reduce((a, b) => a + b, 0) / lastWeek.length;
|
||||
|
||||
// Calculate uncertainty
|
||||
const residuals = this.calculateResiduals(series, seasonal);
|
||||
const uncertainty = this.calculateStdDev(residuals) * 2;
|
||||
|
||||
// Generate forecast
|
||||
const forecast: TimeSeriesPoint[] = [];
|
||||
const confidenceIntervals: Array<{ upper: number; lower: number }> = [];
|
||||
|
||||
for (let i = 1; i <= periods; i++) {
|
||||
const forecastDate = new Date(lastPoint.timestamp);
|
||||
forecastDate.setDate(forecastDate.getDate() + i);
|
||||
|
||||
// Day of week seasonal adjustment
|
||||
const dayOfWeek = forecastDate.getDay();
|
||||
const seasonalFactor = seasonal[dayOfWeek] || 1;
|
||||
|
||||
const predicted = (baseLevel + trend * i) * seasonalFactor;
|
||||
|
||||
forecast.push({
|
||||
timestamp: forecastDate,
|
||||
value: predicted,
|
||||
});
|
||||
|
||||
confidenceIntervals.push({
|
||||
upper: predicted + uncertainty * Math.sqrt(i),
|
||||
lower: predicted - uncertainty * Math.sqrt(i),
|
||||
});
|
||||
}
|
||||
|
||||
return {
|
||||
forecast,
|
||||
confidenceIntervals,
|
||||
anomalies: [],
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* Multi-dimensional anomaly detection
|
||||
*/
|
||||
detectMultiDimensionalAnomaly(
|
||||
metrics: Record<string, TimeSeriesPoint[]>,
|
||||
correlationWindow: number = 7
|
||||
): Array<{
|
||||
metric: string;
|
||||
timestamp: Date;
|
||||
deviation: number;
|
||||
correlatedMetrics: string[];
|
||||
suggestedCause: string;
|
||||
}> {
|
||||
const anomalies: Array<{
|
||||
metric: string;
|
||||
timestamp: Date;
|
||||
deviation: number;
|
||||
correlatedMetrics: string[];
|
||||
suggestedCause: string;
|
||||
}> = [];
|
||||
|
||||
// Calculate correlations between metrics
|
||||
const metricNames = Object.keys(metrics);
|
||||
const correlations: Record<string, Record<string, number>> = {};
|
||||
|
||||
for (const m1 of metricNames) {
|
||||
correlations[m1] = {};
|
||||
for (const m2 of metricNames) {
|
||||
if (m1 !== m2) {
|
||||
correlations[m1][m2] = this.calculateCorrelation(
|
||||
metrics[m1],
|
||||
metrics[m2],
|
||||
correlationWindow
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Detect anomalies in each metric
|
||||
for (const [metricName, series] of Object.entries(metrics)) {
|
||||
const metricAnomalies = this.detectAnomalies(series, metricName);
|
||||
|
||||
for (const anomaly of metricAnomalies) {
|
||||
// Find correlated metrics that also show anomalies
|
||||
const correlatedMetrics: string[] = [];
|
||||
|
||||
for (const [otherMetric, corr] of Object.entries(correlations[metricName])) {
|
||||
if (Math.abs(corr) > 0.7) {
|
||||
// Strong correlation
|
||||
const otherAnomalies = this.detectAnomalies(metrics[otherMetric], otherMetric);
|
||||
const hasAnomalyAtTime = otherAnomalies.some(
|
||||
(a) =>
|
||||
Math.abs(a.timestamp.getTime() - anomaly.timestamp.getTime()) <
|
||||
24 * 60 * 60 * 1000
|
||||
);
|
||||
|
||||
if (hasAnomalyAtTime) {
|
||||
correlatedMetrics.push(otherMetric);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
anomalies.push({
|
||||
metric: metricName,
|
||||
timestamp: anomaly.timestamp,
|
||||
deviation: anomaly.deviation,
|
||||
correlatedMetrics,
|
||||
suggestedCause: this.suggestMultiDimensionalCause(
|
||||
metricName,
|
||||
correlatedMetrics,
|
||||
anomaly.severity
|
||||
),
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
return anomalies;
|
||||
}
|
||||
|
||||
/**
|
||||
* Calculate seasonal baseline
|
||||
*/
|
||||
private calculateSeasonalBaseline(
|
||||
window: TimeSeriesPoint[],
|
||||
index: number,
|
||||
fullSeries: TimeSeriesPoint[]
|
||||
): number {
|
||||
const values = window.map((p) => p.value);
|
||||
const avg = values.reduce((a, b) => a + b, 0) / values.length;
|
||||
|
||||
// Adjust for day-of-week seasonality if we have enough data
|
||||
if (fullSeries.length >= 14 && index >= 7) {
|
||||
const currentDay = index % 7;
|
||||
const prevWeekSameDay = fullSeries
|
||||
.filter((_, i) => i % 7 === currentDay && i < index)
|
||||
.slice(-3);
|
||||
|
||||
if (prevWeekSameDay.length > 0) {
|
||||
const seasonalAvg =
|
||||
prevWeekSameDay.reduce((a, p) => a + p.value, 0) / prevWeekSameDay.length;
|
||||
return seasonalAvg * 0.7 + avg * 0.3;
|
||||
}
|
||||
}
|
||||
|
||||
return avg;
|
||||
}
|
||||
|
||||
/**
|
||||
* Calculate trend from series
|
||||
*/
|
||||
private calculateTrend(window: TimeSeriesPoint[]): number {
|
||||
if (window.length < 2) return 0;
|
||||
|
||||
const n = window.length;
|
||||
const sumX = window.reduce((sum, _, i) => sum + i, 0);
|
||||
const sumY = window.reduce((sum, p) => sum + p.value, 0);
|
||||
const sumXY = window.reduce((sum, p, i) => sum + i * p.value, 0);
|
||||
const sumX2 = window.reduce((sum, _, i) => sum + i * i, 0);
|
||||
|
||||
const denominator = n * sumX2 - sumX * sumX;
|
||||
if (denominator === 0) return 0;
|
||||
|
||||
const slope = (n * sumXY - sumX * sumY) / denominator;
|
||||
return slope;
|
||||
}
|
||||
|
||||
/**
|
||||
* Extract weekly seasonality pattern
|
||||
*/
|
||||
private extractSeasonality(series: TimeSeriesPoint[], period: number): number[] {
|
||||
const dayAvgs: number[][] = Array.from({ length: period }, () => []);
|
||||
|
||||
for (let i = 0; i < series.length; i++) {
|
||||
const dayOfWeek = i % period;
|
||||
dayAvgs[dayOfWeek].push(series[i].value);
|
||||
}
|
||||
|
||||
const overallAvg =
|
||||
series.reduce((sum, p) => sum + p.value, 0) / series.length;
|
||||
|
||||
return dayAvgs.map((values) => {
|
||||
if (values.length === 0) return 1;
|
||||
const avg = values.reduce((a, b) => a + b, 0) / values.length;
|
||||
return overallAvg > 0 ? avg / overallAvg : 1;
|
||||
});
|
||||
}
|
||||
|
||||
/**
|
||||
* Calculate residuals after removing seasonality
|
||||
*/
|
||||
private calculateResiduals(
|
||||
series: TimeSeriesPoint[],
|
||||
seasonal: number[]
|
||||
): number[] {
|
||||
return series.map((p, i) => {
|
||||
const dayOfWeek = i % 7;
|
||||
const seasonalFactor = seasonal[dayOfWeek] || 1;
|
||||
return p.value / seasonalFactor;
|
||||
});
|
||||
}
|
||||
|
||||
/**
|
||||
* Calculate correlation between two time series
|
||||
*/
|
||||
private calculateCorrelation(
|
||||
series1: TimeSeriesPoint[],
|
||||
series2: TimeSeriesPoint[],
|
||||
window: number
|
||||
): number {
|
||||
const s1 = series1.slice(-window).map((p) => p.value);
|
||||
const s2 = series2.slice(-window).map((p) => p.value);
|
||||
|
||||
if (s1.length !== s2.length || s1.length < 2) return 0;
|
||||
|
||||
const n = s1.length;
|
||||
const sum1 = s1.reduce((a, b) => a + b, 0);
|
||||
const sum2 = s2.reduce((a, b) => a + b, 0);
|
||||
const sum1Sq = s1.reduce((a, b) => a + b * b, 0);
|
||||
const sum2Sq = s2.reduce((a, b) => a + b * b, 0);
|
||||
const pSum = s1.reduce((sum, v, i) => sum + v * s2[i], 0);
|
||||
|
||||
const numerator = pSum - (sum1 * sum2) / n;
|
||||
const denominator = Math.sqrt(
|
||||
(sum1Sq - (sum1 * sum1) / n) * (sum2Sq - (sum2 * sum2) / n)
|
||||
);
|
||||
|
||||
return denominator === 0 ? 0 : numerator / denominator;
|
||||
}
|
||||
|
||||
/**
|
||||
* Calculate standard deviation
|
||||
*/
|
||||
private calculateStdDev(values: number[]): number {
|
||||
if (values.length < 2) return 0;
|
||||
const avg = values.reduce((a, b) => a + b, 0) / values.length;
|
||||
const variance = values.reduce((sum, v) => sum + Math.pow(v - avg, 2), 0) / values.length;
|
||||
return Math.sqrt(variance);
|
||||
}
|
||||
|
||||
/**
|
||||
* Get Z-score threshold based on sensitivity
|
||||
*/
|
||||
private getZScoreThreshold(): number {
|
||||
// Higher sensitivity = lower threshold = more anomalies detected
|
||||
return 2.5 - this.config.sensitivity;
|
||||
}
|
||||
|
||||
/**
|
||||
* Suggest cause for multi-dimensional anomaly
|
||||
*/
|
||||
private suggestMultiDimensionalCause(
|
||||
metric: string,
|
||||
correlatedMetrics: string[],
|
||||
severity: string
|
||||
): string {
|
||||
const causes: Record<string, string> = {
|
||||
dau: 'Daily active users anomaly - check for app crashes or service outages',
|
||||
'error_rate': 'Error rate spike correlates with other metrics - investigate recent deployment',
|
||||
'latency': 'Latency increase affecting user experience',
|
||||
'churn_rate': 'Churn rate anomaly - review recent pricing or policy changes',
|
||||
};
|
||||
|
||||
if (correlatedMetrics.length > 0) {
|
||||
return `${causes[metric] || 'Multi-metric anomaly detected'}. Correlated with: ${correlatedMetrics.join(', ')}`;
|
||||
}
|
||||
|
||||
return causes[metric] || `Anomaly detected in ${metric}`;
|
||||
}
|
||||
}
|
||||
|
||||
export const anomalyDetectionEngine = new AnomalyDetectionEngine();
|
||||
@ -0,0 +1,508 @@
|
||||
/**
|
||||
* Health Scoring Algorithm - 6-dimensional product health framework
|
||||
* [3.1] Health Metric Framework
|
||||
* [3.2] Health Scoring Algorithm
|
||||
*/
|
||||
|
||||
import type {
|
||||
ProductHealthScoreDoc,
|
||||
ProductHealthDimensions,
|
||||
HealthDimension,
|
||||
HealthAnomaly,
|
||||
HealthForecast,
|
||||
} from './types.js';
|
||||
|
||||
export interface HealthMetricsInput {
|
||||
productId: string;
|
||||
date: Date;
|
||||
|
||||
// Acquisition metrics
|
||||
newUsers: number;
|
||||
activationRateDay1: number;
|
||||
activationRateDay7: number;
|
||||
cac: number;
|
||||
|
||||
// Activation metrics
|
||||
firstValueMomentRate: number;
|
||||
timeToFirstAction: number;
|
||||
onboardingCompletionRate: number;
|
||||
|
||||
// Retention metrics
|
||||
dau: number;
|
||||
mau: number;
|
||||
day7Retention: number;
|
||||
day30Retention: number;
|
||||
|
||||
// Engagement metrics
|
||||
avgSessionLength: number;
|
||||
sessionsPerUser: number;
|
||||
featureAdoption: Record<string, number>;
|
||||
|
||||
// Revenue metrics
|
||||
mrr: number;
|
||||
arpu: number;
|
||||
churnRate: number;
|
||||
upgradeRate: number;
|
||||
|
||||
// Stability metrics
|
||||
crashFreeRate: number;
|
||||
errorRate: number;
|
||||
avgLatency: number;
|
||||
uptimePercent: number;
|
||||
|
||||
// Historical baselines (30-day averages)
|
||||
baselines: {
|
||||
dau: number;
|
||||
newUsers: number;
|
||||
activationRateDay1: number;
|
||||
day7Retention: number;
|
||||
avgSessionLength: number;
|
||||
mrr: number;
|
||||
errorRate: number;
|
||||
};
|
||||
}
|
||||
|
||||
// Dimension weights (should sum to 1)
|
||||
const DIMENSION_WEIGHTS = {
|
||||
acquisition: 0.15,
|
||||
activation: 0.15,
|
||||
retention: 0.25,
|
||||
engagement: 0.15,
|
||||
revenue: 0.15,
|
||||
stability: 0.15,
|
||||
};
|
||||
|
||||
// Alert thresholds
|
||||
const THRESHOLDS = {
|
||||
critical: 60,
|
||||
warning: 75,
|
||||
};
|
||||
|
||||
export class HealthScoringEngine {
|
||||
/**
|
||||
* Calculate complete health score for a product
|
||||
*/
|
||||
calculateHealthScore(input: HealthMetricsInput): Omit<ProductHealthScoreDoc, 'id' | 'ttl'> {
|
||||
const dimensions = this.calculateDimensions(input);
|
||||
const overallHealthScore = this.calculateOverallScore(dimensions);
|
||||
const healthStatus = this.determineHealthStatus(overallHealthScore, dimensions);
|
||||
|
||||
const anomalies = this.detectAnomalies(input, dimensions);
|
||||
const forecasts = this.generateForecasts(input, dimensions);
|
||||
|
||||
const vsBaseline7Day = this.calculateBaselineChange(input, 7);
|
||||
const vsBaseline30Day = this.calculateBaselineChange(input, 30);
|
||||
|
||||
return {
|
||||
productId: input.productId,
|
||||
date: input.date.toISOString().split('T')[0],
|
||||
overallHealthScore,
|
||||
healthStatus,
|
||||
dimensions,
|
||||
anomalies,
|
||||
forecasts,
|
||||
vsBaseline7Day,
|
||||
vsBaseline30Day,
|
||||
createdAt: new Date().toISOString(),
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* Calculate individual dimension scores
|
||||
*/
|
||||
private calculateDimensions(input: HealthMetricsInput): ProductHealthDimensions {
|
||||
return {
|
||||
acquisition: this.calculateAcquisitionDimension(input),
|
||||
activation: this.calculateActivationDimension(input),
|
||||
retention: this.calculateRetentionDimension(input),
|
||||
engagement: this.calculateEngagementDimension(input),
|
||||
revenue: this.calculateRevenueDimension(input),
|
||||
stability: this.calculateStabilityDimension(input),
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* Calculate acquisition dimension score
|
||||
*/
|
||||
private calculateAcquisitionDimension(input: HealthMetricsInput): HealthDimension {
|
||||
const metrics = {
|
||||
newUsers: input.newUsers,
|
||||
activationRateDay1: input.activationRateDay1,
|
||||
activationRateDay7: input.activationRateDay7,
|
||||
cac: input.cac,
|
||||
};
|
||||
|
||||
// Score components
|
||||
const newUserScore = this.zScoreNormalized(
|
||||
input.newUsers,
|
||||
input.baselines.newUsers,
|
||||
input.baselines.newUsers * 0.3
|
||||
);
|
||||
|
||||
const activationScore =
|
||||
input.activationRateDay1 * 0.6 + input.activationRateDay7 * 0.4;
|
||||
|
||||
const cacScore = this.normalizeInverse(input.cac, 100);
|
||||
|
||||
const score = Math.round(
|
||||
newUserScore * 0.4 + activationScore * 100 * 0.4 + cacScore * 100 * 0.2
|
||||
);
|
||||
|
||||
return {
|
||||
score: Math.max(0, Math.min(100, score)),
|
||||
metrics,
|
||||
trend: this.determineTrend(input.newUsers, input.baselines.newUsers),
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* Calculate activation dimension score
|
||||
*/
|
||||
private calculateActivationDimension(input: HealthMetricsInput): HealthDimension {
|
||||
const metrics = {
|
||||
firstValueMomentRate: input.firstValueMomentRate,
|
||||
timeToFirstAction: input.timeToFirstAction,
|
||||
onboardingCompletionRate: input.onboardingCompletionRate,
|
||||
};
|
||||
|
||||
const fvmScore = input.firstValueMomentRate * 100;
|
||||
const ttfScore = this.normalizeInverse(input.timeToFirstAction, 60) * 100;
|
||||
const onboardingScore = input.onboardingCompletionRate * 100;
|
||||
|
||||
const score = Math.round(fvmScore * 0.4 + ttfScore * 0.3 + onboardingScore * 0.3);
|
||||
|
||||
return {
|
||||
score: Math.max(0, Math.min(100, score)),
|
||||
metrics,
|
||||
trend: this.determineTrend(input.firstValueMomentRate, 0.5),
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* Calculate retention dimension score (highest weight)
|
||||
*/
|
||||
private calculateRetentionDimension(input: HealthMetricsInput): HealthDimension {
|
||||
const dauMauRatio = input.mau > 0 ? input.dau / input.mau : 0;
|
||||
|
||||
const metrics = {
|
||||
dau: input.dau,
|
||||
mau: input.mau,
|
||||
dauMauRatio,
|
||||
day7Retention: input.day7Retention,
|
||||
day30Retention: input.day30Retention,
|
||||
};
|
||||
|
||||
// Retention score with DAU/MAU stickiness
|
||||
const retentionScore =
|
||||
input.day7Retention * 0.4 + input.day30Retention * 0.4 + dauMauRatio * 100 * 0.2;
|
||||
|
||||
// DAU trend compared to baseline
|
||||
const dauScore = this.zScoreNormalized(
|
||||
input.dau,
|
||||
input.baselines.dau,
|
||||
input.baselines.dau * 0.2
|
||||
);
|
||||
|
||||
const score = Math.round(retentionScore * 0.7 + dauScore * 0.3);
|
||||
|
||||
return {
|
||||
score: Math.max(0, Math.min(100, score)),
|
||||
metrics,
|
||||
trend: this.determineTrend(input.dau, input.baselines.dau),
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* Calculate engagement dimension score
|
||||
*/
|
||||
private calculateEngagementDimension(input: HealthMetricsInput): HealthDimension {
|
||||
const featureAdoptionAvg = Object.values(input.featureAdoption).length
|
||||
? Object.values(input.featureAdoption).reduce((a, b) => a + b, 0) /
|
||||
Object.values(input.featureAdoption).length
|
||||
: 0;
|
||||
|
||||
const metrics = {
|
||||
avgSessionLength: input.avgSessionLength,
|
||||
sessionsPerUser: input.sessionsPerUser,
|
||||
featureAdoption: input.featureAdoption,
|
||||
};
|
||||
|
||||
const sessionLengthScore = this.normalizeLinear(input.avgSessionLength, 600) * 100;
|
||||
const sessionsPerUserScore = this.normalizeLinear(input.sessionsPerUser, 20) * 100;
|
||||
const featureAdoptionScore = featureAdoptionAvg * 100;
|
||||
|
||||
const score = Math.round(
|
||||
sessionLengthScore * 0.4 + sessionsPerUserScore * 0.3 + featureAdoptionScore * 0.3
|
||||
);
|
||||
|
||||
return {
|
||||
score: Math.max(0, Math.min(100, score)),
|
||||
metrics,
|
||||
trend: this.determineTrend(input.avgSessionLength, input.baselines.avgSessionLength),
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* Calculate revenue dimension score
|
||||
*/
|
||||
private calculateRevenueDimension(input: HealthMetricsInput): HealthDimension {
|
||||
const metrics = {
|
||||
mrr: input.mrr,
|
||||
arpu: input.arpu,
|
||||
churnRate: input.churnRate,
|
||||
upgradeRate: input.upgradeRate,
|
||||
};
|
||||
|
||||
const mrrScore = this.zScoreNormalized(
|
||||
input.mrr,
|
||||
input.baselines.mrr || input.mrr * 0.9,
|
||||
(input.baselines.mrr || input.mrr) * 0.2
|
||||
);
|
||||
|
||||
const churnScore = this.normalizeInverse(input.churnRate * 100, 20);
|
||||
const upgradeScore = input.upgradeRate * 100;
|
||||
const arpuScore = this.normalizeLinear(input.arpu, 50) * 100;
|
||||
|
||||
const score = Math.round(mrrScore * 0.4 + churnScore * 100 * 0.3 + upgradeScore * 0.2 + arpuScore * 0.1);
|
||||
|
||||
return {
|
||||
score: Math.max(0, Math.min(100, score)),
|
||||
metrics,
|
||||
trend: this.determineTrend(input.mrr, input.baselines.mrr || input.mrr * 0.95),
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* Calculate stability dimension score
|
||||
*/
|
||||
private calculateStabilityDimension(input: HealthMetricsInput): HealthDimension {
|
||||
const metrics = {
|
||||
crashFreeRate: input.crashFreeRate,
|
||||
errorRate: input.errorRate,
|
||||
avgLatency: input.avgLatency,
|
||||
uptimePercent: input.uptimePercent,
|
||||
};
|
||||
|
||||
const crashFreeScore = input.crashFreeRate * 100;
|
||||
const errorRateScore = this.normalizeInverse(input.errorRate * 100, 5);
|
||||
const latencyScore = this.normalizeInverse(input.avgLatency, 1000);
|
||||
const uptimeScore = input.uptimePercent;
|
||||
|
||||
const score = Math.round(
|
||||
crashFreeScore * 0.35 + errorRateScore * 100 * 0.35 + latencyScore * 100 * 0.15 + uptimeScore * 0.15
|
||||
);
|
||||
|
||||
return {
|
||||
score: Math.max(0, Math.min(100, score)),
|
||||
metrics,
|
||||
trend: this.determineTrend(1 - input.errorRate, 1 - input.baselines.errorRate),
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* Calculate overall health score from dimensions
|
||||
*/
|
||||
private calculateOverallScore(dimensions: ProductHealthDimensions): number {
|
||||
const weightedScore =
|
||||
dimensions.acquisition.score * DIMENSION_WEIGHTS.acquisition +
|
||||
dimensions.activation.score * DIMENSION_WEIGHTS.activation +
|
||||
dimensions.retention.score * DIMENSION_WEIGHTS.retention +
|
||||
dimensions.engagement.score * DIMENSION_WEIGHTS.engagement +
|
||||
dimensions.revenue.score * DIMENSION_WEIGHTS.revenue +
|
||||
dimensions.stability.score * DIMENSION_WEIGHTS.stability;
|
||||
|
||||
return Math.round(weightedScore);
|
||||
}
|
||||
|
||||
/**
|
||||
* Determine health status based on score and dimension variance
|
||||
*/
|
||||
private determineHealthStatus(
|
||||
overallScore: number,
|
||||
dimensions: ProductHealthDimensions
|
||||
): 'critical' | 'warning' | 'healthy' {
|
||||
// Check for critical score
|
||||
if (overallScore < THRESHOLDS.critical) return 'critical';
|
||||
|
||||
// Check for warning score
|
||||
if (overallScore < THRESHOLDS.warning) return 'warning';
|
||||
|
||||
// Check for critical dimension
|
||||
const criticalDimension = Object.values(dimensions).some((d) => d.score < 40);
|
||||
if (criticalDimension) return 'warning';
|
||||
|
||||
// Check for high variance (unstable health)
|
||||
const scores = Object.values(dimensions).map((d) => d.score);
|
||||
const avg = scores.reduce((a, b) => a + b, 0) / scores.length;
|
||||
const variance = scores.reduce((sum, s) => sum + Math.pow(s - avg, 2), 0) / scores.length;
|
||||
const stdDev = Math.sqrt(variance);
|
||||
|
||||
if (stdDev > 20 && overallScore < 85) return 'warning';
|
||||
|
||||
return 'healthy';
|
||||
}
|
||||
|
||||
/**
|
||||
* Detect anomalies in metrics
|
||||
*/
|
||||
private detectAnomalies(
|
||||
input: HealthMetricsInput,
|
||||
dimensions: ProductHealthDimensions
|
||||
): HealthAnomaly[] {
|
||||
const anomalies: HealthAnomaly[] = [];
|
||||
|
||||
// Check each metric against baseline
|
||||
const checks: Array<{ metric: string; value: number; baseline: number; threshold: number }> = [
|
||||
{ metric: 'dau', value: input.dau, baseline: input.baselines.dau, threshold: 0.2 },
|
||||
{ metric: 'newUsers', value: input.newUsers, baseline: input.baselines.newUsers, threshold: 0.3 },
|
||||
{
|
||||
metric: 'activationRateDay1',
|
||||
value: input.activationRateDay1,
|
||||
baseline: input.baselines.activationRateDay1,
|
||||
threshold: 0.15,
|
||||
},
|
||||
{ metric: 'day7Retention', value: input.day7Retention, baseline: 0.3, threshold: 0.2 },
|
||||
{ metric: 'errorRate', value: input.errorRate, baseline: input.baselines.errorRate, threshold: 0.5 },
|
||||
];
|
||||
|
||||
for (const check of checks) {
|
||||
if (check.baseline > 0) {
|
||||
const deviation = (check.value - check.baseline) / check.baseline;
|
||||
|
||||
if (Math.abs(deviation) > check.threshold) {
|
||||
anomalies.push({
|
||||
metric: check.metric,
|
||||
expectedValue: check.baseline,
|
||||
actualValue: check.value,
|
||||
deviationPercent: Math.round(deviation * 100),
|
||||
severity: Math.abs(deviation) > check.threshold * 2 ? 'critical' : 'warning',
|
||||
suggestedCause: this.suggestAnomalyCause(check.metric, deviation),
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Check dimension scores for significant drops
|
||||
Object.entries(dimensions).forEach(([name, dimension]) => {
|
||||
if (dimension.score < 50) {
|
||||
anomalies.push({
|
||||
metric: `${name}_score`,
|
||||
expectedValue: 75,
|
||||
actualValue: dimension.score,
|
||||
deviationPercent: Math.round(((75 - dimension.score) / 75) * 100),
|
||||
severity: dimension.score < 40 ? 'critical' : 'warning',
|
||||
suggestedCause: `${name} metrics significantly below target`,
|
||||
});
|
||||
}
|
||||
});
|
||||
|
||||
return anomalies;
|
||||
}
|
||||
|
||||
/**
|
||||
* Generate health forecasts
|
||||
*/
|
||||
private generateForecasts(
|
||||
input: HealthMetricsInput,
|
||||
dimensions: ProductHealthDimensions
|
||||
): { next7Days: HealthForecast; next30Days: HealthForecast } {
|
||||
const currentScore = this.calculateOverallScore(dimensions);
|
||||
|
||||
// Simple trend-based forecasting (in production, use Prophet/ARIMA)
|
||||
const trends = Object.values(dimensions).map((d) =>
|
||||
d.trend === 'improving' ? 1 : d.trend === 'declining' ? -1 : 0
|
||||
);
|
||||
const avgTrend = trends.reduce((a, b) => a + b, 0) / trends.length;
|
||||
|
||||
// 7-day forecast
|
||||
const trend7Days = avgTrend * 2; // Small movement
|
||||
const forecast7Days = Math.max(0, Math.min(100, currentScore + trend7Days));
|
||||
|
||||
// 30-day forecast
|
||||
const trend30Days = avgTrend * 8; // Larger movement
|
||||
const forecast30Days = Math.max(0, Math.min(100, currentScore + trend30Days));
|
||||
|
||||
return {
|
||||
next7Days: {
|
||||
expectedHealthScore: Math.round(forecast7Days),
|
||||
confidenceInterval: [
|
||||
Math.round(forecast7Days * 0.9),
|
||||
Math.round(Math.min(100, forecast7Days * 1.1)),
|
||||
],
|
||||
},
|
||||
next30Days: {
|
||||
expectedHealthScore: Math.round(forecast30Days),
|
||||
confidenceInterval: [
|
||||
Math.round(forecast30Days * 0.8),
|
||||
Math.round(Math.min(100, forecast30Days * 1.15)),
|
||||
],
|
||||
},
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* Calculate change vs baseline
|
||||
*/
|
||||
private calculateBaselineChange(input: HealthMetricsInput, days: number): number {
|
||||
const currentHealth = this.estimateCurrentHealth(input);
|
||||
const baselineHealth = 75; // Simplified baseline
|
||||
|
||||
return Math.round(((currentHealth - baselineHealth) / baselineHealth) * 100);
|
||||
}
|
||||
|
||||
/**
|
||||
* Estimate current health from metrics
|
||||
*/
|
||||
private estimateCurrentHealth(input: HealthMetricsInput): number {
|
||||
const scores = [
|
||||
input.activationRateDay1 * 100,
|
||||
input.day7Retention * 100,
|
||||
this.normalizeInverse(input.churnRate * 100, 20) * 100,
|
||||
input.crashFreeRate * 100,
|
||||
];
|
||||
return scores.reduce((a, b) => a + b, 0) / scores.length;
|
||||
}
|
||||
|
||||
/**
|
||||
* Suggest potential cause for anomaly
|
||||
*/
|
||||
private suggestAnomalyCause(metric: string, deviation: number): string {
|
||||
const direction = deviation > 0 ? 'increase' : 'decrease';
|
||||
|
||||
const causes: Record<string, string> = {
|
||||
dau: `Recent ${direction} may indicate marketing campaign impact or seasonal effect`,
|
||||
newUsers: `${direction} in user acquisition - check marketing channels`,
|
||||
activationRateDay1: `Onboarding changes may be affecting first-day engagement`,
|
||||
day7Retention: `Early retention ${direction} - review recent product changes`,
|
||||
errorRate: `Error rate spike - investigate recent deployments`,
|
||||
};
|
||||
|
||||
return causes[metric] || `Unusual ${direction} detected in ${metric}`;
|
||||
}
|
||||
|
||||
// Normalization helpers
|
||||
private zScoreNormalized(value: number, mean: number, stdDev: number): number {
|
||||
if (stdDev === 0) return 50;
|
||||
const zScore = (value - mean) / stdDev;
|
||||
// Convert to 0-100 scale (Z-score -3 to 3 maps to 0-100)
|
||||
return Math.max(0, Math.min(100, 50 + zScore * 16.67));
|
||||
}
|
||||
|
||||
private normalizeLinear(value: number, max: number): number {
|
||||
return Math.min(1, Math.max(0, value / max));
|
||||
}
|
||||
|
||||
private normalizeInverse(value: number, max: number): number {
|
||||
return 1 - Math.min(1, Math.max(0, value / max));
|
||||
}
|
||||
|
||||
private determineTrend(current: number, baseline: number): 'improving' | 'stable' | 'declining' {
|
||||
if (baseline === 0) return 'stable';
|
||||
const change = (current - baseline) / baseline;
|
||||
if (change > 0.1) return 'improving';
|
||||
if (change < -0.1) return 'declining';
|
||||
return 'stable';
|
||||
}
|
||||
}
|
||||
|
||||
export const healthScoringEngine = new HealthScoringEngine();
|
||||
Loading…
Reference in New Issue
Block a user