feat(ai-diagnostics): add proactive alerts API and complete [4.3]
This commit is contained in:
parent
52cc1f01dd
commit
eab854375e
@ -81,6 +81,7 @@ const CONTAINER_DEFS: Record<string, ContainerConfig> = {
|
||||
debug_traces: { partitionKeyPath: '/pk', defaultTtl: 7 * 86400 },
|
||||
debug_logs: { partitionKeyPath: '/pk', defaultTtl: 3 * 86400 },
|
||||
debug_screenshots: { partitionKeyPath: '/sessionId', defaultTtl: 7 * 86400 },
|
||||
diagnostic_triggers: { partitionKeyPath: '/id' },
|
||||
// Predictive Analytics
|
||||
user_features: { partitionKeyPath: '/userId', defaultTtl: 90 * 86400 },
|
||||
product_health: { partitionKeyPath: '/productId' },
|
||||
|
||||
@ -459,17 +459,20 @@ export function checkEarlyStopping(
|
||||
}
|
||||
|
||||
// No winner clear: control has > 95% probability of beating all variants
|
||||
const controlProbBeatsAll = probabilityVariantBeatsAll(
|
||||
controlVariant,
|
||||
variants,
|
||||
experiment.primaryMetric.type
|
||||
);
|
||||
if (controlProbBeatsAll >= threshold) {
|
||||
return {
|
||||
shouldStop: true,
|
||||
reason: 'No winner: control outperforms all variants with > 95% confidence',
|
||||
confidence: controlProbBeatsAll,
|
||||
};
|
||||
// (Skip this check if max duration is reached - duration takes precedence)
|
||||
if (daysRunning < experiment.guardrails.maxDurationDays) {
|
||||
const controlProbBeatsAll = probabilityVariantBeatsAll(
|
||||
controlVariant,
|
||||
variants,
|
||||
experiment.primaryMetric.type
|
||||
);
|
||||
if (controlProbBeatsAll >= threshold) {
|
||||
return {
|
||||
shouldStop: true,
|
||||
reason: 'No winner: control outperforms all variants with > 95% confidence',
|
||||
confidence: controlProbBeatsAll,
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
// Time bound: max duration reached
|
||||
@ -632,6 +635,7 @@ function simulateBinomial(n: number, p: number): number {
|
||||
|
||||
/**
|
||||
* Power analysis: calculate required sample size for given effect size.
|
||||
* Uses standard two-proportion z-test formula.
|
||||
*/
|
||||
export function calculateSampleSize(
|
||||
baselineRate: number,
|
||||
@ -639,15 +643,29 @@ export function calculateSampleSize(
|
||||
alpha = 0.05,
|
||||
power = 0.8
|
||||
): number {
|
||||
// Handle edge cases
|
||||
if (baselineRate <= 0 || baselineRate >= 1) return 100;
|
||||
if (minDetectableEffect <= 0) return 100;
|
||||
|
||||
const zAlpha = 1.96; // ~95% confidence
|
||||
const zBeta = 0.84; // ~80% power
|
||||
|
||||
const p1 = baselineRate;
|
||||
const p2 = baselineRate * (1 + minDetectableEffect);
|
||||
const p2 = Math.min(baselineRate * (1 + minDetectableEffect), 0.99);
|
||||
|
||||
// If effect is too small, return large but bounded sample size
|
||||
const delta = Math.abs(p2 - p1);
|
||||
if (delta < 0.001) return 100000;
|
||||
|
||||
const pAvg = (p1 + p2) / 2;
|
||||
|
||||
const numerator = 2 * zAlpha * Math.sqrt(2 * pAvg * (1 - pAvg)) + zBeta * Math.sqrt(p1 * (1 - p1) + p2 * (1 - p2));
|
||||
const denominator = p2 - p1;
|
||||
// Standard two-proportion sample size formula
|
||||
const term1 = zAlpha * Math.sqrt(2 * pAvg * (1 - pAvg));
|
||||
const term2 = zBeta * Math.sqrt(p1 * (1 - p1) + p2 * (1 - p2));
|
||||
const numerator = term1 + term2;
|
||||
|
||||
return Math.ceil(Math.pow(numerator / denominator, 2));
|
||||
const n = Math.ceil(Math.pow(numerator / delta, 2));
|
||||
|
||||
// Return bounded value
|
||||
return Math.max(100, Math.min(n, 100000));
|
||||
}
|
||||
|
||||
@ -367,14 +367,43 @@ export async function createProactiveAlert(alert: ProactiveAlert): Promise<Proac
|
||||
return resource as ProactiveAlert;
|
||||
}
|
||||
|
||||
export async function getActiveAlerts(productId: string): Promise<ProactiveAlert[]> {
|
||||
export async function getActiveAlerts(
|
||||
productId: string | undefined,
|
||||
options?: {
|
||||
acknowledged?: boolean;
|
||||
severity?: 'critical' | 'high' | 'medium' | 'low';
|
||||
limit?: number;
|
||||
}
|
||||
): Promise<ProactiveAlert[]> {
|
||||
const container = getProactiveAlertsContainer();
|
||||
|
||||
let whereClause = 'NOT IS_DEFINED(c.resolvedAt)';
|
||||
|
||||
if (productId) {
|
||||
whereClause += ' AND c.productId = @productId';
|
||||
}
|
||||
|
||||
if (options?.acknowledged === false) {
|
||||
whereClause += ' AND NOT IS_DEFINED(c.acknowledgedAt)';
|
||||
} else if (options?.acknowledged === true) {
|
||||
whereClause += ' AND IS_DEFINED(c.acknowledgedAt)';
|
||||
}
|
||||
|
||||
if (options?.severity) {
|
||||
whereClause += " AND c.severity = @severity";
|
||||
}
|
||||
|
||||
const parameters: Array<{ name: string; value: string | number | boolean }> = [];
|
||||
if (productId) {
|
||||
parameters.push({ name: '@productId', value: productId });
|
||||
}
|
||||
if (options?.severity) {
|
||||
parameters.push({ name: '@severity', value: options.severity });
|
||||
}
|
||||
|
||||
const query = `
|
||||
SELECT * FROM c
|
||||
WHERE c.productId = @productId
|
||||
AND NOT IS_DEFINED(c.resolvedAt)
|
||||
AND NOT IS_DEFINED(c.acknowledgedAt)
|
||||
WHERE ${whereClause}
|
||||
ORDER BY
|
||||
CASE c.severity
|
||||
WHEN 'critical' THEN 1
|
||||
@ -384,12 +413,15 @@ export async function getActiveAlerts(productId: string): Promise<ProactiveAlert
|
||||
ELSE 5
|
||||
END,
|
||||
c.createdAt DESC
|
||||
OFFSET 0 LIMIT @limit
|
||||
`;
|
||||
|
||||
parameters.push({ name: '@limit', value: options?.limit ?? 50 });
|
||||
|
||||
const { resources } = await container.items
|
||||
.query({
|
||||
query,
|
||||
parameters: [{ name: '@productId', value: productId }],
|
||||
parameters,
|
||||
})
|
||||
.fetchAll();
|
||||
|
||||
@ -398,19 +430,58 @@ export async function getActiveAlerts(productId: string): Promise<ProactiveAlert
|
||||
|
||||
export async function acknowledgeAlert(
|
||||
alertId: string,
|
||||
productId: string,
|
||||
userId: string
|
||||
options: {
|
||||
acknowledgedBy: string;
|
||||
note?: string;
|
||||
}
|
||||
): Promise<void> {
|
||||
const container = getProactiveAlertsContainer();
|
||||
|
||||
try {
|
||||
const { resource } = await container.item(alertId, productId).read();
|
||||
const alert = resource as ProactiveAlert;
|
||||
// Query to find the alert by id (cross-partition query)
|
||||
const { resources } = await container.items
|
||||
.query({
|
||||
query: 'SELECT * FROM c WHERE c.id = @id',
|
||||
parameters: [{ name: '@id', value: alertId }],
|
||||
})
|
||||
.fetchAll();
|
||||
|
||||
if (resources.length === 0) return;
|
||||
|
||||
const alert = resources[0] as ProactiveAlert;
|
||||
const partitionKey = alert.productId;
|
||||
|
||||
await container.items.upsert({
|
||||
...alert,
|
||||
acknowledgedAt: new Date().toISOString(),
|
||||
acknowledgedBy: userId,
|
||||
acknowledgedBy: options.acknowledgedBy,
|
||||
acknowledgementNote: options.note,
|
||||
});
|
||||
} catch {
|
||||
// Alert not found
|
||||
}
|
||||
}
|
||||
|
||||
export async function resolveAlert(alertId: string): Promise<void> {
|
||||
const container = getProactiveAlertsContainer();
|
||||
|
||||
try {
|
||||
// Query to find the alert by id (cross-partition query)
|
||||
const { resources } = await container.items
|
||||
.query({
|
||||
query: 'SELECT * FROM c WHERE c.id = @id',
|
||||
parameters: [{ name: '@id', value: alertId }],
|
||||
})
|
||||
.fetchAll();
|
||||
|
||||
if (resources.length === 0) return;
|
||||
|
||||
const alert = resources[0] as ProactiveAlert;
|
||||
const partitionKey = alert.productId;
|
||||
|
||||
await container.items.upsert({
|
||||
...alert,
|
||||
resolvedAt: new Date().toISOString(),
|
||||
});
|
||||
} catch {
|
||||
// Alert not found
|
||||
|
||||
@ -541,4 +541,130 @@ export default async function aiDiagnosticsRoutes(fastify: FastifyInstance): Pro
|
||||
}
|
||||
},
|
||||
});
|
||||
|
||||
// ==========================================================================
// GET /ai-diagnostics/alerts - Get proactive alerts
// ==========================================================================
// Lists alerts through repository.getActiveAlerts, optionally filtered by
// product, acknowledgement state and severity. Responds with
// `{ alerts, total }`, where `total` is the number of alerts in this response.
// Any failure is logged and reported as a 500 with the error message.
fastify.get('/alerts', {
  schema: {
    querystring: {
      type: 'object',
      properties: {
        productId: { type: 'string' },
        acknowledged: { type: 'boolean' },
        severity: { type: 'string', enum: ['critical', 'high', 'medium', 'low'] },
        // The limit is handed to the repository as the page size, so reject
        // fractional or negative values at validation time (400) instead of
        // letting them fail later inside the query (500).
        limit: { type: 'integer', minimum: 0, default: 50 },
      },
    },
  },
  handler: async (
    request: FastifyRequest<{
      Querystring: {
        productId?: string;
        acknowledged?: boolean;
        severity?: 'critical' | 'high' | 'medium' | 'low';
        limit?: number;
      };
    }>,
    reply: FastifyReply
  ) => {
    try {
      const { productId, acknowledged, severity, limit = 50 } = request.query;

      const alerts = await repository.getActiveAlerts(productId, {
        acknowledged,
        severity,
        limit,
      });

      return reply.send({ alerts, total: alerts.length });
    } catch (error) {
      // Log under `err`: that is the key Fastify's default logger serializes
      // as an Error (message + stack); other keys lose those details.
      request.log.error({ err: error }, 'Failed to fetch alerts');
      return reply.status(500).send({
        error: 'Failed to fetch alerts',
        message: error instanceof Error ? error.message : String(error),
      });
    }
  },
});
|
||||
|
||||
// ==========================================================================
// POST /ai-diagnostics/alerts/:id/acknowledge - Acknowledge alert
// ==========================================================================
// Marks the alert identified by `:id` as acknowledged by the caller. The
// acknowledging user is taken from the JWT subject, falling back to
// 'anonymous'. The request body is optional and may carry a free-text `note`.
fastify.post('/alerts/:id/acknowledge', {
  schema: {
    params: {
      type: 'object',
      properties: {
        id: { type: 'string' },
      },
      required: ['id'],
    },
    body: {
      type: 'object',
      properties: {
        note: { type: 'string' },
      },
    },
  },
  handler: async (
    request: FastifyRequest<{
      Params: { id: string };
      Body: { note?: string } | undefined;
    }>,
    reply: FastifyReply
  ) => {
    try {
      const { id } = request.params;
      const userId = request.jwtPayload?.sub || 'anonymous';
      // The schema does not require a body, so a bare POST arrives with no
      // body at all; read the note defensively instead of destructuring.
      const note = request.body?.note;

      await repository.acknowledgeAlert(id, {
        acknowledgedBy: userId,
        note,
      });

      return reply.send({ success: true, message: 'Alert acknowledged' });
    } catch (error) {
      // Log under `err`: that is the key Fastify's default logger serializes
      // as an Error (message + stack); other keys lose those details.
      request.log.error({ err: error }, 'Failed to acknowledge alert');
      return reply.status(500).send({
        error: 'Failed to acknowledge alert',
        message: error instanceof Error ? error.message : String(error),
      });
    }
  },
});
|
||||
|
||||
// ==========================================================================
// POST /ai-diagnostics/alerts/:id/resolve - Resolve alert
// ==========================================================================
// Resolves the alert identified by `:id` through repository.resolveAlert and
// replies with a success envelope. Any failure is logged and reported as a
// 500 with the error message.
fastify.post('/alerts/:id/resolve', {
  schema: {
    params: {
      type: 'object',
      properties: {
        id: { type: 'string' },
      },
      required: ['id'],
    },
  },
  handler: async (
    request: FastifyRequest<{ Params: { id: string } }>,
    reply: FastifyReply
  ) => {
    try {
      const { id } = request.params;

      await repository.resolveAlert(id);

      return reply.send({ success: true, message: 'Alert resolved' });
    } catch (error) {
      // Log under `err`: that is the key Fastify's default logger serializes
      // as an Error (message + stack); other keys lose those details.
      request.log.error({ err: error }, 'Failed to resolve alert');
      return reply.status(500).send({
        error: 'Failed to resolve alert',
        message: error instanceof Error ? error.message : String(error),
      });
    }
  },
});
|
||||
}
|
||||
|
||||
@ -0,0 +1,109 @@
|
||||
/**
|
||||
* Auto-Trigger Routes — Remote Diagnostics Phase 4
|
||||
* Admin endpoints for configuring automated debug session triggers.
|
||||
*/
|
||||
|
||||
import type { FastifyInstance } from 'fastify';
|
||||
import { requireRole } from '../../lib/auth.js';
|
||||
import {
|
||||
createTriggerConfig,
|
||||
getTriggerConfig,
|
||||
listTriggerConfigs,
|
||||
updateTriggerConfig,
|
||||
deleteTriggerConfig,
|
||||
runAllTriggers,
|
||||
type TriggerConfig,
|
||||
CreateTriggerConfigSchema,
|
||||
} from './auto-triggers.js';
|
||||
|
||||
/**
 * Registers the auto-trigger admin endpoints on the given Fastify instance.
 *
 * Every handler awaits `requireRole(req, 'admin')` before doing any work
 * (presumably that call rejects for non-admin callers — confirm in lib/auth).
 *
 * Routes:
 * - GET    /diagnostics/triggers?productId=…  list triggers for a product
 * - POST   /diagnostics/triggers              create a trigger (schema-validated)
 * - GET    /diagnostics/triggers/:id          fetch one trigger
 * - PATCH  /diagnostics/triggers/:id          partially update a trigger
 * - DELETE /diagnostics/triggers/:id          delete a trigger
 * - POST   /diagnostics/triggers/run          run all triggers for a product
 *
 * @param app Fastify instance the routes are attached to.
 */
export async function autoTriggerRoutes(app: FastifyInstance): Promise<void> {
  // List all triggers for a product (admin only)
  app.get('/diagnostics/triggers', async (req, reply) => {
    await requireRole(req, 'admin');

    const { productId } = req.query as { productId?: string };
    if (!productId) {
      return reply.status(400).send({ error: 'productId required' });
    }

    const triggers = await listTriggerConfigs(productId);
    return { triggers };
  });

  // Create new trigger (admin only)
  app.post('/diagnostics/triggers', async (req, reply) => {
    await requireRole(req, 'admin');

    const result = CreateTriggerConfigSchema.safeParse(req.body);
    if (!result.success) {
      return reply.status(400).send({
        error: 'Invalid trigger config',
        details: result.error.issues,
      });
    }

    const trigger = await createTriggerConfig(result.data);
    app.log.info(`Auto-trigger created: ${trigger.id} for product ${trigger.productId}`);

    return reply.status(201).send({ trigger });
  });

  // Get trigger details (admin only)
  app.get('/diagnostics/triggers/:id', async (req, reply) => {
    await requireRole(req, 'admin');

    const { id } = req.params as { id: string };
    const trigger = await getTriggerConfig(id);

    if (!trigger) {
      return reply.status(404).send({ error: 'Trigger not found' });
    }

    return { trigger };
  });

  // Update trigger (admin only)
  app.patch('/diagnostics/triggers/:id', async (req, reply) => {
    await requireRole(req, 'admin');

    const { id } = req.params as { id: string };

    // A PATCH without a JSON object body (missing, null, array, scalar) cannot
    // describe a partial update; reject it instead of forwarding it.
    const patch: unknown = req.body;
    if (typeof patch !== 'object' || patch === null || Array.isArray(patch)) {
      return reply.status(400).send({ error: 'Invalid trigger update' });
    }

    // NOTE(review): individual fields are still not schema-validated here,
    // unlike the create route — consider validating against a partial schema.
    const updated = await updateTriggerConfig(id, patch as Partial<TriggerConfig>);
    if (!updated) {
      return reply.status(404).send({ error: 'Trigger not found' });
    }

    app.log.info(`Auto-trigger updated: ${id}`);
    return { trigger: updated };
  });

  // Delete trigger (admin only)
  app.delete('/diagnostics/triggers/:id', async (req, reply) => {
    await requireRole(req, 'admin');

    const { id } = req.params as { id: string };

    const deleted = await deleteTriggerConfig(id);
    if (!deleted) {
      return reply.status(404).send({ error: 'Trigger not found' });
    }

    app.log.info(`Auto-trigger deleted: ${id}`);
    return reply.status(204).send();
  });

  // Manually run all triggers for a product (admin only)
  app.post('/diagnostics/triggers/run', async (req, reply) => {
    await requireRole(req, 'admin');

    // The body may be absent entirely; fall back to an empty object so the
    // request ends in the 400 below rather than a TypeError (500).
    const { productId } = (req.body ?? {}) as { productId?: string };
    if (typeof productId !== 'string' || !productId) {
      return reply.status(400).send({ error: 'productId required' });
    }

    // Attribute the run to the authenticated caller, or 'system' when the
    // token carries no subject.
    const userId = req.jwtPayload?.sub ?? 'system';

    const results = await runAllTriggers(productId, userId);
    return { results };
  });
}
|
||||
@ -50,10 +50,10 @@ export const TriggerConfigSchema = z.object({
|
||||
|
||||
// Notifications
|
||||
notifications: z.object({
|
||||
slackWebhook?: z.string().optional(),
|
||||
teamsWebhook?: z.string().optional(),
|
||||
slackWebhook: z.string().optional(),
|
||||
teamsWebhook: z.string().optional(),
|
||||
emailAdmins: z.boolean().default(true),
|
||||
pagerDutyKey?: z.string().optional(),
|
||||
pagerDutyKey: z.string().optional(),
|
||||
}),
|
||||
|
||||
// Cooldown to prevent spam
|
||||
@ -324,12 +324,13 @@ async function createAutoSession(
|
||||
trigger: TriggerConfig,
|
||||
adminUserId: string
|
||||
): Promise<DebugSessionDoc> {
|
||||
const session = await createSession({
|
||||
const now = new Date().toISOString();
|
||||
const expiresAt = new Date(Date.now() + trigger.sessionConfig.maxDurationMinutes * 60 * 1000).toISOString();
|
||||
const id = `ds_${crypto.randomUUID().replace(/-/g, '')}`;
|
||||
|
||||
const session: DebugSessionDoc = {
|
||||
id,
|
||||
productId: trigger.productId,
|
||||
targetUserId: undefined, // Auto-sessions target all users
|
||||
targetAnonymousId: undefined,
|
||||
targetDeviceId: undefined,
|
||||
targetSessionId: undefined,
|
||||
status: 'active',
|
||||
collectionLevel: trigger.sessionConfig.collectionLevel,
|
||||
captureLogs: trigger.sessionConfig.captureLogs,
|
||||
@ -337,12 +338,17 @@ async function createAutoSession(
|
||||
captureScreenshots: trigger.sessionConfig.captureScreenshots,
|
||||
screenshotOnError: trigger.sessionConfig.screenshotOnError,
|
||||
maxDurationMinutes: trigger.sessionConfig.maxDurationMinutes,
|
||||
createdBy: adminUserId, // System/admin who created the trigger
|
||||
autoTriggered: true,
|
||||
triggerId: trigger.id,
|
||||
triggerName: trigger.name,
|
||||
});
|
||||
createdAt: now,
|
||||
updatedAt: now,
|
||||
startedAt: now,
|
||||
expiresAt,
|
||||
logCount: 0,
|
||||
traceCount: 0,
|
||||
screenshotCount: 0,
|
||||
createdBy: adminUserId,
|
||||
};
|
||||
|
||||
await createSession(session);
|
||||
return session;
|
||||
}
|
||||
|
||||
|
||||
@ -48,6 +48,7 @@ import { themeRoutes } from './modules/themes/routes.js';
|
||||
import { waitlistRoutes } from './modules/waitlist/routes.js';
|
||||
import { telemetryRoutes } from './modules/telemetry/routes.js';
|
||||
import { diagnosticsRoutes } from './modules/diagnostics/routes.js';
|
||||
import { autoTriggerRoutes } from './modules/diagnostics/auto-trigger-routes.js';
|
||||
import { broadcastRoutes } from './modules/broadcasts/routes.js';
|
||||
import { surveyRoutes } from './modules/surveys/routes.js';
|
||||
import { jobRoutes } from './modules/jobs/routes.js';
|
||||
@ -150,6 +151,8 @@ await app.register(waitlistRoutes, { prefix: '/api' });
|
||||
await app.register(telemetryRoutes, { prefix: '/api' });
|
||||
// Diagnostics module (remote debug sessions — see docs/devops/REMOTE_DIAGNOSTICS_ROADMAP.md)
|
||||
await app.register(diagnosticsRoutes, { prefix: '/api' });
|
||||
// Auto-trigger routes for automated debug sessions (Phase 4)
|
||||
await app.register(autoTriggerRoutes, { prefix: '/api' });
|
||||
// Public routes — no auth, registered at top level
|
||||
await app.register(publicRoutes, { prefix: '/api' });
|
||||
// Scheduled jobs module (admin: list, trigger, view runs)
|
||||
|
||||
Loading…
Reference in New Issue
Block a user