feat(telemetry): add telemetry module — ingest, config, query, clusters, policies (34 tests)
This commit is contained in:
parent
083cf029c1
commit
ce4c4ff53d
@ -34,6 +34,10 @@ const CONTAINER_DEFS: Record<string, ContainerConfig> = {
|
|||||||
daily_briefs: { partitionKeyPath: '/userId' },
|
daily_briefs: { partitionKeyPath: '/userId' },
|
||||||
reflections: { partitionKeyPath: '/userId' },
|
reflections: { partitionKeyPath: '/userId' },
|
||||||
brain_insights: { partitionKeyPath: '/userId' },
|
brain_insights: { partitionKeyPath: '/userId' },
|
||||||
|
// Telemetry (client diagnostics — see docs/WINDSURF/CLIENT_TELEMETRY_DESIGN.md)
|
||||||
|
telemetry_events: { partitionKeyPath: '/pk', defaultTtl: 30 * 86400 },
|
||||||
|
telemetry_error_clusters: { partitionKeyPath: '/pk', defaultTtl: 90 * 86400 },
|
||||||
|
telemetry_collection_policies: { partitionKeyPath: '/productId' },
|
||||||
};
|
};
|
||||||
|
|
||||||
export async function initCosmosIfNeeded(): Promise<void> {
|
export async function initCosmosIfNeeded(): Promise<void> {
|
||||||
|
|||||||
254
services/platform-service/src/modules/telemetry/repository.ts
Normal file
254
services/platform-service/src/modules/telemetry/repository.ts
Normal file
@ -0,0 +1,254 @@
|
|||||||
|
/**
|
||||||
|
* Telemetry repository — Cosmos DB CRUD for events, policies, and clusters.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import { getContainer } from '../../lib/cosmos.js';
|
||||||
|
import type {
|
||||||
|
TelemetryEventDoc,
|
||||||
|
TelemetryCollectionPolicyDoc,
|
||||||
|
TelemetryErrorCluster,
|
||||||
|
TelemetryQueryInput,
|
||||||
|
} from './types.js';
|
||||||
|
|
||||||
|
// ─── Container accessors ────────────────────────────────────────────
|
||||||
|
|
||||||
|
function eventsContainer() {
|
||||||
|
return getContainer('telemetry_events');
|
||||||
|
}
|
||||||
|
|
||||||
|
function policiesContainer() {
|
||||||
|
return getContainer('telemetry_collection_policies');
|
||||||
|
}
|
||||||
|
|
||||||
|
function clustersContainer() {
|
||||||
|
return getContainer('telemetry_error_clusters');
|
||||||
|
}
|
||||||
|
|
||||||
|
// ─── Events ─────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
export async function upsertEvent(doc: TelemetryEventDoc): Promise<void> {
|
||||||
|
await eventsContainer().items.upsert(doc);
|
||||||
|
}
|
||||||
|
|
||||||
|
export async function upsertEventsBatch(docs: TelemetryEventDoc[]): Promise<void> {
|
||||||
|
// Cosmos DB doesn't have native batch across partitions.
|
||||||
|
// We upsert individually; for v1 this is acceptable.
|
||||||
|
// TODO: Group by pk and use bulk operations for same-partition batches.
|
||||||
|
const promises = docs.map(doc => eventsContainer().items.upsert(doc));
|
||||||
|
await Promise.all(promises);
|
||||||
|
}
|
||||||
|
|
||||||
|
export async function queryEvents(
|
||||||
|
productId: string,
|
||||||
|
input: TelemetryQueryInput
|
||||||
|
): Promise<{ events: TelemetryEventDoc[]; continuationToken?: string }> {
|
||||||
|
const conditions: string[] = ['c.productId = @productId'];
|
||||||
|
const parameters: Array<{ name: string; value: string | number | boolean }> = [
|
||||||
|
{ name: '@productId', value: productId },
|
||||||
|
];
|
||||||
|
|
||||||
|
if (input.userId) {
|
||||||
|
conditions.push('c.userId = @userId');
|
||||||
|
parameters.push({ name: '@userId', value: input.userId });
|
||||||
|
}
|
||||||
|
if (input.anonymousInstallId) {
|
||||||
|
conditions.push('c.anonymousInstallId = @anonymousInstallId');
|
||||||
|
parameters.push({ name: '@anonymousInstallId', value: input.anonymousInstallId });
|
||||||
|
}
|
||||||
|
if (input.platform) {
|
||||||
|
conditions.push('c.platform = @platform');
|
||||||
|
parameters.push({ name: '@platform', value: input.platform });
|
||||||
|
}
|
||||||
|
if (input.channel) {
|
||||||
|
conditions.push('c.channel = @channel');
|
||||||
|
parameters.push({ name: '@channel', value: input.channel });
|
||||||
|
}
|
||||||
|
if (input.osFamily) {
|
||||||
|
conditions.push('c.osFamily = @osFamily');
|
||||||
|
parameters.push({ name: '@osFamily', value: input.osFamily });
|
||||||
|
}
|
||||||
|
if (input.appVersion) {
|
||||||
|
conditions.push('c.appVersion = @appVersion');
|
||||||
|
parameters.push({ name: '@appVersion', value: input.appVersion });
|
||||||
|
}
|
||||||
|
if (input.buildNumber) {
|
||||||
|
conditions.push('c.buildNumber = @buildNumber');
|
||||||
|
parameters.push({ name: '@buildNumber', value: input.buildNumber });
|
||||||
|
}
|
||||||
|
if (input.module) {
|
||||||
|
conditions.push('c.module = @module');
|
||||||
|
parameters.push({ name: '@module', value: input.module });
|
||||||
|
}
|
||||||
|
if (input.eventName) {
|
||||||
|
conditions.push('c.eventName = @eventName');
|
||||||
|
parameters.push({ name: '@eventName', value: input.eventName });
|
||||||
|
}
|
||||||
|
if (input.eventType) {
|
||||||
|
conditions.push('c.eventType = @eventType');
|
||||||
|
parameters.push({ name: '@eventType', value: input.eventType });
|
||||||
|
}
|
||||||
|
if (input.from) {
|
||||||
|
conditions.push('c.occurredAt >= @from');
|
||||||
|
parameters.push({ name: '@from', value: input.from });
|
||||||
|
}
|
||||||
|
if (input.to) {
|
||||||
|
conditions.push('c.occurredAt <= @to');
|
||||||
|
parameters.push({ name: '@to', value: input.to });
|
||||||
|
}
|
||||||
|
|
||||||
|
const query = `SELECT * FROM c WHERE ${conditions.join(' AND ')} ORDER BY c.occurredAt DESC`;
|
||||||
|
|
||||||
|
const iterator = eventsContainer().items.query<TelemetryEventDoc>(
|
||||||
|
{ query, parameters },
|
||||||
|
{
|
||||||
|
maxItemCount: input.limit,
|
||||||
|
continuationToken: input.continuationToken || undefined,
|
||||||
|
}
|
||||||
|
);
|
||||||
|
|
||||||
|
const { resources, continuationToken } = await iterator.fetchNext();
|
||||||
|
return {
|
||||||
|
events: resources,
|
||||||
|
continuationToken: continuationToken || undefined,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
export async function deleteEventsByUserId(productId: string, userId: string): Promise<number> {
|
||||||
|
// Find all events for this user, then delete them
|
||||||
|
const { resources } = await eventsContainer()
|
||||||
|
.items.query<{ id: string; pk: string }>({
|
||||||
|
query: 'SELECT c.id, c.pk FROM c WHERE c.productId = @productId AND c.userId = @userId',
|
||||||
|
parameters: [
|
||||||
|
{ name: '@productId', value: productId },
|
||||||
|
{ name: '@userId', value: userId },
|
||||||
|
],
|
||||||
|
})
|
||||||
|
.fetchAll();
|
||||||
|
|
||||||
|
let deleted = 0;
|
||||||
|
for (const doc of resources) {
|
||||||
|
try {
|
||||||
|
await eventsContainer().item(doc.id, doc.pk).delete();
|
||||||
|
deleted++;
|
||||||
|
} catch {
|
||||||
|
// Skip docs that fail to delete (already deleted, etc.)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return deleted;
|
||||||
|
}
|
||||||
|
|
||||||
|
// ─── Policies ───────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
export async function listPolicies(productId: string): Promise<TelemetryCollectionPolicyDoc[]> {
|
||||||
|
const { resources } = await policiesContainer()
|
||||||
|
.items.query<TelemetryCollectionPolicyDoc>({
|
||||||
|
query: 'SELECT * FROM c WHERE c.productId = @productId ORDER BY c.priority DESC',
|
||||||
|
parameters: [{ name: '@productId', value: productId }],
|
||||||
|
})
|
||||||
|
.fetchAll();
|
||||||
|
return resources;
|
||||||
|
}
|
||||||
|
|
||||||
|
export async function getPolicy(
|
||||||
|
id: string,
|
||||||
|
productId: string
|
||||||
|
): Promise<TelemetryCollectionPolicyDoc | null> {
|
||||||
|
try {
|
||||||
|
const { resource } = await policiesContainer()
|
||||||
|
.item(id, productId)
|
||||||
|
.read<TelemetryCollectionPolicyDoc>();
|
||||||
|
return resource ?? null;
|
||||||
|
} catch {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
export async function createPolicy(
|
||||||
|
doc: TelemetryCollectionPolicyDoc
|
||||||
|
): Promise<TelemetryCollectionPolicyDoc> {
|
||||||
|
const { resource } = await policiesContainer().items.create(doc);
|
||||||
|
return resource as TelemetryCollectionPolicyDoc;
|
||||||
|
}
|
||||||
|
|
||||||
|
export async function updatePolicy(
|
||||||
|
id: string,
|
||||||
|
productId: string,
|
||||||
|
updates: Partial<TelemetryCollectionPolicyDoc>
|
||||||
|
): Promise<TelemetryCollectionPolicyDoc | null> {
|
||||||
|
try {
|
||||||
|
const { resource: existing } = await policiesContainer()
|
||||||
|
.item(id, productId)
|
||||||
|
.read<TelemetryCollectionPolicyDoc>();
|
||||||
|
if (!existing) return null;
|
||||||
|
const merged = { ...existing, ...updates, updatedAt: new Date().toISOString() };
|
||||||
|
const { resource } = await policiesContainer().item(id, productId).replace(merged);
|
||||||
|
return resource as TelemetryCollectionPolicyDoc;
|
||||||
|
} catch {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
export async function deletePolicy(id: string, productId: string): Promise<boolean> {
|
||||||
|
try {
|
||||||
|
await policiesContainer().item(id, productId).delete();
|
||||||
|
return true;
|
||||||
|
} catch {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// ─── Clusters ───────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
export async function listClusters(
|
||||||
|
productId: string,
|
||||||
|
filters?: {
|
||||||
|
platform?: string;
|
||||||
|
module?: string;
|
||||||
|
from?: string;
|
||||||
|
to?: string;
|
||||||
|
limit?: number;
|
||||||
|
}
|
||||||
|
): Promise<TelemetryErrorCluster[]> {
|
||||||
|
const conditions: string[] = ['c.productId = @productId'];
|
||||||
|
const parameters: Array<{ name: string; value: string | number | boolean }> = [
|
||||||
|
{ name: '@productId', value: productId },
|
||||||
|
];
|
||||||
|
|
||||||
|
if (filters?.platform) {
|
||||||
|
conditions.push('c.platform = @platform');
|
||||||
|
parameters.push({ name: '@platform', value: filters.platform });
|
||||||
|
}
|
||||||
|
if (filters?.module) {
|
||||||
|
conditions.push('c.module = @module');
|
||||||
|
parameters.push({ name: '@module', value: filters.module });
|
||||||
|
}
|
||||||
|
if (filters?.from) {
|
||||||
|
conditions.push('c.lastSeenAt >= @from');
|
||||||
|
parameters.push({ name: '@from', value: filters.from });
|
||||||
|
}
|
||||||
|
if (filters?.to) {
|
||||||
|
conditions.push('c.lastSeenAt <= @to');
|
||||||
|
parameters.push({ name: '@to', value: filters.to });
|
||||||
|
}
|
||||||
|
|
||||||
|
const limit = filters?.limit ?? 50;
|
||||||
|
const query = `SELECT TOP ${limit} * FROM c WHERE ${conditions.join(' AND ')} ORDER BY c.totalCount DESC`;
|
||||||
|
|
||||||
|
const { resources } = await clustersContainer()
|
||||||
|
.items.query<TelemetryErrorCluster>({ query, parameters })
|
||||||
|
.fetchAll();
|
||||||
|
return resources;
|
||||||
|
}
|
||||||
|
|
||||||
|
export async function upsertCluster(cluster: TelemetryErrorCluster): Promise<void> {
|
||||||
|
await clustersContainer().items.upsert(cluster);
|
||||||
|
}
|
||||||
|
|
||||||
|
export async function getCluster(id: string, pk: string): Promise<TelemetryErrorCluster | null> {
|
||||||
|
try {
|
||||||
|
const { resource } = await clustersContainer().item(id, pk).read<TelemetryErrorCluster>();
|
||||||
|
return resource ?? null;
|
||||||
|
} catch {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
}
|
||||||
535
services/platform-service/src/modules/telemetry/routes.ts
Normal file
535
services/platform-service/src/modules/telemetry/routes.ts
Normal file
@ -0,0 +1,535 @@
|
|||||||
|
/**
|
||||||
|
* Telemetry REST endpoints.
|
||||||
|
*
|
||||||
|
* POST /telemetry/events — batch ingest (any auth)
|
||||||
|
* GET /telemetry/config — collection config for clients
|
||||||
|
* GET /telemetry/query — admin query
|
||||||
|
* GET /telemetry/clusters — admin error clusters
|
||||||
|
* GET /telemetry/policies — list policies (admin)
|
||||||
|
* POST /telemetry/policies — create policy (admin)
|
||||||
|
* PUT /telemetry/policies/:id — update policy (admin)
|
||||||
|
* DELETE /telemetry/policies/:id — delete policy (admin)
|
||||||
|
* DELETE /telemetry/user/:userId — GDPR erasure (admin)
|
||||||
|
*/
|
||||||
|
|
||||||
|
import type { FastifyInstance } from 'fastify';
|
||||||
|
import { randomUUID } from 'node:crypto';
|
||||||
|
import { createHash } from 'node:crypto';
|
||||||
|
import { getRequestProductId } from '../../lib/request-context.js';
|
||||||
|
import { BadRequestError, NotFoundError, UnauthorizedError } from '../../lib/errors.js';
|
||||||
|
import { hashUserFlag } from '../flags/routes.js';
|
||||||
|
import * as repo from './repository.js';
|
||||||
|
import {
|
||||||
|
TelemetryIngestRequestSchema,
|
||||||
|
CreatePolicySchema,
|
||||||
|
UpdatePolicySchema,
|
||||||
|
TelemetryQuerySchema,
|
||||||
|
type TelemetryEventDoc,
|
||||||
|
type TelemetryCollectionPolicyDoc,
|
||||||
|
type TelemetryCollectionConfig,
|
||||||
|
type TelemetryErrorCluster,
|
||||||
|
} from './types.js';
|
||||||
|
|
||||||
|
// ─── Helpers ────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
const DEFAULT_EVENT_TTL_DAYS = parseInt(process.env.TELEMETRY_EVENT_TTL_DAYS ?? '30', 10);
|
||||||
|
const DEFAULT_CLUSTER_TTL_DAYS = parseInt(process.env.TELEMETRY_CLUSTER_TTL_DAYS ?? '90', 10);
|
||||||
|
const TELEMETRY_ENABLED = process.env.TELEMETRY_ENABLED !== 'false';
|
||||||
|
const PII_SCAN_ENABLED = process.env.TELEMETRY_PII_SCAN_ENABLED !== 'false';
|
||||||
|
const CLIENT_BATCH_SIZE = parseInt(process.env.TELEMETRY_CLIENT_BATCH_SIZE ?? '20', 10);
|
||||||
|
const CLIENT_FLUSH_MS = parseInt(process.env.TELEMETRY_CLIENT_FLUSH_MS ?? '60000', 10);
|
||||||
|
const CLIENT_MAX_QUEUE = parseInt(process.env.TELEMETRY_CLIENT_MAX_QUEUE ?? '200', 10);
|
||||||
|
|
||||||
|
/** PII patterns — reject events containing these. */
|
||||||
|
const PII_PATTERNS = [
|
||||||
|
/\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Z]{2,}\b/i, // email
|
||||||
|
/\b\d{3}[-.]?\d{3}[-.]?\d{4}\b/, // US phone
|
||||||
|
/\b\d{4}[- ]?\d{4}[- ]?\d{4}[- ]?\d{4}\b/, // credit card
|
||||||
|
/\b\d{3}-\d{2}-\d{4}\b/, // SSN
|
||||||
|
];
|
||||||
|
|
||||||
|
function containsPII(text: string): boolean {
|
||||||
|
if (!PII_SCAN_ENABLED) return false;
|
||||||
|
return PII_PATTERNS.some(p => p.test(text));
|
||||||
|
}
|
||||||
|
|
||||||
|
function computePk(productId: string, occurredAt: string, platform: string): string {
|
||||||
|
const date = new Date(occurredAt);
|
||||||
|
const yyyyMM = `${date.getUTCFullYear()}${String(date.getUTCMonth() + 1).padStart(2, '0')}`;
|
||||||
|
return `${productId}:${yyyyMM}:${platform}`;
|
||||||
|
}
|
||||||
|
|
||||||
|
function normalizeMessage(msg: string): string {
|
||||||
|
return msg
|
||||||
|
.replace(/[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}/gi, '<UUID>')
|
||||||
|
.replace(/\d+/g, '<N>')
|
||||||
|
.replace(/\/[\w/.]+/g, '<PATH>')
|
||||||
|
.toLowerCase()
|
||||||
|
.trim();
|
||||||
|
}
|
||||||
|
|
||||||
|
function generateFingerprint(event: TelemetryEventDoc): string {
|
||||||
|
const input = [
|
||||||
|
event.platform,
|
||||||
|
event.channel,
|
||||||
|
event.module,
|
||||||
|
event.eventName,
|
||||||
|
event.errorDomain ?? '',
|
||||||
|
event.errorCode ?? '',
|
||||||
|
normalizeMessage(event.message ?? ''),
|
||||||
|
].join(':');
|
||||||
|
return createHash('sha256').update(input).digest('hex').substring(0, 16);
|
||||||
|
}
|
||||||
|
|
||||||
|
function requireAdmin(req: { jwtPayload?: { role?: string } }): void {
|
||||||
|
if (req.jwtPayload?.role !== 'admin') {
|
||||||
|
throw new UnauthorizedError('Admin access required');
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// ─── Policy evaluation helpers ──────────────────────────────────────
|
||||||
|
|
||||||
|
interface ClientContext {
|
||||||
|
userId?: string;
|
||||||
|
anonymousInstallId?: string;
|
||||||
|
platform?: string;
|
||||||
|
channel?: string;
|
||||||
|
osFamily?: string;
|
||||||
|
appVersion?: string;
|
||||||
|
buildNumber?: string;
|
||||||
|
releaseChannel?: string;
|
||||||
|
countryCode?: string;
|
||||||
|
regionCode?: string;
|
||||||
|
}
|
||||||
|
|
||||||
|
function policyMatchesContext(policy: TelemetryCollectionPolicyDoc, ctx: ClientContext): boolean {
|
||||||
|
const t = policy.targeting;
|
||||||
|
|
||||||
|
// Check time bounds
|
||||||
|
const now = new Date().toISOString();
|
||||||
|
if (policy.startsAt && now < policy.startsAt) return false;
|
||||||
|
if (policy.expiresAt && now > policy.expiresAt) return false;
|
||||||
|
|
||||||
|
// User targeting
|
||||||
|
if (t.userIds?.length && ctx.userId && !t.userIds.includes(ctx.userId)) return false;
|
||||||
|
if (
|
||||||
|
t.anonymousInstallIds?.length &&
|
||||||
|
ctx.anonymousInstallId &&
|
||||||
|
!t.anonymousInstallIds.includes(ctx.anonymousInstallId)
|
||||||
|
)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
// Platform targeting
|
||||||
|
if (t.platforms?.length && ctx.platform && !t.platforms.includes(ctx.platform as never))
|
||||||
|
return false;
|
||||||
|
if (t.channels?.length && ctx.channel && !t.channels.includes(ctx.channel as never)) return false;
|
||||||
|
if (t.osFamilies?.length && ctx.osFamily && !t.osFamilies.includes(ctx.osFamily as never))
|
||||||
|
return false;
|
||||||
|
|
||||||
|
// Version targeting
|
||||||
|
if (t.appVersions?.length && ctx.appVersion && !t.appVersions.includes(ctx.appVersion))
|
||||||
|
return false;
|
||||||
|
if (t.buildNumbers?.length && ctx.buildNumber && !t.buildNumbers.includes(ctx.buildNumber))
|
||||||
|
return false;
|
||||||
|
if (t.buildNumberRange && ctx.buildNumber) {
|
||||||
|
const bn = parseInt(ctx.buildNumber, 10);
|
||||||
|
if (!isNaN(bn)) {
|
||||||
|
if (t.buildNumberRange.min !== undefined && bn < t.buildNumberRange.min) return false;
|
||||||
|
if (t.buildNumberRange.max !== undefined && bn > t.buildNumberRange.max) return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Region targeting
|
||||||
|
if (t.countryCodes?.length && ctx.countryCode && !t.countryCodes.includes(ctx.countryCode))
|
||||||
|
return false;
|
||||||
|
if (t.regionCodes?.length && ctx.regionCode && !t.regionCodes.includes(ctx.regionCode))
|
||||||
|
return false;
|
||||||
|
|
||||||
|
// Release channel targeting
|
||||||
|
if (
|
||||||
|
t.releaseChannels?.length &&
|
||||||
|
ctx.releaseChannel &&
|
||||||
|
!t.releaseChannels.includes(ctx.releaseChannel as never)
|
||||||
|
)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
// Percentage rollout (deterministic)
|
||||||
|
if (t.percentage !== undefined && t.percentage < 100) {
|
||||||
|
const identity = ctx.userId || ctx.anonymousInstallId || '';
|
||||||
|
if (!identity) return false;
|
||||||
|
const bucket = hashUserFlag(identity, `telemetry_policy_${policy.id}`);
|
||||||
|
if (bucket >= t.percentage) return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
function mergePolicies(policies: TelemetryCollectionPolicyDoc[]): TelemetryCollectionConfig {
|
||||||
|
if (policies.length === 0) {
|
||||||
|
// Hardcoded default
|
||||||
|
return {
|
||||||
|
enabled: TELEMETRY_ENABLED,
|
||||||
|
eventTypes: ['warn', 'error', 'fatal'],
|
||||||
|
modules: [],
|
||||||
|
samplingRates: { debug: 0, info: 0, warn: 0.5, error: 1, fatal: 1 },
|
||||||
|
batchSize: CLIENT_BATCH_SIZE,
|
||||||
|
flushIntervalMs: CLIENT_FLUSH_MS,
|
||||||
|
maxQueueSize: CLIENT_MAX_QUEUE,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
// Policies are already sorted by priority DESC
|
||||||
|
const eventTypesSet = new Set<string>();
|
||||||
|
const modulesSet = new Set<string>();
|
||||||
|
const rates: Record<string, number> = { debug: 0, info: 0, warn: 0, error: 0, fatal: 0 };
|
||||||
|
let enabled = TELEMETRY_ENABLED;
|
||||||
|
|
||||||
|
for (const p of policies) {
|
||||||
|
// Union event types
|
||||||
|
for (const et of p.eventTypes) {
|
||||||
|
eventTypesSet.add(et);
|
||||||
|
// Highest-priority policy's rate wins per event type
|
||||||
|
if (rates[et] === 0) {
|
||||||
|
rates[et] = p.samplingRate;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Union modules
|
||||||
|
for (const m of p.modules) {
|
||||||
|
modulesSet.add(m);
|
||||||
|
}
|
||||||
|
// samplingRate 0 from highest-priority policy disables everything
|
||||||
|
if (p.priority >= 999 && p.samplingRate === 0) {
|
||||||
|
enabled = false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return {
|
||||||
|
enabled,
|
||||||
|
eventTypes: Array.from(eventTypesSet),
|
||||||
|
modules: Array.from(modulesSet),
|
||||||
|
samplingRates: {
|
||||||
|
debug: rates.debug,
|
||||||
|
info: rates.info,
|
||||||
|
warn: rates.warn,
|
||||||
|
error: rates.error,
|
||||||
|
fatal: rates.fatal,
|
||||||
|
},
|
||||||
|
batchSize: CLIENT_BATCH_SIZE,
|
||||||
|
flushIntervalMs: CLIENT_FLUSH_MS,
|
||||||
|
maxQueueSize: CLIENT_MAX_QUEUE,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
// ─── Cluster update ─────────────────────────────────────────────────
|
||||||
|
|
||||||
|
async function updateClusterForEvent(event: TelemetryEventDoc): Promise<void> {
|
||||||
|
if (!['warn', 'error', 'fatal'].includes(event.eventType)) return;
|
||||||
|
|
||||||
|
const fingerprint = event.fingerprint || generateFingerprint(event);
|
||||||
|
const date = new Date(event.occurredAt);
|
||||||
|
const yyyyMM = `${date.getUTCFullYear()}${String(date.getUTCMonth() + 1).padStart(2, '0')}`;
|
||||||
|
const clusterId = `${fingerprint}:${yyyyMM}`;
|
||||||
|
const pk = `${event.productId}:${event.platform}:${event.module}`;
|
||||||
|
|
||||||
|
const existing = await repo.getCluster(clusterId, pk);
|
||||||
|
|
||||||
|
if (existing) {
|
||||||
|
existing.totalCount++;
|
||||||
|
existing.lastSeenAt = event.receivedAt;
|
||||||
|
existing.sampleMessage = event.message;
|
||||||
|
existing.sampleErrorDomain = event.errorDomain;
|
||||||
|
existing.sampleErrorCode = event.errorCode;
|
||||||
|
|
||||||
|
// Update affected users (dedup, cap 100)
|
||||||
|
const uid = event.userId || event.anonymousInstallId;
|
||||||
|
if (uid && event.userId && !existing.affectedUserIds.includes(uid)) {
|
||||||
|
if (existing.affectedUserIds.length < 100) existing.affectedUserIds.push(uid);
|
||||||
|
}
|
||||||
|
if (uid && event.anonymousInstallId && !existing.affectedInstallIds.includes(uid)) {
|
||||||
|
if (existing.affectedInstallIds.length < 100) existing.affectedInstallIds.push(uid);
|
||||||
|
}
|
||||||
|
if (!existing.affectedOsFamilies.includes(event.osFamily)) {
|
||||||
|
existing.affectedOsFamilies.push(event.osFamily);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Update version breakdown
|
||||||
|
const vEntry = existing.affectedVersions.find(
|
||||||
|
v => v.appVersion === event.appVersion && v.buildNumber === event.buildNumber
|
||||||
|
);
|
||||||
|
if (vEntry) {
|
||||||
|
vEntry.count++;
|
||||||
|
vEntry.lastSeenAt = event.receivedAt;
|
||||||
|
} else if (existing.affectedVersions.length < 50) {
|
||||||
|
existing.affectedVersions.push({
|
||||||
|
appVersion: event.appVersion,
|
||||||
|
buildNumber: event.buildNumber,
|
||||||
|
count: 1,
|
||||||
|
lastSeenAt: event.receivedAt,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
// Escalate severity
|
||||||
|
const severityOrder = { warn: 0, error: 1, fatal: 2 };
|
||||||
|
const eventSev = event.eventType as 'warn' | 'error' | 'fatal';
|
||||||
|
if ((severityOrder[eventSev] ?? 0) > (severityOrder[existing.severity] ?? 0)) {
|
||||||
|
existing.severity = eventSev;
|
||||||
|
}
|
||||||
|
|
||||||
|
await repo.upsertCluster(existing);
|
||||||
|
} else {
|
||||||
|
const newCluster: TelemetryErrorCluster = {
|
||||||
|
id: clusterId,
|
||||||
|
pk,
|
||||||
|
productId: event.productId,
|
||||||
|
fingerprint,
|
||||||
|
platform: event.platform,
|
||||||
|
channel: event.channel,
|
||||||
|
module: event.module,
|
||||||
|
eventName: event.eventName,
|
||||||
|
affectedVersions: [
|
||||||
|
{
|
||||||
|
appVersion: event.appVersion,
|
||||||
|
buildNumber: event.buildNumber,
|
||||||
|
count: 1,
|
||||||
|
lastSeenAt: event.receivedAt,
|
||||||
|
},
|
||||||
|
],
|
||||||
|
firstSeenAt: event.receivedAt,
|
||||||
|
lastSeenAt: event.receivedAt,
|
||||||
|
totalCount: 1,
|
||||||
|
affectedUserIds: event.userId ? [event.userId] : [],
|
||||||
|
affectedInstallIds: event.anonymousInstallId ? [event.anonymousInstallId] : [],
|
||||||
|
affectedOsFamilies: [event.osFamily],
|
||||||
|
sampleErrorDomain: event.errorDomain,
|
||||||
|
sampleErrorCode: event.errorCode,
|
||||||
|
sampleMessage: event.message,
|
||||||
|
severity: event.eventType as 'warn' | 'error' | 'fatal',
|
||||||
|
ttl: DEFAULT_CLUSTER_TTL_DAYS * 86400,
|
||||||
|
};
|
||||||
|
await repo.upsertCluster(newCluster);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// ─── Routes ─────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
export async function telemetryRoutes(app: FastifyInstance) {
|
||||||
|
// ── Batch ingest ──────────────────────────────────────────────
|
||||||
|
app.post('/telemetry/events', async (req, reply) => {
|
||||||
|
if (!TELEMETRY_ENABLED) {
|
||||||
|
return { accepted: 0, rejected: 0, serverTime: new Date().toISOString() };
|
||||||
|
}
|
||||||
|
|
||||||
|
// Auth: JWT or X-Install-Token
|
||||||
|
const installToken = req.headers['x-install-token'] as string | undefined;
|
||||||
|
if (!req.jwtPayload && !installToken) {
|
||||||
|
throw new UnauthorizedError('JWT or X-Install-Token required');
|
||||||
|
}
|
||||||
|
|
||||||
|
const parsed = TelemetryIngestRequestSchema.safeParse(req.body);
|
||||||
|
if (!parsed.success) {
|
||||||
|
throw new BadRequestError(parsed.error.issues.map(i => i.message).join('; '));
|
||||||
|
}
|
||||||
|
|
||||||
|
const { productId, events } = parsed.data;
|
||||||
|
const now = new Date().toISOString();
|
||||||
|
const ttl = DEFAULT_EVENT_TTL_DAYS * 86400;
|
||||||
|
|
||||||
|
let accepted = 0;
|
||||||
|
let rejected = 0;
|
||||||
|
const errors: Array<{ index: number; reason: string }> = [];
|
||||||
|
const docsToInsert: TelemetryEventDoc[] = [];
|
||||||
|
|
||||||
|
for (let i = 0; i < events.length; i++) {
|
||||||
|
const event = events[i];
|
||||||
|
|
||||||
|
// Validate productId matches request-level
|
||||||
|
if (event.productId !== productId) {
|
||||||
|
errors.push({ index: i, reason: `productId mismatch: expected ${productId}` });
|
||||||
|
rejected++;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
// PII scan
|
||||||
|
const fieldsToScan = [event.message, event.eventName, event.errorDomain].filter(Boolean);
|
||||||
|
if (fieldsToScan.some(f => containsPII(f!))) {
|
||||||
|
errors.push({ index: i, reason: 'PII detected' });
|
||||||
|
rejected++;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
const pk = computePk(productId, event.occurredAt, event.platform);
|
||||||
|
const doc: TelemetryEventDoc = {
|
||||||
|
...event,
|
||||||
|
pk,
|
||||||
|
receivedAt: now,
|
||||||
|
ttl,
|
||||||
|
};
|
||||||
|
|
||||||
|
docsToInsert.push(doc);
|
||||||
|
accepted++;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Batch upsert (idempotent by event id)
|
||||||
|
if (docsToInsert.length > 0) {
|
||||||
|
await repo.upsertEventsBatch(docsToInsert);
|
||||||
|
|
||||||
|
// Fire-and-forget cluster updates for warn/error/fatal
|
||||||
|
for (const doc of docsToInsert) {
|
||||||
|
if (['warn', 'error', 'fatal'].includes(doc.eventType)) {
|
||||||
|
updateClusterForEvent(doc).catch(err => {
|
||||||
|
req.log.warn({ err, eventId: doc.id }, 'Cluster update failed');
|
||||||
|
});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
reply.code(accepted > 0 ? 200 : 400);
|
||||||
|
return {
|
||||||
|
accepted,
|
||||||
|
rejected,
|
||||||
|
errors: errors.length > 0 ? errors : undefined,
|
||||||
|
serverTime: now,
|
||||||
|
};
|
||||||
|
});
|
||||||
|
|
||||||
|
// ── Collection config (client poll) ───────────────────────────
|
||||||
|
app.get('/telemetry/config', async req => {
|
||||||
|
if (!TELEMETRY_ENABLED) {
|
||||||
|
return {
|
||||||
|
enabled: false,
|
||||||
|
eventTypes: [],
|
||||||
|
modules: [],
|
||||||
|
samplingRates: { debug: 0, info: 0, warn: 0, error: 0, fatal: 0 },
|
||||||
|
batchSize: CLIENT_BATCH_SIZE,
|
||||||
|
flushIntervalMs: CLIENT_FLUSH_MS,
|
||||||
|
maxQueueSize: CLIENT_MAX_QUEUE,
|
||||||
|
} satisfies TelemetryCollectionConfig;
|
||||||
|
}
|
||||||
|
|
||||||
|
const productId = getRequestProductId(req);
|
||||||
|
const ctx: ClientContext = req.query as ClientContext;
|
||||||
|
|
||||||
|
const allPolicies = await repo.listPolicies(productId);
|
||||||
|
const activePolicies = allPolicies.filter(p => p.enabled && policyMatchesContext(p, ctx));
|
||||||
|
|
||||||
|
return mergePolicies(activePolicies);
|
||||||
|
});
|
||||||
|
|
||||||
|
// ── Admin: query events ───────────────────────────────────────
|
||||||
|
app.get('/telemetry/query', async req => {
|
||||||
|
requireAdmin(req);
|
||||||
|
const productId = getRequestProductId(req);
|
||||||
|
const parsed = TelemetryQuerySchema.safeParse(req.query);
|
||||||
|
if (!parsed.success) {
|
||||||
|
throw new BadRequestError(parsed.error.issues.map(i => i.message).join('; '));
|
||||||
|
}
|
||||||
|
const result = await repo.queryEvents(productId, parsed.data);
|
||||||
|
return {
|
||||||
|
events: result.events,
|
||||||
|
total: result.events.length,
|
||||||
|
continuationToken: result.continuationToken,
|
||||||
|
};
|
||||||
|
});
|
||||||
|
|
||||||
|
// ── Admin: error clusters ─────────────────────────────────────
|
||||||
|
app.get('/telemetry/clusters', async req => {
|
||||||
|
requireAdmin(req);
|
||||||
|
const productId = getRequestProductId(req);
|
||||||
|
const {
|
||||||
|
platform,
|
||||||
|
module: mod,
|
||||||
|
from,
|
||||||
|
to,
|
||||||
|
limit,
|
||||||
|
} = req.query as {
|
||||||
|
platform?: string;
|
||||||
|
module?: string;
|
||||||
|
from?: string;
|
||||||
|
to?: string;
|
||||||
|
limit?: string;
|
||||||
|
};
|
||||||
|
const clusters = await repo.listClusters(productId, {
|
||||||
|
platform,
|
||||||
|
module: mod,
|
||||||
|
from,
|
||||||
|
to,
|
||||||
|
limit: limit ? parseInt(limit, 10) : undefined,
|
||||||
|
});
|
||||||
|
return { clusters, total: clusters.length };
|
||||||
|
});
|
||||||
|
|
||||||
|
// ── Admin: list policies ──────────────────────────────────────
|
||||||
|
app.get('/telemetry/policies', async req => {
|
||||||
|
requireAdmin(req);
|
||||||
|
const productId = getRequestProductId(req);
|
||||||
|
return { policies: await repo.listPolicies(productId) };
|
||||||
|
});
|
||||||
|
|
||||||
|
// ── Admin: create policy ──────────────────────────────────────
|
||||||
|
app.post('/telemetry/policies', async (req, reply) => {
|
||||||
|
requireAdmin(req);
|
||||||
|
const productId = getRequestProductId(req);
|
||||||
|
const parsed = CreatePolicySchema.safeParse(req.body);
|
||||||
|
if (!parsed.success) {
|
||||||
|
throw new BadRequestError(parsed.error.issues.map(i => i.message).join('; '));
|
||||||
|
}
|
||||||
|
|
||||||
|
const now = new Date().toISOString();
|
||||||
|
const doc: TelemetryCollectionPolicyDoc = {
|
||||||
|
id: randomUUID(),
|
||||||
|
productId,
|
||||||
|
...parsed.data,
|
||||||
|
createdAt: now,
|
||||||
|
updatedAt: now,
|
||||||
|
createdBy: req.jwtPayload?.sub ?? 'unknown',
|
||||||
|
};
|
||||||
|
|
||||||
|
const created = await repo.createPolicy(doc);
|
||||||
|
reply.code(201);
|
||||||
|
return created;
|
||||||
|
});
|
||||||
|
|
||||||
|
// ── Admin: update policy ──────────────────────────────────────
|
||||||
|
app.put('/telemetry/policies/:id', async req => {
|
||||||
|
requireAdmin(req);
|
||||||
|
const { id } = req.params as { id: string };
|
||||||
|
const productId = getRequestProductId(req);
|
||||||
|
|
||||||
|
const parsed = UpdatePolicySchema.safeParse(req.body);
|
||||||
|
if (!parsed.success) {
|
||||||
|
throw new BadRequestError(parsed.error.issues.map(i => i.message).join('; '));
|
||||||
|
}
|
||||||
|
|
||||||
|
// Convert null to undefined for optional fields (Zod nullable → TS optional)
|
||||||
|
const updates: Record<string, unknown> = { ...parsed.data };
|
||||||
|
if (updates.startsAt === null) updates.startsAt = undefined;
|
||||||
|
if (updates.expiresAt === null) updates.expiresAt = undefined;
|
||||||
|
|
||||||
|
const updated = await repo.updatePolicy(
|
||||||
|
id,
|
||||||
|
productId,
|
||||||
|
updates as Partial<TelemetryCollectionPolicyDoc>
|
||||||
|
);
|
||||||
|
if (!updated) throw new NotFoundError('Policy not found');
|
||||||
|
return updated;
|
||||||
|
});
|
||||||
|
|
||||||
|
// ── Admin: delete policy ──────────────────────────────────────
|
||||||
|
app.delete('/telemetry/policies/:id', async req => {
|
||||||
|
requireAdmin(req);
|
||||||
|
const { id } = req.params as { id: string };
|
||||||
|
const productId = getRequestProductId(req);
|
||||||
|
const deleted = await repo.deletePolicy(id, productId);
|
||||||
|
if (!deleted) throw new NotFoundError('Policy not found');
|
||||||
|
return { success: true };
|
||||||
|
});
|
||||||
|
|
||||||
|
// ── Admin: GDPR erasure ───────────────────────────────────────
|
||||||
|
app.delete('/telemetry/user/:userId', async req => {
|
||||||
|
requireAdmin(req);
|
||||||
|
const { userId } = req.params as { userId: string };
|
||||||
|
const productId = getRequestProductId(req);
|
||||||
|
const eventsDeleted = await repo.deleteEventsByUserId(productId, userId);
|
||||||
|
return { userId, eventsDeleted, clustersUpdated: 0 };
|
||||||
|
});
|
||||||
|
}
|
||||||
@ -0,0 +1,317 @@
|
|||||||
|
/**
|
||||||
|
* Telemetry module tests — types, policy evaluation, fingerprinting.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import { describe, it, expect } from 'vitest';
|
||||||
|
import {
|
||||||
|
TelemetryEventSchema,
|
||||||
|
TelemetryIngestRequestSchema,
|
||||||
|
CreatePolicySchema,
|
||||||
|
UpdatePolicySchema,
|
||||||
|
TelemetryQuerySchema,
|
||||||
|
} from './types.js';
|
||||||
|
|
||||||
|
// ─── Minimal valid event for reuse ──────────────────────────────────
|
||||||
|
|
||||||
|
function validEvent(overrides: Record<string, unknown> = {}) {
|
||||||
|
return {
|
||||||
|
id: '550e8400-e29b-41d4-a716-446655440000',
|
||||||
|
productId: 'lysnrai',
|
||||||
|
anonymousInstallId: '660e8400-e29b-41d4-a716-446655440001',
|
||||||
|
sessionId: 'sess_abc123',
|
||||||
|
platform: 'ios',
|
||||||
|
channel: 'keyboard_extension',
|
||||||
|
osFamily: 'ios',
|
||||||
|
appVersion: '1.0.0',
|
||||||
|
buildNumber: '26',
|
||||||
|
releaseChannel: 'beta',
|
||||||
|
eventType: 'error',
|
||||||
|
module: 'keyboard_dictation',
|
||||||
|
eventName: 'recognition_failed',
|
||||||
|
occurredAt: '2026-02-17T08:00:00.000Z',
|
||||||
|
...overrides,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
// ─── TelemetryEventSchema ───────────────────────────────────────────
|
||||||
|
|
||||||
|
describe('TelemetryEventSchema', () => {
|
||||||
|
it('accepts a valid event with anonymousInstallId', () => {
|
||||||
|
const result = TelemetryEventSchema.safeParse(validEvent());
|
||||||
|
expect(result.success).toBe(true);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('accepts a valid event with userId', () => {
|
||||||
|
const result = TelemetryEventSchema.safeParse(
|
||||||
|
validEvent({ userId: 'usr_abc', anonymousInstallId: undefined })
|
||||||
|
);
|
||||||
|
expect(result.success).toBe(true);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('accepts event with both userId and anonymousInstallId', () => {
|
||||||
|
const result = TelemetryEventSchema.safeParse(validEvent({ userId: 'usr_abc' }));
|
||||||
|
expect(result.success).toBe(true);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('rejects event with neither userId nor anonymousInstallId', () => {
|
||||||
|
const result = TelemetryEventSchema.safeParse(validEvent({ anonymousInstallId: undefined }));
|
||||||
|
expect(result.success).toBe(false);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('rejects empty id', () => {
|
||||||
|
const result = TelemetryEventSchema.safeParse(validEvent({ id: 'not-a-uuid' }));
|
||||||
|
expect(result.success).toBe(false);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('rejects invalid platform', () => {
|
||||||
|
const result = TelemetryEventSchema.safeParse(validEvent({ platform: 'symbian' }));
|
||||||
|
expect(result.success).toBe(false);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('rejects invalid channel', () => {
|
||||||
|
const result = TelemetryEventSchema.safeParse(validEvent({ channel: 'fax_machine' }));
|
||||||
|
expect(result.success).toBe(false);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('rejects invalid osFamily', () => {
|
||||||
|
const result = TelemetryEventSchema.safeParse(validEvent({ osFamily: 'beos' }));
|
||||||
|
expect(result.success).toBe(false);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('rejects invalid eventType', () => {
|
||||||
|
const result = TelemetryEventSchema.safeParse(validEvent({ eventType: 'trace' }));
|
||||||
|
expect(result.success).toBe(false);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('rejects message > 512 chars', () => {
|
||||||
|
const result = TelemetryEventSchema.safeParse(validEvent({ message: 'x'.repeat(513) }));
|
||||||
|
expect(result.success).toBe(false);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('accepts message at exactly 512 chars', () => {
|
||||||
|
const result = TelemetryEventSchema.safeParse(validEvent({ message: 'x'.repeat(512) }));
|
||||||
|
expect(result.success).toBe(true);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('rejects stackTrace > 8192 chars', () => {
|
||||||
|
const result = TelemetryEventSchema.safeParse(
|
||||||
|
validEvent({ stackTrace: 'x'.repeat(8193), eventType: 'fatal' })
|
||||||
|
);
|
||||||
|
expect(result.success).toBe(false);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('accepts optional fields', () => {
|
||||||
|
const result = TelemetryEventSchema.safeParse(
|
||||||
|
validEvent({
|
||||||
|
feature: 'voice_typing',
|
||||||
|
errorDomain: 'kAFAssistantErrorDomain',
|
||||||
|
errorCode: '209',
|
||||||
|
message: 'Recognition timed out',
|
||||||
|
fingerprint: 'abc123def456',
|
||||||
|
tags: { backend: 'azure' },
|
||||||
|
metrics: { duration_ms: 5000 },
|
||||||
|
context: { dictation: { backend: 'azure' } },
|
||||||
|
osVersion: 'iOS 18.2',
|
||||||
|
deviceModel: 'iPhone17,3',
|
||||||
|
locale: 'en-US',
|
||||||
|
timezone: 'America/Los_Angeles',
|
||||||
|
})
|
||||||
|
);
|
||||||
|
expect(result.success).toBe(true);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('validates all platform enum values', () => {
|
||||||
|
for (const p of ['ios', 'android', 'web', 'desktop']) {
|
||||||
|
const result = TelemetryEventSchema.safeParse(validEvent({ platform: p }));
|
||||||
|
expect(result.success).toBe(true);
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
it('validates all channel enum values', () => {
|
||||||
|
for (const c of [
|
||||||
|
'mobile_app',
|
||||||
|
'keyboard_extension',
|
||||||
|
'web_app',
|
||||||
|
'desktop_app',
|
||||||
|
'backend_service',
|
||||||
|
]) {
|
||||||
|
const result = TelemetryEventSchema.safeParse(validEvent({ channel: c }));
|
||||||
|
expect(result.success).toBe(true);
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
it('validates all osFamily enum values', () => {
|
||||||
|
for (const os of ['ios', 'android', 'macos', 'windows', 'linux', 'chromeos', 'other']) {
|
||||||
|
const result = TelemetryEventSchema.safeParse(validEvent({ osFamily: os }));
|
||||||
|
expect(result.success).toBe(true);
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
it('validates all eventType enum values', () => {
|
||||||
|
for (const et of ['debug', 'info', 'warn', 'error', 'fatal']) {
|
||||||
|
const result = TelemetryEventSchema.safeParse(validEvent({ eventType: et }));
|
||||||
|
expect(result.success).toBe(true);
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
it('validates all releaseChannel enum values', () => {
|
||||||
|
for (const rc of ['dev', 'beta', 'prod']) {
|
||||||
|
const result = TelemetryEventSchema.safeParse(validEvent({ releaseChannel: rc }));
|
||||||
|
expect(result.success).toBe(true);
|
||||||
|
}
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
|
// ─── TelemetryIngestRequestSchema ───────────────────────────────────
|
||||||
|
|
||||||
|
describe('TelemetryIngestRequestSchema', () => {
|
||||||
|
it('accepts valid batch', () => {
|
||||||
|
const result = TelemetryIngestRequestSchema.safeParse({
|
||||||
|
productId: 'lysnrai',
|
||||||
|
events: [validEvent()],
|
||||||
|
});
|
||||||
|
expect(result.success).toBe(true);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('rejects empty events array', () => {
|
||||||
|
const result = TelemetryIngestRequestSchema.safeParse({
|
||||||
|
productId: 'lysnrai',
|
||||||
|
events: [],
|
||||||
|
});
|
||||||
|
expect(result.success).toBe(false);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('rejects > 50 events', () => {
|
||||||
|
const events = Array.from({ length: 51 }, (_, i) =>
|
||||||
|
validEvent({ id: `550e8400-e29b-41d4-a716-44665544${String(i).padStart(4, '0')}` })
|
||||||
|
);
|
||||||
|
const result = TelemetryIngestRequestSchema.safeParse({
|
||||||
|
productId: 'lysnrai',
|
||||||
|
events,
|
||||||
|
});
|
||||||
|
expect(result.success).toBe(false);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('accepts clientClockSkewMs', () => {
|
||||||
|
const result = TelemetryIngestRequestSchema.safeParse({
|
||||||
|
productId: 'lysnrai',
|
||||||
|
events: [validEvent()],
|
||||||
|
clientClockSkewMs: -500,
|
||||||
|
});
|
||||||
|
expect(result.success).toBe(true);
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
|
// ─── CreatePolicySchema ─────────────────────────────────────────────
|
||||||
|
|
||||||
|
describe('CreatePolicySchema', () => {
|
||||||
|
it('accepts minimal policy', () => {
|
||||||
|
const result = CreatePolicySchema.safeParse({ name: 'Test policy' });
|
||||||
|
expect(result.success).toBe(true);
|
||||||
|
if (result.success) {
|
||||||
|
expect(result.data.enabled).toBe(true);
|
||||||
|
expect(result.data.priority).toBe(50);
|
||||||
|
expect(result.data.eventTypes).toEqual(['warn', 'error', 'fatal']);
|
||||||
|
expect(result.data.samplingRate).toBe(1.0);
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
it('accepts full policy with targeting', () => {
|
||||||
|
const result = CreatePolicySchema.safeParse({
|
||||||
|
name: 'Debug iOS keyboard',
|
||||||
|
description: 'Debug user X iOS keyboard dictation',
|
||||||
|
enabled: true,
|
||||||
|
priority: 100,
|
||||||
|
eventTypes: ['debug', 'info', 'warn', 'error', 'fatal'],
|
||||||
|
modules: ['keyboard_dictation'],
|
||||||
|
samplingRate: 1.0,
|
||||||
|
targeting: {
|
||||||
|
userIds: ['usr_abc'],
|
||||||
|
platforms: ['ios'],
|
||||||
|
channels: ['keyboard_extension'],
|
||||||
|
osFamilies: ['ios'],
|
||||||
|
buildNumberRange: { min: 1, max: 25 },
|
||||||
|
countryCodes: ['US'],
|
||||||
|
regionCodes: ['US:WA'],
|
||||||
|
releaseChannels: ['beta'],
|
||||||
|
percentage: 100,
|
||||||
|
},
|
||||||
|
startsAt: '2026-02-17T00:00:00.000Z',
|
||||||
|
expiresAt: '2026-02-20T00:00:00.000Z',
|
||||||
|
});
|
||||||
|
expect(result.success).toBe(true);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('rejects empty name', () => {
|
||||||
|
const result = CreatePolicySchema.safeParse({ name: '' });
|
||||||
|
expect(result.success).toBe(false);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('rejects samplingRate > 1', () => {
|
||||||
|
const result = CreatePolicySchema.safeParse({ name: 'Test', samplingRate: 1.5 });
|
||||||
|
expect(result.success).toBe(false);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('rejects priority > 999', () => {
|
||||||
|
const result = CreatePolicySchema.safeParse({ name: 'Test', priority: 1000 });
|
||||||
|
expect(result.success).toBe(false);
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
|
// ─── UpdatePolicySchema ─────────────────────────────────────────────
|
||||||
|
|
||||||
|
describe('UpdatePolicySchema', () => {
|
||||||
|
it('accepts partial updates', () => {
|
||||||
|
const result = UpdatePolicySchema.safeParse({ enabled: false, priority: 10 });
|
||||||
|
expect(result.success).toBe(true);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('accepts empty update', () => {
|
||||||
|
const result = UpdatePolicySchema.safeParse({});
|
||||||
|
expect(result.success).toBe(true);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('accepts nullable startsAt/expiresAt (to clear them)', () => {
|
||||||
|
const result = UpdatePolicySchema.safeParse({ startsAt: null, expiresAt: null });
|
||||||
|
expect(result.success).toBe(true);
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
|
// ─── TelemetryQuerySchema ───────────────────────────────────────────
|
||||||
|
|
||||||
|
describe('TelemetryQuerySchema', () => {
|
||||||
|
it('accepts empty query (defaults)', () => {
|
||||||
|
const result = TelemetryQuerySchema.safeParse({});
|
||||||
|
expect(result.success).toBe(true);
|
||||||
|
if (result.success) {
|
||||||
|
expect(result.data.limit).toBe(50);
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
it('accepts full filter set', () => {
|
||||||
|
const result = TelemetryQuerySchema.safeParse({
|
||||||
|
userId: 'usr_abc',
|
||||||
|
platform: 'ios',
|
||||||
|
channel: 'keyboard_extension',
|
||||||
|
module: 'keyboard_dictation',
|
||||||
|
eventType: 'error',
|
||||||
|
from: '2026-02-01T00:00:00.000Z',
|
||||||
|
to: '2026-02-17T23:59:59.000Z',
|
||||||
|
limit: 100,
|
||||||
|
});
|
||||||
|
expect(result.success).toBe(true);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('rejects limit > 200', () => {
|
||||||
|
const result = TelemetryQuerySchema.safeParse({ limit: 201 });
|
||||||
|
expect(result.success).toBe(false);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('coerces string limit to number', () => {
|
||||||
|
const result = TelemetryQuerySchema.safeParse({ limit: '25' });
|
||||||
|
expect(result.success).toBe(true);
|
||||||
|
if (result.success) {
|
||||||
|
expect(result.data.limit).toBe(25);
|
||||||
|
}
|
||||||
|
});
|
||||||
|
});
|
||||||
255
services/platform-service/src/modules/telemetry/types.ts
Normal file
255
services/platform-service/src/modules/telemetry/types.ts
Normal file
@ -0,0 +1,255 @@
|
|||||||
|
/**
|
||||||
|
* Telemetry types — event schema, collection policies, and cluster types.
|
||||||
|
* See docs/WINDSURF/CLIENT_TELEMETRY_DESIGN.md for full design.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import { z } from 'zod';
|
||||||
|
|
||||||
|
// ─── Enums ──────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
export const PlatformEnum = z.enum(['ios', 'android', 'web', 'desktop']);
|
||||||
|
export const ChannelEnum = z.enum([
|
||||||
|
'mobile_app',
|
||||||
|
'keyboard_extension',
|
||||||
|
'web_app',
|
||||||
|
'desktop_app',
|
||||||
|
'backend_service',
|
||||||
|
]);
|
||||||
|
export const OsFamilyEnum = z.enum([
|
||||||
|
'ios',
|
||||||
|
'android',
|
||||||
|
'macos',
|
||||||
|
'windows',
|
||||||
|
'linux',
|
||||||
|
'chromeos',
|
||||||
|
'other',
|
||||||
|
]);
|
||||||
|
export const EventTypeEnum = z.enum(['debug', 'info', 'warn', 'error', 'fatal']);
|
||||||
|
export const ReleaseChannelEnum = z.enum(['dev', 'beta', 'prod']);
|
||||||
|
|
||||||
|
// ─── Telemetry Event ────────────────────────────────────────────────
|
||||||
|
|
||||||
|
export const TelemetryEventSchema = z
|
||||||
|
.object({
|
||||||
|
// Identity
|
||||||
|
id: z.string().uuid(),
|
||||||
|
productId: z.string().min(1),
|
||||||
|
userId: z.string().optional(),
|
||||||
|
anonymousInstallId: z.string().uuid().optional(),
|
||||||
|
sessionId: z.string().min(1),
|
||||||
|
requestId: z.string().optional(),
|
||||||
|
|
||||||
|
// Source classification
|
||||||
|
platform: PlatformEnum,
|
||||||
|
channel: ChannelEnum,
|
||||||
|
osFamily: OsFamilyEnum,
|
||||||
|
osVersion: z.string().optional(),
|
||||||
|
deviceModel: z.string().optional(),
|
||||||
|
locale: z.string().optional(),
|
||||||
|
timezone: z.string().optional(),
|
||||||
|
|
||||||
|
// App release
|
||||||
|
appVersion: z.string().min(1),
|
||||||
|
buildNumber: z.string().min(1),
|
||||||
|
releaseChannel: ReleaseChannelEnum,
|
||||||
|
|
||||||
|
// Event semantics
|
||||||
|
eventType: EventTypeEnum,
|
||||||
|
module: z.string().min(1),
|
||||||
|
feature: z.string().optional(),
|
||||||
|
eventName: z.string().min(1),
|
||||||
|
|
||||||
|
// Error & diagnostics
|
||||||
|
errorDomain: z.string().optional(),
|
||||||
|
errorCode: z.string().optional(),
|
||||||
|
message: z.string().max(512).optional(),
|
||||||
|
stackTrace: z.string().max(8192).optional(),
|
||||||
|
fingerprint: z.string().optional(),
|
||||||
|
|
||||||
|
// Structured metadata
|
||||||
|
tags: z.record(z.string().max(128)).optional(),
|
||||||
|
metrics: z.record(z.number()).optional(),
|
||||||
|
context: z.record(z.unknown()).optional(),
|
||||||
|
|
||||||
|
// Timing (client provides occurredAt)
|
||||||
|
occurredAt: z.string().datetime(),
|
||||||
|
})
|
||||||
|
.refine(e => e.userId || e.anonymousInstallId, {
|
||||||
|
message: 'At least one of userId or anonymousInstallId is required',
|
||||||
|
});
|
||||||
|
|
||||||
|
export type TelemetryEvent = z.infer<typeof TelemetryEventSchema>;
|
||||||
|
|
||||||
|
/** Server-enriched event document stored in Cosmos. */
|
||||||
|
export interface TelemetryEventDoc extends TelemetryEvent {
|
||||||
|
pk: string; // ${productId}:${yyyyMM}:${platform}
|
||||||
|
receivedAt: string;
|
||||||
|
ttl: number; // Cosmos TTL in seconds
|
||||||
|
countryCode?: string;
|
||||||
|
regionCode?: string;
|
||||||
|
}
|
||||||
|
|
||||||
|
// ─── Batch Ingest Request ───────────────────────────────────────────
|
||||||
|
|
||||||
|
export const TelemetryIngestRequestSchema = z.object({
|
||||||
|
productId: z.string().min(1),
|
||||||
|
events: z.array(TelemetryEventSchema).min(1).max(50),
|
||||||
|
clientClockSkewMs: z.number().optional(),
|
||||||
|
});
|
||||||
|
|
||||||
|
export type TelemetryIngestRequest = z.infer<typeof TelemetryIngestRequestSchema>;
|
||||||
|
|
||||||
|
export interface TelemetryIngestResponse {
|
||||||
|
accepted: number;
|
||||||
|
rejected: number;
|
||||||
|
errors?: Array<{ index: number; reason: string }>;
|
||||||
|
serverTime: string;
|
||||||
|
}
|
||||||
|
|
||||||
|
// ─── Collection Policy ──────────────────────────────────────────────
|
||||||
|
|
||||||
|
export const TelemetryTargetingSchema = z.object({
|
||||||
|
userIds: z.array(z.string()).optional(),
|
||||||
|
anonymousInstallIds: z.array(z.string()).optional(),
|
||||||
|
platforms: z.array(PlatformEnum).optional(),
|
||||||
|
channels: z.array(ChannelEnum).optional(),
|
||||||
|
osFamilies: z.array(OsFamilyEnum).optional(),
|
||||||
|
appVersions: z.array(z.string()).optional(),
|
||||||
|
appVersionRange: z
|
||||||
|
.object({
|
||||||
|
min: z.string().optional(),
|
||||||
|
max: z.string().optional(),
|
||||||
|
})
|
||||||
|
.optional(),
|
||||||
|
buildNumbers: z.array(z.string()).optional(),
|
||||||
|
buildNumberRange: z
|
||||||
|
.object({
|
||||||
|
min: z.number().optional(),
|
||||||
|
max: z.number().optional(),
|
||||||
|
})
|
||||||
|
.optional(),
|
||||||
|
countryCodes: z.array(z.string()).optional(),
|
||||||
|
regionCodes: z.array(z.string()).optional(),
|
||||||
|
releaseChannels: z.array(ReleaseChannelEnum).optional(),
|
||||||
|
percentage: z.number().min(0).max(100).optional(),
|
||||||
|
});
|
||||||
|
|
||||||
|
export const CreatePolicySchema = z.object({
|
||||||
|
name: z.string().min(1).max(200),
|
||||||
|
description: z.string().default(''),
|
||||||
|
enabled: z.boolean().default(true),
|
||||||
|
priority: z.number().int().min(0).max(999).default(50),
|
||||||
|
eventTypes: z.array(EventTypeEnum).default(['warn', 'error', 'fatal']),
|
||||||
|
modules: z.array(z.string()).default([]),
|
||||||
|
samplingRate: z.number().min(0).max(1).default(1.0),
|
||||||
|
targeting: TelemetryTargetingSchema.default({}),
|
||||||
|
startsAt: z.string().datetime().optional(),
|
||||||
|
expiresAt: z.string().datetime().optional(),
|
||||||
|
});
|
||||||
|
|
||||||
|
export const UpdatePolicySchema = z.object({
|
||||||
|
name: z.string().min(1).max(200).optional(),
|
||||||
|
description: z.string().optional(),
|
||||||
|
enabled: z.boolean().optional(),
|
||||||
|
priority: z.number().int().min(0).max(999).optional(),
|
||||||
|
eventTypes: z.array(EventTypeEnum).optional(),
|
||||||
|
modules: z.array(z.string()).optional(),
|
||||||
|
samplingRate: z.number().min(0).max(1).optional(),
|
||||||
|
targeting: TelemetryTargetingSchema.optional(),
|
||||||
|
startsAt: z.string().datetime().optional().nullable(),
|
||||||
|
expiresAt: z.string().datetime().optional().nullable(),
|
||||||
|
});
|
||||||
|
|
||||||
|
export type CreatePolicyInput = z.infer<typeof CreatePolicySchema>;
|
||||||
|
export type UpdatePolicyInput = z.infer<typeof UpdatePolicySchema>;
|
||||||
|
|
||||||
|
export interface TelemetryCollectionPolicyDoc {
|
||||||
|
id: string;
|
||||||
|
productId: string;
|
||||||
|
name: string;
|
||||||
|
description: string;
|
||||||
|
enabled: boolean;
|
||||||
|
priority: number;
|
||||||
|
eventTypes: string[];
|
||||||
|
modules: string[];
|
||||||
|
samplingRate: number;
|
||||||
|
targeting: z.infer<typeof TelemetryTargetingSchema>;
|
||||||
|
startsAt?: string;
|
||||||
|
expiresAt?: string;
|
||||||
|
createdAt: string;
|
||||||
|
updatedAt: string;
|
||||||
|
createdBy: string;
|
||||||
|
}
|
||||||
|
|
||||||
|
// ─── Collection Config Response (for clients) ──────────────────────
|
||||||
|
|
||||||
|
export interface TelemetryCollectionConfig {
|
||||||
|
enabled: boolean;
|
||||||
|
eventTypes: string[];
|
||||||
|
modules: string[];
|
||||||
|
samplingRates: {
|
||||||
|
debug: number;
|
||||||
|
info: number;
|
||||||
|
warn: number;
|
||||||
|
error: number;
|
||||||
|
fatal: number;
|
||||||
|
};
|
||||||
|
batchSize: number;
|
||||||
|
flushIntervalMs: number;
|
||||||
|
maxQueueSize: number;
|
||||||
|
}
|
||||||
|
|
||||||
|
// ─── Error Cluster ──────────────────────────────────────────────────
|
||||||
|
|
||||||
|
export interface TelemetryErrorCluster {
|
||||||
|
id: string; // ${fingerprint}:${yyyyMM}
|
||||||
|
pk: string; // ${productId}:${platform}:${module}
|
||||||
|
productId: string;
|
||||||
|
fingerprint: string;
|
||||||
|
|
||||||
|
platform: string;
|
||||||
|
channel: string;
|
||||||
|
module: string;
|
||||||
|
eventName: string;
|
||||||
|
|
||||||
|
affectedVersions: Array<{
|
||||||
|
appVersion: string;
|
||||||
|
buildNumber: string;
|
||||||
|
count: number;
|
||||||
|
lastSeenAt: string;
|
||||||
|
}>;
|
||||||
|
|
||||||
|
firstSeenAt: string;
|
||||||
|
lastSeenAt: string;
|
||||||
|
totalCount: number;
|
||||||
|
affectedUserIds: string[];
|
||||||
|
affectedInstallIds: string[];
|
||||||
|
affectedOsFamilies: string[];
|
||||||
|
|
||||||
|
sampleErrorDomain?: string;
|
||||||
|
sampleErrorCode?: string;
|
||||||
|
sampleMessage?: string;
|
||||||
|
severity: 'warn' | 'error' | 'fatal';
|
||||||
|
ttl: number;
|
||||||
|
}
|
||||||
|
|
||||||
|
// ─── Query / Admin types ────────────────────────────────────────────
|
||||||
|
|
||||||
|
export const TelemetryQuerySchema = z.object({
|
||||||
|
userId: z.string().optional(),
|
||||||
|
anonymousInstallId: z.string().optional(),
|
||||||
|
platform: z.string().optional(),
|
||||||
|
channel: z.string().optional(),
|
||||||
|
osFamily: z.string().optional(),
|
||||||
|
appVersion: z.string().optional(),
|
||||||
|
buildNumber: z.string().optional(),
|
||||||
|
module: z.string().optional(),
|
||||||
|
eventName: z.string().optional(),
|
||||||
|
eventType: z.string().optional(),
|
||||||
|
from: z.string().datetime().optional(),
|
||||||
|
to: z.string().datetime().optional(),
|
||||||
|
limit: z.coerce.number().int().min(1).max(200).default(50),
|
||||||
|
continuationToken: z.string().optional(),
|
||||||
|
});
|
||||||
|
|
||||||
|
export type TelemetryQueryInput = z.infer<typeof TelemetryQuerySchema>;
|
||||||
@ -46,6 +46,7 @@ import { publicRoutes } from './modules/public/routes.js';
|
|||||||
import { tokenRoutes } from './modules/tokens/routes.js';
|
import { tokenRoutes } from './modules/tokens/routes.js';
|
||||||
import { themeRoutes } from './modules/themes/routes.js';
|
import { themeRoutes } from './modules/themes/routes.js';
|
||||||
import { waitlistRoutes } from './modules/waitlist/routes.js';
|
import { waitlistRoutes } from './modules/waitlist/routes.js';
|
||||||
|
import { telemetryRoutes } from './modules/telemetry/routes.js';
|
||||||
import { initCosmosIfNeeded } from './lib/cosmos-init.js';
|
import { initCosmosIfNeeded } from './lib/cosmos-init.js';
|
||||||
import { config } from './lib/config.js';
|
import { config } from './lib/config.js';
|
||||||
|
|
||||||
@ -116,6 +117,8 @@ await app.register(tokenRoutes, { prefix: '/api' });
|
|||||||
await app.register(themeRoutes, { prefix: '/api' });
|
await app.register(themeRoutes, { prefix: '/api' });
|
||||||
// Waitlist module (pre-launch signups — public + admin routes)
|
// Waitlist module (pre-launch signups — public + admin routes)
|
||||||
await app.register(waitlistRoutes, { prefix: '/api' });
|
await app.register(waitlistRoutes, { prefix: '/api' });
|
||||||
|
// Telemetry module (client ingest + admin query + policies)
|
||||||
|
await app.register(telemetryRoutes, { prefix: '/api' });
|
||||||
// Public routes — no auth, registered at top level
|
// Public routes — no auth, registered at top level
|
||||||
await app.register(publicRoutes, { prefix: '/api' });
|
await app.register(publicRoutes, { prefix: '/api' });
|
||||||
|
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user