learning_ai_common_plat/services/extraction-service/src/lib/metrics.ts
saravanakumardb1 b8c0a73e89 feat(extraction): Phase 5 observability + error handling (5.7-5.12)
- 5.7: Enhanced structured logging with userId, productId, cacheHit, tokenCount
- 5.8: Metrics module (counters + histograms) + /extract/metrics endpoint
- 5.9: Grafana dashboard config for extraction-service (Loki queries)
- 5.10: Error mapping — sidecar errors → proper HTTP status codes (408, 429, 502, 503)
- 5.11: Circuit breaker for Python sidecar (5 failures → 30s OPEN)
- 5.12: Graceful degradation — circuit open returns 503, cached results still served
- 46 TS tests passing
2026-02-14 14:04:59 -08:00

108 lines
3.2 KiB
TypeScript

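/**
* Extraction-specific metrics for extraction-service.
*
* HTTP-level metrics are handled separately by fastify-metrics
* (auto-registered at GET /metrics). The custom counters/histograms defined
* here are simple in-memory aggregates, not prom-client instruments; they are
* exposed as a JSON summary via the /extract/metrics endpoint.
*/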
// ── In-memory counters (fastify-metrics handles HTTP metrics) ────
// These are simple counters since prom-client may not be directly available.
// They're exposed via the /extract/metrics endpoint.
/** One counter series: a label set together with its running total. */
interface MetricBucket {
  labels: Record<string, string>;
  value: number;
}

/**
 * Minimal in-memory counter that keeps one running total per distinct label set.
 *
 * Two label objects identify the same series when they contain the same
 * key/value pairs, regardless of the order in which the properties were written.
 */
class Counter {
  private _buckets = new Map<string, MetricBucket>();

  constructor(public readonly name: string) {}

  /**
   * Build an order-independent key for a label set.
   *
   * Plain `JSON.stringify(labels)` follows property insertion order, so
   * `{a, b}` and `{b, a}` would otherwise land in different buckets.
   */
  private static keyFor(labels: Record<string, string>): string {
    const sortedPairs = Object.keys(labels)
      .sort()
      .map((labelName) => [labelName, labels[labelName]]);
    return JSON.stringify(sortedPairs);
  }

  /**
   * Add `amount` to the series identified by `labels`, creating it on first use.
   *
   * @param labels Label key/value pairs identifying the series.
   * @param amount Increment to apply; defaults to 1.
   */
  inc(labels: Record<string, string>, amount = 1): void {
    const key = Counter.keyFor(labels);
    const existing = this._buckets.get(key);
    if (existing) {
      existing.value += amount;
    } else {
      // Store a copy so later mutation of the caller's object cannot rewrite
      // the labels of an already-recorded series.
      this._buckets.set(key, { labels: { ...labels }, value: amount });
    }
  }

  /**
   * Snapshot of every series in first-seen order.
   *
   * @returns Fresh objects; mutating them does not affect the counter.
   */
  toJSON(): Array<{ labels: Record<string, string>; value: number }> {
    return Array.from(this._buckets.values(), (bucket) => ({
      labels: { ...bucket.labels },
      value: bucket.value,
    }));
  }
}
/**
 * Minimal in-memory aggregate of observed values.
 *
 * Despite the name, no buckets are kept: only the running count and sum are
 * tracked, and `toJSON()` reports those plus the mean. Memory use is constant
 * regardless of how many observations are recorded.
 */
class Histogram {
  private _sum = 0;
  private _count = 0;

  constructor(public readonly name: string) {}

  /**
   * Record one observation.
   *
   * @param labels Accepted for signature parity with `Counter.inc`; the summary
   *   is not broken down by label, so the labels are not retained.
   * @param value Observed value. Non-finite values (NaN, ±Infinity) are ignored
   *   so that a single bad sample cannot permanently poison the sum and average.
   */
  observe(labels: Record<string, string>, value: number): void {
    // Raw observations are intentionally not stored: nothing reads them, and
    // keeping one entry per call grows without bound in a long-running process.
    void labels;
    if (!Number.isFinite(value)) {
      return;
    }
    this._sum += value;
    this._count++;
  }

  /**
   * Summary of everything observed so far.
   *
   * @returns `count`, plus `sum` and `avg` rounded to two decimal places;
   *   `avg` is 0 when nothing has been observed.
   */
  toJSON(): { count: number; sum: number; avg: number } {
    return {
      count: this._count,
      sum: Math.round(this._sum * 100) / 100,
      avg: this._count > 0 ? Math.round((this._sum / this._count) * 100) / 100 : 0,
    };
  }
}
// ── Exported metrics ─────────────────────────────────────────────
/** Incremented once per recordExtraction call, labelled by task_id, model_id, product_id and status. */
export const extractionRequestsTotal = new Counter('extraction_requests_total');
/** Extraction durations in seconds; recordExtraction feeds it durationMs / 1000 when durationMs is provided. */
export const extractionDurationSeconds = new Histogram('extraction_duration_seconds');
/** Entity counts per extraction; recordExtraction feeds it entityCount when entityCount is provided. */
export const extractionEntitiesExtracted = new Histogram('extraction_entities_extracted');
/** Incremented by recordExtraction only when the reported status is 'cache_hit'. */
export const extractionCacheHitTotal = new Counter('extraction_cache_hit_total');
/**
 * Record the outcome of one extraction across all extraction metrics.
 *
 * Always bumps the request counter; additionally bumps the cache-hit counter
 * when `status` is 'cache_hit', and records duration / entity count only when
 * the corresponding field is supplied.
 *
 * @param params.taskId     Task identifier; reported as 'unknown' when missing or empty.
 * @param params.modelId    Model identifier; reported as 'unknown' when missing or empty.
 * @param params.productId  Product identifier; reported as 'unknown' when missing or empty.
 * @param params.status     Outcome of the extraction.
 * @param params.durationMs Duration in milliseconds; recorded in seconds.
 * @param params.entityCount Number of entities extracted.
 */
export function recordExtraction(params: {
  taskId?: string;
  modelId?: string;
  productId?: string;
  status: 'success' | 'error' | 'cache_hit';
  durationMs?: number;
  entityCount?: number;
}): void {
  const { taskId, modelId, productId, status, durationMs, entityCount } = params;

  // `||` (not `??`) on purpose: empty strings also collapse to 'unknown'.
  const orUnknown = (id?: string): string => id || 'unknown';

  const labels = {
    task_id: orUnknown(taskId),
    model_id: orUnknown(modelId),
    product_id: orUnknown(productId),
    status,
  };

  extractionRequestsTotal.inc(labels);

  if (status === 'cache_hit') {
    extractionCacheHitTotal.inc(labels);
  }

  if (durationMs !== undefined) {
    const durationSeconds = durationMs / 1000;
    extractionDurationSeconds.observe(labels, durationSeconds);
  }

  if (entityCount !== undefined) {
    extractionEntitiesExtracted.observe(labels, entityCount);
  }
}
/**
 * Build a JSON-serialisable snapshot of every extraction metric.
 *
 * @returns An object keyed by metric name whose values are each metric's
 *   `toJSON()` output, in the order: requests, duration, entities, cache hits.
 */
export function getMetricsSummary(): Record<string, unknown> {
  const sections: Array<[string, { toJSON(): unknown }]> = [
    ['extraction_requests_total', extractionRequestsTotal],
    ['extraction_duration_seconds', extractionDurationSeconds],
    ['extraction_entities_extracted', extractionEntitiesExtracted],
    ['extraction_cache_hit_total', extractionCacheHitTotal],
  ];

  const summary: Record<string, unknown> = {};
  for (const [key, metric] of sections) {
    summary[key] = metric.toJSON();
  }
  return summary;
}