learning_ai_common_plat/services/extraction-service/src/modules/extract/routes.ts
saravanakumardb1 9c8a3169dc feat(extraction): Phase 5 caching + cost controls (5.1-5.6)
- 5.1: Python sidecar LRU cache (cache.py) with configurable TTL + max size
- 5.2: Fastify-level cache with X-Extraction-Cache HIT/MISS header + /extract/cache-stats
- 5.3-5.5: Per-user daily quota (free=10, pro=100, enterprise=unlimited) with 429 response
- 5.6: GET /extract/usage endpoint for admin usage reporting
- Both Python + TS caches use sha256(taskId:modelId:text) keys
- 46 TS tests + 29 Python tests still passing
2026-02-14 14:02:21 -08:00

259 lines
7.9 KiB
TypeScript

import type { FastifyInstance } from 'fastify';
import rateLimit from '@fastify/rate-limit';
import { createHash } from 'node:crypto';
import { ExtractRequestSchema, BatchExtractRequestSchema } from './types.js';
import {
sidecarExtract,
sidecarExtractBatch,
sidecarHealth,
type SidecarExtractResponse,
} from '../../lib/python-bridge.js';
import { BadRequestError } from '../../lib/errors.js';
import { checkQuota, incrementUsage, getUsageSummary } from './usage.js';
// ── In-memory extraction cache ─────────────────────────────────

/**
 * Parses a base-10 integer setting.
 *
 * @param raw - Raw setting value (e.g. from an environment variable).
 * @param fallback - Value to use when `raw` is unset, empty, or not numeric.
 * @returns The parsed integer, or `fallback` when parsing yields NaN.
 */
function parseIntSetting(raw: string | undefined, fallback: number): number {
  const value = parseInt(raw || '', 10);
  // A NaN limit would make every `>` / `>=` comparison against it false,
  // silently disabling TTL expiry and size-based eviction.
  return Number.isNaN(value) ? fallback : value;
}

/** Maximum age of a cache entry in milliseconds (default 24h). */
const CACHE_TTL_MS = parseIntSetting(process.env.EXTRACTION_CACHE_TTL_MS, 86400000);
/** Maximum number of entries held in the cache (default 500). */
const CACHE_MAX = parseIntSetting(process.env.EXTRACTION_CACHE_MAX, 500);
/** A stored sidecar response together with the `Date.now()` timestamp at which it was cached. */
type CacheEntry = {
  response: SidecarExtractResponse;
  createdAt: number;
};

/** Cached entries, keyed by the digest string produced by `cacheKey`. */
const cache: Map<string, CacheEntry> = new Map();

// Lookup counters; both are reported by GET /extract/cache-stats.
let cacheHits = 0,
  cacheMisses = 0;
/**
 * Derives the cache key for an extraction request.
 *
 * The key is the hex-encoded SHA-256 digest of `taskId:modelId:text`, where a
 * missing (or empty) `taskId` / `modelId` contributes an empty segment.
 *
 * @param text - Document text being extracted.
 * @param taskId - Optional task identifier.
 * @param modelId - Optional model identifier.
 * @returns 64-character lowercase hex digest.
 */
function cacheKey(text: string, taskId?: string, modelId?: string): string {
  const material = [taskId || '', modelId || '', text].join(':');
  const hasher = createHash('sha256');
  hasher.update(material);
  return hasher.digest('hex');
}
/**
 * Looks up a cached extraction response.
 *
 * Increments `cacheMisses` when nothing usable is found and `cacheHits` when
 * an entry is returned. Entries older than `CACHE_TTL_MS` are removed and
 * counted as misses.
 *
 * @param text - Document text being extracted.
 * @param taskId - Optional task identifier.
 * @param modelId - Optional model identifier.
 * @returns The cached sidecar response, or `null` on a miss or expired entry.
 */
function cacheGet(text: string, taskId?: string, modelId?: string): SidecarExtractResponse | null {
  const key = cacheKey(text, taskId, modelId);
  const entry = cache.get(key);
  if (!entry) {
    cacheMisses++;
    return null;
  }
  if (Date.now() - entry.createdAt > CACHE_TTL_MS) {
    cache.delete(key);
    cacheMisses++;
    return null;
  }
  // Refresh recency: a Map iterates in insertion order and eviction removes
  // the first key, so re-inserting on a hit makes eviction least-recently-used
  // rather than first-in-first-out. The entry object is reused, so `createdAt`
  // (and therefore TTL expiry) is unaffected.
  cache.delete(key);
  cache.set(key, entry);
  cacheHits++;
  return entry.response;
}
/**
 * Stores an extraction response in the cache, stamped with the current time.
 *
 * When the cache is full and the key is new, the first entry in the Map's
 * iteration order (the oldest-inserted one) is evicted to make room.
 *
 * @param text - Document text that was extracted.
 * @param taskId - Task identifier used for the request, if any.
 * @param modelId - Model identifier used for the request, if any.
 * @param response - Sidecar response to cache.
 */
function cachePut(
  text: string,
  taskId: string | undefined,
  modelId: string | undefined,
  response: SidecarExtractResponse
): void {
  // A non-positive capacity means there is no room for any entry; storing one
  // anyway would leave the cache larger than its configured maximum.
  if (CACHE_MAX <= 0) return;

  const key = cacheKey(text, taskId, modelId);
  // Remove any existing entry for this key first. Overwriting does not grow
  // the cache, so no unrelated entry needs to be evicted, and re-inserting
  // moves the refreshed entry to the newest position (Map.set on an existing
  // key would otherwise keep its old position).
  const replaced = cache.delete(key);
  if (!replaced && cache.size >= CACHE_MAX) {
    const oldest = cache.keys().next();
    if (!oldest.done) cache.delete(oldest.value);
  }
  cache.set(key, { response, createdAt: Date.now() });
}
/**
 * Registers the extraction HTTP routes on the given Fastify instance:
 *
 * - POST /extract               single-document extraction (quota + cache)
 * - POST /extract/batch         batch extraction
 * - GET  /extract/models        static list of model ids
 * - GET  /extract/sidecar-health sidecar health probe
 * - GET  /extract/usage         usage summary for one user
 * - GET  /extract/cache-stats   in-memory cache counters
 *
 * @param app - Fastify instance to register the rate limiter and routes on.
 */
export async function extractRoutes(app: FastifyInstance) {
  // Rate limiting for extraction endpoints: 30 requests per minute, keyed by client IP.
  await app.register(rateLimit, {
    max: 30,
    timeWindow: '1 minute',
    keyGenerator: req => req.ip,
  });

  /**
   * POST /extract — Single document extraction.
   *
   * Flow: validate body → enforce daily quota (only when an x-user-id header
   * is present) → serve from cache on a hit → otherwise call the sidecar,
   * cache the result and record usage. Usage is only incremented on a cache
   * miss, i.e. when the sidecar was actually called.
   */
  app.post('/extract', async (req, reply) => {
    const parsed = ExtractRequestSchema.safeParse(req.body);
    if (!parsed.success) {
      throw new BadRequestError(parsed.error.issues.map(i => i.message).join('; '));
    }
    const { text, taskId, taskPrompt, examples, modelId, options } = parsed.data;
    const requestId = req.headers['x-request-id'] as string | undefined;

    // Shapes a sidecar response into the public response body. Used for both
    // cache hits and fresh results so the two cannot drift apart.
    const toPayload = (response: SidecarExtractResponse) => ({
      extractions: response.extractions,
      metadata: {
        modelId: response.metadata.model_id,
        durationMs: response.metadata.duration_ms,
        tokenCount: response.metadata.token_count,
        charCount: response.metadata.char_count,
      },
      requestId,
    });

    // Enforce per-user daily quota.
    // NOTE(review): user id and plan are read from request headers as-is;
    // presumably an upstream gateway sets them — confirm they cannot be spoofed.
    const userId = req.headers['x-user-id'] as string | undefined;
    const userPlan = (req.headers['x-user-plan'] as string) || 'free';
    if (userId) {
      const quota = checkQuota(userId, userPlan);
      if (!quota.allowed) {
        reply.header('X-RateLimit-Limit', String(quota.limit));
        reply.header('X-RateLimit-Remaining', '0');
        return reply.status(429).send({
          error: 'Daily extraction quota exceeded',
          limit: quota.limit,
          used: quota.used,
          plan: userPlan,
        });
      }
    }

    req.log.info({ taskId, modelId, textLength: text.length }, 'extraction request');

    // The cache helpers key on (task, model, text) only, but taskPrompt,
    // examples and options are also sent to the sidecar and can change its
    // output. When any of them is supplied, fold them into the value passed in
    // the task slot so requests that differ only in those fields never share a
    // cache entry. Requests without them keep the plain taskId-based key.
    const hasOverrides = taskPrompt != null || examples != null || options != null;
    const cacheScope = hasOverrides
      ? JSON.stringify([taskId, taskPrompt, examples, options])
      : taskId;

    // Check cache
    const cached = cacheGet(text, cacheScope, modelId);
    if (cached) {
      req.log.info({ taskId }, 'cache hit');
      reply.header('X-Extraction-Cache', 'HIT');
      return reply.send(toPayload(cached));
    }
    reply.header('X-Extraction-Cache', 'MISS');

    const result = await sidecarExtract(
      {
        text,
        task_id: taskId,
        task_prompt: taskPrompt,
        examples: examples?.map(e => ({
          text: e.text,
          extractions: e.extractions.map(ex => ({
            extraction_class: ex.extraction_class,
            extraction_text: ex.extraction_text,
            attributes: ex.attributes,
          })),
        })),
        model_id: modelId,
        extraction_passes: options?.extractionPasses,
        max_workers: options?.maxWorkers,
        max_char_buffer: options?.maxCharBuffer,
      },
      requestId
    );

    cachePut(text, cacheScope, modelId, result);
    if (userId) incrementUsage(userId, userPlan);

    req.log.info(
      { entityCount: result.extractions.length, durationMs: result.metadata.duration_ms },
      'extraction complete'
    );
    return reply.send(toPayload(result));
  });

  /**
   * POST /extract/batch — Batch extraction (multiple inputs, shared config).
   *
   * Each input carries its own text / taskId / taskPrompt; `examples` and
   * `modelId` are shared across all inputs. This route does not consult the
   * response cache or the per-user quota.
   */
  app.post('/extract/batch', async (req, reply) => {
    const parsed = BatchExtractRequestSchema.safeParse(req.body);
    if (!parsed.success) {
      throw new BadRequestError(parsed.error.issues.map(i => i.message).join('; '));
    }
    const { inputs, examples, modelId } = parsed.data;
    const requestId = req.headers['x-request-id'] as string | undefined;

    req.log.info({ inputCount: inputs.length, modelId }, 'batch extraction request');

    const sidecarRequests = inputs.map(input => ({
      text: input.text,
      task_id: input.taskId,
      task_prompt: input.taskPrompt,
      examples: examples?.map(e => ({
        text: e.text,
        extractions: e.extractions.map(ex => ({
          extraction_class: ex.extraction_class,
          extraction_text: ex.extraction_text,
          attributes: ex.attributes,
        })),
      })),
      model_id: modelId,
    }));

    const results = await sidecarExtractBatch(sidecarRequests, requestId);

    return reply.send({
      results: results.map(r => ({
        extractions: r.extractions,
        metadata: {
          modelId: r.metadata.model_id,
          durationMs: r.metadata.duration_ms,
          tokenCount: r.metadata.token_count,
          charCount: r.metadata.char_count,
        },
      })),
      requestId,
    });
  });

  /**
   * GET /extract/models — List available model providers.
   * The list is a static literal; nothing is queried at request time.
   */
  app.get('/extract/models', async (_req, reply) => {
    return reply.send({
      models: [
        { id: 'gemini-2.5-flash', provider: 'google', description: 'Gemini 2.5 Flash (default)' },
        { id: 'gemini-2.5-pro', provider: 'google', description: 'Gemini 2.5 Pro' },
      ],
    });
  });

  /**
   * GET /extract/sidecar-health — Check Python sidecar status.
   * Responds 503 with the error message when the health call throws.
   */
  app.get('/extract/sidecar-health', async (_req, reply) => {
    try {
      const health = await sidecarHealth();
      return reply.send({ status: 'ok', sidecar: health });
    } catch (err) {
      const message = err instanceof Error ? err.message : 'Sidecar unavailable';
      return reply.status(503).send({ status: 'error', error: message });
    }
  });

  /**
   * GET /extract/usage — Per-user extraction usage (admin).
   *
   * Query parameters: `userId` (required) and `plan` (defaults to 'free').
   * NOTE(review): no authorization check is performed in this handler;
   * presumably access is restricted upstream — confirm.
   */
  app.get('/extract/usage', async (req, reply) => {
    const userId = (req.query as Record<string, string>).userId;
    const plan = (req.query as Record<string, string>).plan || 'free';
    if (!userId) {
      throw new BadRequestError('userId query parameter is required');
    }
    return reply.send(getUsageSummary(userId, plan));
  });

  /**
   * GET /extract/cache-stats — Cache statistics.
   * `hitRate` is hits / (hits + misses), rounded to three decimals, or 0 when
   * no lookups have happened yet.
   */
  app.get('/extract/cache-stats', async (_req, reply) => {
    const total = cacheHits + cacheMisses;
    return reply.send({
      size: cache.size,
      maxSize: CACHE_MAX,
      ttlMs: CACHE_TTL_MS,
      hits: cacheHits,
      misses: cacheMisses,
      hitRate: total > 0 ? Math.round((cacheHits / total) * 1000) / 1000 : 0,
    });
  });
}