import type { FastifyInstance, FastifyReply } from 'fastify';
import rateLimit from '@fastify/rate-limit';
import { createHash } from 'node:crypto';
import { ExtractRequestSchema, BatchExtractRequestSchema } from './types.js';
import {
  sidecarExtract,
  sidecarExtractBatch,
  sidecarHealth,
  type SidecarExtractResponse,
} from '../../lib/python-bridge.js';
import { BadRequestError } from '../../lib/errors.js';
import { checkQuota, incrementUsage, getUsageSummary } from './usage.js';

// ── Configuration ──────────────────────────────────────────────

/**
 * Reads a non-negative integer from the environment.
 *
 * Falls back to `fallback` when the variable is unset, empty, not a number,
 * or negative, so a typo in configuration can never produce `NaN` (which
 * would silently disable the size/TTL comparisons below).
 */
function envInt(name: string, fallback: number): number {
  const raw = process.env[name];
  if (raw === undefined || raw.trim() === '') return fallback;
  const parsed = Number.parseInt(raw, 10);
  return Number.isFinite(parsed) && parsed >= 0 ? parsed : fallback;
}

const CACHE_TTL_MS = envInt('EXTRACTION_CACHE_TTL_MS', 86_400_000); // 24h
const CACHE_MAX = envInt('EXTRACTION_CACHE_MAX', 500); // 0 disables caching
const RATE_LIMIT_MAX = envInt('EXTRACTION_RATE_LIMIT_MAX', 30);

// ── In-memory LRU cache ────────────────────────────────────────

interface CacheEntry {
  response: SidecarExtractResponse;
  createdAt: number;
}

// A Map iterates in insertion order, so the first key is always the least
// recently used one as long as every read/write re-inserts the entry.
const cache = new Map<string, CacheEntry>();
let cacheHits = 0;
let cacheMisses = 0;

/**
 * Builds the cache key for an extraction request.
 *
 * @param text     Document text being extracted.
 * @param taskId   Optional task identifier.
 * @param modelId  Optional model identifier.
 * @param variant  Any further request inputs that influence the result
 *                 (prompt, examples, options). Must be JSON-serialisable.
 * @returns Hex-encoded SHA-256 digest of the JSON-encoded parts.
 */
function cacheKey(text: string, taskId?: string, modelId?: string, variant?: unknown): string {
  // JSON-encode the parts instead of joining with ':' so that a ':' inside
  // taskId/modelId cannot make two different requests hash identically.
  const material = JSON.stringify([taskId || '', modelId || '', text, variant ?? null]);
  return createHash('sha256').update(material).digest('hex');
}

/**
 * Looks up a cached response, updating the hit/miss counters.
 *
 * Expired entries are removed and counted as misses. A hit moves the entry to
 * the most-recently-used position without resetting its creation time.
 *
 * @returns The cached response, or `null` on a miss.
 */
function cacheGet(
  text: string,
  taskId?: string,
  modelId?: string,
  variant?: unknown
): SidecarExtractResponse | null {
  const key = cacheKey(text, taskId, modelId, variant);
  const entry = cache.get(key);
  if (!entry) {
    cacheMisses++;
    return null;
  }
  if (Date.now() - entry.createdAt > CACHE_TTL_MS) {
    cache.delete(key);
    cacheMisses++;
    return null;
  }
  // Re-insert to mark the entry as most recently used.
  cache.delete(key);
  cache.set(key, entry);
  cacheHits++;
  return entry.response;
}

/**
 * Stores a response in the cache, evicting least-recently-used entries as
 * needed to stay within `CACHE_MAX`. Does nothing when `CACHE_MAX` is 0.
 */
function cachePut(
  text: string,
  taskId: string | undefined,
  modelId: string | undefined,
  response: SidecarExtractResponse,
  variant?: unknown
): void {
  if (CACHE_MAX <= 0) return;
  const key = cacheKey(text, taskId, modelId, variant);
  // Drop any existing entry first: overwriting must not evict an unrelated key.
  cache.delete(key);
  while (cache.size >= CACHE_MAX) {
    const oldest = cache.keys().next();
    if (oldest.done) break;
    cache.delete(oldest.value);
  }
  cache.set(key, { response, createdAt: Date.now() });
}

// ── Request/response helpers ───────────────────────────────────

/**
 * Returns the first value of a header or query parameter, which may arrive as
 * a single string, a list of strings, or not at all.
 */
function firstValue(value: string | string[] | undefined): string | undefined {
  return Array.isArray(value) ? value[0] : value;
}

/** Maps a sidecar response to the camelCase shape returned by the API. */
function toApiResult(response: SidecarExtractResponse) {
  return {
    extractions: response.extractions,
    metadata: {
      modelId: response.metadata.model_id,
      durationMs: response.metadata.duration_ms,
      tokenCount: response.metadata.token_count,
      charCount: response.metadata.char_count,
    },
  };
}

/**
 * Enforces the per-user daily quota.
 *
 * Requests without a user id are not subject to the quota. When the quota is
 * exhausted this sends the 429 response itself.
 *
 * @returns `true` if a 429 response was sent and the handler must stop.
 */
function rejectIfOverQuota(
  reply: FastifyReply,
  userId: string | undefined,
  userPlan: string
): boolean {
  if (!userId) return false;
  const quota = checkQuota(userId, userPlan);
  if (quota.allowed) return false;
  reply.header('X-RateLimit-Limit', String(quota.limit));
  reply.header('X-RateLimit-Remaining', '0');
  void reply.status(429).send({
    error: 'Daily extraction quota exceeded',
    limit: quota.limit,
    used: quota.used,
    plan: userPlan,
  });
  return true;
}

// ── Routes ─────────────────────────────────────────────────────

/**
 * Registers the extraction routes (single, batch, models, sidecar health,
 * usage, cache stats) together with per-IP rate limiting on the given
 * Fastify instance.
 */
export async function extractRoutes(app: FastifyInstance) {
  // Rate limiting for extraction endpoints — 30 req/min per IP by default,
  // configurable through EXTRACTION_RATE_LIMIT_MAX.
  await app.register(rateLimit, {
    max: RATE_LIMIT_MAX,
    timeWindow: '1 minute',
    keyGenerator: req => req.ip,
  });

  /**
   * POST /extract — Single document extraction.
   */
  app.post('/extract', async (req, reply) => {
    const parsed = ExtractRequestSchema.safeParse(req.body);
    if (!parsed.success) {
      throw new BadRequestError(parsed.error.issues.map(i => i.message).join('; '));
    }

    const { text, taskId, taskPrompt, examples, modelId, options } = parsed.data;
    const requestId = firstValue(req.headers['x-request-id']);

    // Enforce per-user daily quota.
    const userId = firstValue(req.headers['x-user-id']);
    // `||` on purpose: an empty plan header also falls back to 'free'.
    const userPlan = firstValue(req.headers['x-user-plan']) || 'free';
    if (rejectIfOverQuota(reply, userId, userPlan)) return reply;

    req.log.info({ taskId, modelId, textLength: text.length }, 'extraction request');

    // Everything besides text/taskId/modelId that changes what the sidecar is
    // asked to do must be part of the cache key, otherwise a request with a
    // different prompt or examples would be served another request's result.
    const variant = { taskPrompt, examples, options };

    // Check cache. Hits are served without counting against the quota.
    const cached = cacheGet(text, taskId, modelId, variant);
    if (cached) {
      req.log.info({ taskId }, 'cache hit');
      reply.header('X-Extraction-Cache', 'HIT');
      return reply.send({ ...toApiResult(cached), requestId });
    }
    reply.header('X-Extraction-Cache', 'MISS');

    const result = await sidecarExtract(
      {
        text,
        task_id: taskId,
        task_prompt: taskPrompt,
        examples: examples?.map(e => ({
          text: e.text,
          extractions: e.extractions.map(ex => ({
            extraction_class: ex.extraction_class,
            extraction_text: ex.extraction_text,
            attributes: ex.attributes,
          })),
        })),
        model_id: modelId,
        extraction_passes: options?.extractionPasses,
        max_workers: options?.maxWorkers,
        max_char_buffer: options?.maxCharBuffer,
      },
      requestId
    );

    cachePut(text, taskId, modelId, result, variant);
    if (userId) incrementUsage(userId, userPlan);

    req.log.info(
      { entityCount: result.extractions.length, durationMs: result.metadata.duration_ms },
      'extraction complete'
    );

    return reply.send({ ...toApiResult(result), requestId });
  });

  /**
   * POST /extract/batch — Batch extraction (multiple inputs, shared config).
   */
  app.post('/extract/batch', async (req, reply) => {
    const parsed = BatchExtractRequestSchema.safeParse(req.body);
    if (!parsed.success) {
      throw new BadRequestError(parsed.error.issues.map(i => i.message).join('; '));
    }

    const { inputs, examples, modelId } = parsed.data;
    const requestId = firstValue(req.headers['x-request-id']);

    // Apply the same per-user quota as the single-document endpoint so the
    // batch route cannot be used to bypass it.
    const userId = firstValue(req.headers['x-user-id']);
    const userPlan = firstValue(req.headers['x-user-plan']) || 'free';
    if (rejectIfOverQuota(reply, userId, userPlan)) return reply;

    req.log.info({ inputCount: inputs.length, modelId }, 'batch extraction request');

    const sidecarRequests = inputs.map(input => ({
      text: input.text,
      task_id: input.taskId,
      task_prompt: input.taskPrompt,
      examples: examples?.map(e => ({
        text: e.text,
        extractions: e.extractions.map(ex => ({
          extraction_class: ex.extraction_class,
          extraction_text: ex.extraction_text,
          attributes: ex.attributes,
        })),
      })),
      model_id: modelId,
    }));

    const results = await sidecarExtractBatch(sidecarRequests, requestId);

    // NOTE(review): each input is counted as one extraction against the daily
    // quota — confirm this matches the intended billing semantics.
    if (userId) {
      for (let i = 0; i < inputs.length; i++) incrementUsage(userId, userPlan);
    }

    return reply.send({
      results: results.map(r => ({
        extractions: r.extractions,
        metadata: {
          modelId: r.metadata.model_id,
          durationMs: r.metadata.duration_ms,
          tokenCount: r.metadata.token_count,
          charCount: r.metadata.char_count,
        },
      })),
      requestId,
    });
  });

  /**
   * GET /extract/models — List available model providers.
   */
  app.get('/extract/models', async (_req, reply) => {
    return reply.send({
      models: [
        { id: 'gemini-2.5-flash', provider: 'google', description: 'Gemini 2.5 Flash (default)' },
        { id: 'gemini-2.5-pro', provider: 'google', description: 'Gemini 2.5 Pro' },
      ],
    });
  });

  /**
   * GET /extract/sidecar-health — Check Python sidecar status.
   * Responds 503 with the error message when the health check throws.
   */
  app.get('/extract/sidecar-health', async (_req, reply) => {
    try {
      const health = await sidecarHealth();
      return reply.send({ status: 'ok', sidecar: health });
    } catch (err) {
      const message = err instanceof Error ? err.message : 'Sidecar unavailable';
      return reply.status(503).send({ status: 'error', error: message });
    }
  });

  /**
   * GET /extract/usage — Per-user extraction usage (admin).
   * NOTE(review): no authorization check is performed in this handler —
   * confirm that access is restricted elsewhere.
   */
  app.get('/extract/usage', async (req, reply) => {
    const query = req.query as Record<string, string | string[] | undefined>;
    const userId = firstValue(query.userId);
    const plan = firstValue(query.plan) || 'free';
    if (!userId) {
      throw new BadRequestError('userId query parameter is required');
    }
    return reply.send(getUsageSummary(userId, plan));
  });

  /**
   * GET /extract/cache-stats — Cache statistics.
   * `hitRate` is rounded to three decimals and is 0 before any lookup.
   */
  app.get('/extract/cache-stats', async (_req, reply) => {
    const total = cacheHits + cacheMisses;
    return reply.send({
      size: cache.size,
      maxSize: CACHE_MAX,
      ttlMs: CACHE_TTL_MS,
      hits: cacheHits,
      misses: cacheMisses,
      hitRate: total > 0 ? Math.round((cacheHits / total) * 1000) / 1000 : 0,
    });
  });
}