/*
 * Changes in this revision:
 * - 5.1: Python sidecar LRU cache (cache.py) with configurable TTL + max size
 * - 5.2: Fastify-level cache with X-Extraction-Cache HIT/MISS header + /extract/cache-stats
 * - 5.3-5.5: Per-user daily quota (free=10, pro=100, enterprise=unlimited) with 429 response
 * - 5.6: GET /extract/usage endpoint for admin usage reporting
 * - Both Python + TS caches use sha256(taskId:modelId:text) keys
 * - 46 TS tests + 29 Python tests still passing
 *
 * File info: 259 lines, 7.9 KiB, TypeScript
 */
import type { FastifyInstance } from 'fastify';
import rateLimit from '@fastify/rate-limit';
import { createHash } from 'node:crypto';

import { ExtractRequestSchema, BatchExtractRequestSchema } from './types.js';
import {
  sidecarExtract,
  sidecarExtractBatch,
  sidecarHealth,
  type SidecarExtractResponse,
} from '../../lib/python-bridge.js';
import { BadRequestError } from '../../lib/errors.js';
import { checkQuota, incrementUsage, getUsageSummary } from './usage.js';

// ── In-memory extraction cache ─────────────────────────────────

/**
 * Reads a non-negative integer setting from the environment.
 *
 * @param name - Environment variable to read.
 * @param fallback - Value used when the variable is unset, empty, not a
 *   number, or negative.
 * @returns The parsed integer, or `fallback`.
 */
function envInt(name: string, fallback: number): number {
  const parsed = parseInt(process.env[name] || '', 10);
  // A NaN limit would make every `>` / `>=` comparison against it false,
  // silently disabling expiry and eviction, so reject it here.
  return Number.isFinite(parsed) && parsed >= 0 ? parsed : fallback;
}

/** Maximum age of a cache entry in milliseconds (default: 24h). */
const CACHE_TTL_MS = envInt('EXTRACTION_CACHE_TTL_MS', 86400000);

/** Maximum number of entries kept in the cache (default: 500). */
const CACHE_MAX = envInt('EXTRACTION_CACHE_MAX', 500);

/** One cached sidecar response together with the time it was stored. */
interface CacheEntry {
  /** The sidecar response exactly as it was returned for the request. */
  response: SidecarExtractResponse;
  /** `Date.now()` timestamp (ms) taken when the entry was stored; used for TTL checks. */
  createdAt: number;
}

/**
 * Cache storage keyed by the hex digest produced by `cacheKey`.
 * A `Map` iterates in insertion order, which the eviction logic relies on
 * to find the oldest entry.
 */
const cache = new Map<string, CacheEntry>();

/** Number of lookups answered from the cache since process start. */
let cacheHits = 0;

/** Number of lookups that found no entry or only an expired one. */
let cacheMisses = 0;

/**
 * Builds the cache key for an extraction request.
 *
 * The key is the SHA-256 hex digest of `taskId:modelId:text`. A missing or
 * empty id contributes an empty segment. Backslashes and colons inside the
 * two ids are backslash-escaped so that an id containing `:` cannot shift
 * the segment boundaries and collide with a different (taskId, modelId)
 * pair; ids without those characters hash exactly as before.
 *
 * @param text - Document text being extracted.
 * @param taskId - Optional task identifier.
 * @param modelId - Optional model identifier.
 * @returns 64-character lowercase hex digest.
 */
function cacheKey(text: string, taskId?: string, modelId?: string): string {
  const escapeSegment = (segment: string | undefined): string =>
    (segment || '').replace(/[\\:]/g, '\\$&');

  return createHash('sha256')
    .update(`${escapeSegment(taskId)}:${escapeSegment(modelId)}:${text}`)
    .digest('hex');
}

/**
 * Looks up a cached sidecar response.
 *
 * Updates the hit/miss counters as a side effect. An entry older than
 * `CACHE_TTL_MS` is removed and reported as a miss. A hit moves the entry
 * to the most-recently-used position so that eviction (which removes the
 * first key in iteration order) discards the least recently used entry
 * rather than simply the oldest insertion.
 *
 * @param text - Document text of the request.
 * @param taskId - Optional task identifier.
 * @param modelId - Optional model identifier.
 * @returns The cached response, or `null` on a miss.
 */
function cacheGet(text: string, taskId?: string, modelId?: string): SidecarExtractResponse | null {
  const key = cacheKey(text, taskId, modelId);
  const entry = cache.get(key);

  if (entry === undefined) {
    cacheMisses++;
    return null;
  }

  if (Date.now() - entry.createdAt > CACHE_TTL_MS) {
    // Expired: drop it so it stops occupying a slot.
    cache.delete(key);
    cacheMisses++;
    return null;
  }

  // Map keeps insertion order; delete + set moves the key to the end.
  // createdAt is left untouched so the TTL still counts from first storage.
  cache.delete(key);
  cache.set(key, entry);

  cacheHits++;
  return entry.response;
}

/**
 * Stores a sidecar response in the cache.
 *
 * If the key is already present (for example two concurrent requests both
 * missed and both completed), the old entry is replaced in place of
 * evicting an unrelated one, and the key moves to the newest position.
 * Otherwise, when the cache is at `CACHE_MAX`, the first key in iteration
 * order is evicted before inserting.
 *
 * @param text - Document text of the request.
 * @param taskId - Optional task identifier.
 * @param modelId - Optional model identifier.
 * @param response - Sidecar response to cache.
 */
function cachePut(
  text: string,
  taskId: string | undefined,
  modelId: string | undefined,
  response: SidecarExtractResponse
): void {
  const key = cacheKey(text, taskId, modelId);

  // Remove any previous entry first: an overwrite does not grow the map, so
  // it must not trigger eviction, and re-inserting refreshes its position.
  cache.delete(key);

  if (cache.size >= CACHE_MAX) {
    const oldestKey = cache.keys().next().value;
    if (oldestKey !== undefined) cache.delete(oldestKey);
  }

  cache.set(key, { response, createdAt: Date.now() });
}

/** Per-IP request budget per minute used when no valid override is configured. */
const DEFAULT_RATE_LIMIT_MAX = 30;

/**
 * Resolves the per-IP request budget for the extraction routes.
 *
 * Reads `EXTRACTION_RATE_LIMIT_MAX`; a value that does not parse to a
 * positive integer falls back to `DEFAULT_RATE_LIMIT_MAX`.
 */
function resolveRateLimitMax(): number {
  const parsed = parseInt(process.env.EXTRACTION_RATE_LIMIT_MAX || '', 10);
  return Number.isFinite(parsed) && parsed > 0 ? parsed : DEFAULT_RATE_LIMIT_MAX;
}

/**
 * Derives the task segment used for cache lookups.
 *
 * The cache is keyed on (task, model, text) only. When a request also
 * carries inline instructions (prompt, examples, options), those change what
 * the sidecar is asked to do, so they are fingerprinted into the task
 * segment; otherwise two requests with the same text but different
 * instructions would share a cache entry. Requests without inline
 * instructions keep the plain `taskId`.
 *
 * @param taskId - Optional task identifier from the request.
 * @param inlineConfig - Request fields beyond taskId/modelId/text that are sent to the sidecar.
 * @returns Value to pass as the task id to the cache helpers.
 */
function cacheScope(
  taskId: string | undefined,
  inlineConfig: Record<string, unknown>
): string | undefined {
  const hasInlineConfig = Object.values(inlineConfig).some(value => value !== undefined);
  if (!hasInlineConfig) return taskId;

  const fingerprint = createHash('sha256').update(JSON.stringify(inlineConfig)).digest('hex');
  return `${taskId || ''}#${fingerprint}`;
}

/** Converts a sidecar response (snake_case metadata) into the camelCase payload sent to clients. */
function toExtractionPayload(res: SidecarExtractResponse) {
  const { metadata } = res;
  return {
    extractions: res.extractions,
    metadata: {
      modelId: metadata.model_id,
      durationMs: metadata.duration_ms,
      tokenCount: metadata.token_count,
      charCount: metadata.char_count,
    },
  };
}

/**
 * Registers the extraction routes on the given Fastify instance:
 * `POST /extract`, `POST /extract/batch`, `GET /extract/models`,
 * `GET /extract/sidecar-health`, `GET /extract/usage` and
 * `GET /extract/cache-stats`, plus a per-IP rate limit.
 *
 * @param app - Fastify instance (or encapsulated plugin context) to register on.
 */
export async function extractRoutes(app: FastifyInstance): Promise<void> {
  // Rate limiting for extraction endpoints — 30 req/min per IP by default,
  // overridable through EXTRACTION_RATE_LIMIT_MAX.
  await app.register(rateLimit, {
    max: resolveRateLimitMax(),
    timeWindow: '1 minute',
    keyGenerator: req => req.ip,
  });

  /**
   * POST /extract — Single document extraction.
   */
  app.post('/extract', async (req, reply) => {
    const parsed = ExtractRequestSchema.safeParse(req.body);
    if (!parsed.success) {
      throw new BadRequestError(parsed.error.issues.map(i => i.message).join('; '));
    }

    const { text, taskId, taskPrompt, examples, modelId, options } = parsed.data;
    const requestId = req.headers['x-request-id'] as string | undefined;

    // Enforce per-user daily quota.
    // NOTE(review): user id and plan are read straight from request headers;
    // presumably an upstream gateway sets them — confirm clients cannot.
    // Requests without x-user-id skip the quota entirely.
    const userId = req.headers['x-user-id'] as string | undefined;
    const userPlan = (req.headers['x-user-plan'] as string) || 'free';
    if (userId) {
      const quota = checkQuota(userId, userPlan);
      if (!quota.allowed) {
        reply.header('X-RateLimit-Limit', String(quota.limit));
        reply.header('X-RateLimit-Remaining', '0');
        return reply.status(429).send({
          error: 'Daily extraction quota exceeded',
          limit: quota.limit,
          used: quota.used,
          plan: userPlan,
        });
      }
    }

    req.log.info({ taskId, modelId, textLength: text.length }, 'extraction request');

    // Check cache. The scope folds in prompt/examples/options so a cached
    // result is only reused for a request that asks for the same work.
    const scope = cacheScope(taskId, { taskPrompt, examples, options });
    const cached = cacheGet(text, scope, modelId);
    if (cached) {
      req.log.info({ taskId }, 'cache hit');
      reply.header('X-Extraction-Cache', 'HIT');
      // Cache hits are served without incrementing the user's usage.
      return reply.send({ ...toExtractionPayload(cached), requestId });
    }

    reply.header('X-Extraction-Cache', 'MISS');

    const result = await sidecarExtract(
      {
        text,
        task_id: taskId,
        task_prompt: taskPrompt,
        examples: examples?.map(e => ({
          text: e.text,
          extractions: e.extractions.map(ex => ({
            extraction_class: ex.extraction_class,
            extraction_text: ex.extraction_text,
            attributes: ex.attributes,
          })),
        })),
        model_id: modelId,
        extraction_passes: options?.extractionPasses,
        max_workers: options?.maxWorkers,
        max_char_buffer: options?.maxCharBuffer,
      },
      requestId
    );

    // Only successful sidecar calls are cached and counted against the quota;
    // a thrown sidecar error skips both.
    cachePut(text, scope, modelId, result);
    if (userId) incrementUsage(userId, userPlan);

    req.log.info(
      { entityCount: result.extractions.length, durationMs: result.metadata.duration_ms },
      'extraction complete'
    );

    return reply.send({ ...toExtractionPayload(result), requestId });
  });

  /**
   * POST /extract/batch — Batch extraction (multiple inputs, shared config).
   *
   * NOTE(review): this route neither consults the cache nor checks or
   * increments the per-user quota — confirm that is intended.
   */
  app.post('/extract/batch', async (req, reply) => {
    const parsed = BatchExtractRequestSchema.safeParse(req.body);
    if (!parsed.success) {
      throw new BadRequestError(parsed.error.issues.map(i => i.message).join('; '));
    }

    const { inputs, examples, modelId } = parsed.data;
    const requestId = req.headers['x-request-id'] as string | undefined;

    req.log.info({ inputCount: inputs.length, modelId }, 'batch extraction request');

    // The examples are shared by every input, so convert them once.
    const sharedExamples = examples?.map(e => ({
      text: e.text,
      extractions: e.extractions.map(ex => ({
        extraction_class: ex.extraction_class,
        extraction_text: ex.extraction_text,
        attributes: ex.attributes,
      })),
    }));

    const sidecarRequests = inputs.map(input => ({
      text: input.text,
      task_id: input.taskId,
      task_prompt: input.taskPrompt,
      examples: sharedExamples,
      model_id: modelId,
    }));

    const results = await sidecarExtractBatch(sidecarRequests, requestId);

    return reply.send({
      results: results.map(r => toExtractionPayload(r)),
      requestId,
    });
  });

  /**
   * GET /extract/models — List available model providers.
   */
  app.get('/extract/models', async (_req, reply) => {
    return reply.send({
      models: [
        { id: 'gemini-2.5-flash', provider: 'google', description: 'Gemini 2.5 Flash (default)' },
        { id: 'gemini-2.5-pro', provider: 'google', description: 'Gemini 2.5 Pro' },
      ],
    });
  });

  /**
   * GET /extract/sidecar-health — Check Python sidecar status.
   * Responds 503 with the error message when the health call throws.
   */
  app.get('/extract/sidecar-health', async (_req, reply) => {
    try {
      const health = await sidecarHealth();
      return reply.send({ status: 'ok', sidecar: health });
    } catch (err) {
      const message = err instanceof Error ? err.message : 'Sidecar unavailable';
      return reply.status(503).send({ status: 'error', error: message });
    }
  });

  /**
   * GET /extract/usage — Per-user extraction usage (admin).
   * Requires a `userId` query parameter; `plan` defaults to 'free'.
   *
   * NOTE(review): no authorization check is performed in this handler —
   * confirm access is restricted elsewhere.
   */
  app.get('/extract/usage', async (req, reply) => {
    const query = req.query as Record<string, string>;
    const userId = query.userId;
    const plan = query.plan || 'free';
    if (!userId) {
      throw new BadRequestError('userId query parameter is required');
    }
    return reply.send(getUsageSummary(userId, plan));
  });

  /**
   * GET /extract/cache-stats — Cache statistics.
   * `hitRate` is hits / (hits + misses), rounded to three decimals, or 0
   * before any lookup has happened.
   */
  app.get('/extract/cache-stats', async (_req, reply) => {
    const total = cacheHits + cacheMisses;
    return reply.send({
      size: cache.size,
      maxSize: CACHE_MAX,
      ttlMs: CACHE_TTL_MS,
      hits: cacheHits,
      misses: cacheMisses,
      hitRate: total > 0 ? Math.round((cacheHits / total) * 1000) / 1000 : 0,
    });
  });
}