learning_ai_common_plat/services/extraction-service/src/modules/extract/routes.ts

import type { FastifyInstance } from 'fastify';
import rateLimit from '@fastify/rate-limit';
import { createHash } from 'node:crypto';

import { ExtractRequestSchema, BatchExtractRequestSchema } from './types.js';
import {
  sidecarExtract,
  sidecarExtractBatch,
  sidecarHealth,
  type SidecarExtractResponse,
} from '../../lib/python-bridge.js';
import { BadRequestError } from '../../lib/errors.js';
import { checkQuota, incrementUsage, getUsageSummary } from './usage.js';

// ── In-memory response cache (bounded size, TTL-expiring) ──────

/**
 * Parse a non-negative integer cache setting.
 *
 * Falls back to `fallback` when the raw value is missing, empty, non-numeric,
 * or negative. A bare `parseInt` returns NaN for a malformed value, and every
 * comparison against NaN is false, so a typo in the environment would
 * silently disable both the TTL check and the capacity check that read these
 * constants.
 */
function parseCacheSetting(raw: string | undefined, fallback: number): number {
  if (!raw) return fallback;
  const parsed = Number.parseInt(raw, 10);
  return Number.isNaN(parsed) || parsed < 0 ? fallback : parsed;
}

/** Maximum age of a cached response in milliseconds (default: 24h). */
const CACHE_TTL_MS = parseCacheSetting(process.env.EXTRACTION_CACHE_TTL_MS, 24 * 60 * 60 * 1000);
/** Capacity threshold at which inserting evicts the first entry in the map (default: 500). */
const CACHE_MAX = parseCacheSetting(process.env.EXTRACTION_CACHE_MAX, 500);

/** A cached sidecar response together with the time it was stored. */
interface CacheEntry {
  response: SidecarExtractResponse;
  // Epoch milliseconds (Date.now()) at insertion; compared against CACHE_TTL_MS.
  createdAt: number;
}

// Keyed by the SHA-256 hex digest produced by cacheKey(). A Map iterates in
// insertion order, which is what eviction relies on to find the oldest entry.
const cache = new Map<string, CacheEntry>();
// Process-lifetime counters reported by GET /extract/cache-stats.
let cacheHits = 0;
let cacheMisses = 0;

/**
 * Derive the cache key for an extraction request.
 *
 * The three components are serialised as a JSON array before hashing so the
 * boundaries between them are unambiguous. Joining with a plain `:` lets
 * different inputs collide, e.g. taskId `a:b` + modelId `c` versus
 * taskId `a` + modelId `b:c`.
 *
 * @param text    Document text being extracted.
 * @param taskId  Optional task identifier; `undefined` and `''` are equivalent.
 * @param modelId Optional model identifier; `undefined` and `''` are equivalent.
 * @returns SHA-256 digest of the serialised components, as lowercase hex.
 */
function cacheKey(text: string, taskId?: string, modelId?: string): string {
  const material = JSON.stringify([taskId ?? '', modelId ?? '', text]);
  return createHash('sha256').update(material).digest('hex');
}

/**
 * Look up a cached extraction response.
 *
 * Counts every call as either a hit or a miss. An entry older than
 * CACHE_TTL_MS is removed and reported as a miss. A live entry is moved to
 * the newest position in the Map, so capacity eviction (which removes the
 * first key) discards the least recently used entry rather than the
 * least recently inserted one. The entry's `createdAt` is left untouched, so
 * a hit does not extend its TTL.
 *
 * @returns The cached response, or `null` when absent or expired.
 */
function cacheGet(text: string, taskId?: string, modelId?: string): SidecarExtractResponse | null {
  const key = cacheKey(text, taskId, modelId);
  const entry = cache.get(key);

  if (!entry) {
    cacheMisses++;
    return null;
  }

  const ageMs = Date.now() - entry.createdAt;
  if (ageMs > CACHE_TTL_MS) {
    cache.delete(key);
    cacheMisses++;
    return null;
  }

  // Delete + set re-appends the key, refreshing its recency.
  cache.delete(key);
  cache.set(key, entry);

  cacheHits++;
  return entry.response;
}

/**
 * Store an extraction response in the cache.
 *
 * Any existing entry for the same key is removed first. This way, re-storing
 * a key while the cache is full does not evict an unrelated entry, and the
 * refreshed entry takes the newest position instead of keeping its old one.
 * If the cache is still at capacity after that, the first key in the Map
 * (the oldest position) is evicted to make room.
 *
 * @param text     Document text the response was produced for.
 * @param taskId   Task identifier component of the cache key, if any.
 * @param modelId  Model identifier component of the cache key, if any.
 * @param response Sidecar response to cache; stamped with the current time.
 */
function cachePut(
  text: string,
  taskId: string | undefined,
  modelId: string | undefined,
  response: SidecarExtractResponse
): void {
  const key = cacheKey(text, taskId, modelId);

  // Replacing an entry must not count against capacity.
  cache.delete(key);

  if (cache.size >= CACHE_MAX) {
    const oldestKey = cache.keys().next().value;
    if (oldestKey !== undefined) cache.delete(oldestKey);
  }

  cache.set(key, { response, createdAt: Date.now() });
}

/** The snake_case sidecar metadata fields that the HTTP API re-exposes. */
interface SidecarMetadataLike {
  model_id?: unknown;
  duration_ms?: unknown;
  token_count?: unknown;
  char_count?: unknown;
}

/** Map sidecar metadata (snake_case) to the camelCase shape sent to API clients. */
function toResponseMetadata<M extends SidecarMetadataLike>(metadata: M) {
  return {
    modelId: metadata.model_id,
    durationMs: metadata.duration_ms,
    tokenCount: metadata.token_count,
    charCount: metadata.char_count,
  };
}

/**
 * Serialise every request field, other than the text and model, that is
 * forwarded to the sidecar and can therefore change its output.
 *
 * The result is used as the task component of the cache key so that two
 * requests with the same text and task id but a different prompt, example set
 * or option set never share a cached response.
 */
function extractionCacheScope(
  taskId: unknown,
  taskPrompt: unknown,
  examples: unknown,
  options: unknown
): string {
  return JSON.stringify([taskId ?? null, taskPrompt ?? null, examples ?? null, options ?? null]);
}

/**
 * Fastify plugin that registers the extraction HTTP routes:
 *
 * - `POST /extract`: single-document extraction (quota-checked, cached)
 * - `POST /extract/batch`: batch extraction
 * - `GET /extract/models`: static list of model providers
 * - `GET /extract/sidecar-health`: Python sidecar status
 * - `GET /extract/usage`: per-user usage summary
 * - `GET /extract/cache-stats`: cache counters
 *
 * @param app Fastify instance to register the rate limiter and routes on.
 */
export async function extractRoutes(app: FastifyInstance) {
  // Rate limiting for extraction endpoints — 30 req/min per IP
  await app.register(rateLimit, {
    max: 30,
    timeWindow: '1 minute',
    keyGenerator: req => req.ip,
  });

  /**
   * POST /extract — Single document extraction.
   */
  app.post('/extract', async (req, reply) => {
    const parsed = ExtractRequestSchema.safeParse(req.body);
    if (!parsed.success) {
      throw new BadRequestError(parsed.error.issues.map(i => i.message).join('; '));
    }

    const { text, taskId, taskPrompt, examples, modelId, options } = parsed.data;
    const requestId = req.headers['x-request-id'] as string | undefined;

    // Enforce per-user daily quota. Identity and plan are read from request
    // headers; requests without an x-user-id header are not quota-checked.
    const userId = req.headers['x-user-id'] as string | undefined;
    const userPlan = (req.headers['x-user-plan'] as string) || 'free';
    if (userId) {
      const quota = checkQuota(userId, userPlan);
      if (!quota.allowed) {
        reply.header('X-RateLimit-Limit', String(quota.limit));
        reply.header('X-RateLimit-Remaining', '0');
        return reply.status(429).send({
          error: 'Daily extraction quota exceeded',
          limit: quota.limit,
          used: quota.used,
          plan: userPlan,
        });
      }
    }

    req.log.info({ taskId, modelId, textLength: text.length }, 'extraction request');

    // Check cache. The scope covers prompt, examples and options as well as
    // the task id, since all of them are sent to the sidecar below.
    const cacheScope = extractionCacheScope(taskId, taskPrompt, examples, options);
    const cached = cacheGet(text, cacheScope, modelId);
    if (cached) {
      // A cache hit is served without calling incrementUsage.
      req.log.info({ taskId }, 'cache hit');
      reply.header('X-Extraction-Cache', 'HIT');
      return reply.send({
        extractions: cached.extractions,
        metadata: toResponseMetadata(cached.metadata),
        requestId,
      });
    }

    reply.header('X-Extraction-Cache', 'MISS');

    const result = await sidecarExtract(
      {
        text,
        task_id: taskId,
        task_prompt: taskPrompt,
        examples: examples?.map(e => ({
          text: e.text,
          extractions: e.extractions.map(ex => ({
            extraction_class: ex.extraction_class,
            extraction_text: ex.extraction_text,
            attributes: ex.attributes,
          })),
        })),
        model_id: modelId,
        extraction_passes: options?.extractionPasses,
        max_workers: options?.maxWorkers,
        max_char_buffer: options?.maxCharBuffer,
      },
      requestId
    );

    cachePut(text, cacheScope, modelId, result);
    if (userId) incrementUsage(userId, userPlan);

    req.log.info(
      { entityCount: result.extractions.length, durationMs: result.metadata.duration_ms },
      'extraction complete'
    );

    return reply.send({
      extractions: result.extractions,
      metadata: toResponseMetadata(result.metadata),
      requestId,
    });
  });

  /**
   * POST /extract/batch — Batch extraction (multiple inputs, shared config).
   *
   * NOTE(review): unlike POST /extract, this route performs no quota check,
   * records no usage and does not consult the cache — confirm that is intended.
   */
  app.post('/extract/batch', async (req, reply) => {
    const parsed = BatchExtractRequestSchema.safeParse(req.body);
    if (!parsed.success) {
      throw new BadRequestError(parsed.error.issues.map(i => i.message).join('; '));
    }

    const { inputs, examples, modelId } = parsed.data;
    const requestId = req.headers['x-request-id'] as string | undefined;

    req.log.info({ inputCount: inputs.length, modelId }, 'batch extraction request');

    // Every input shares the same examples and model; only text/task differ.
    const sidecarRequests = inputs.map(input => ({
      text: input.text,
      task_id: input.taskId,
      task_prompt: input.taskPrompt,
      examples: examples?.map(e => ({
        text: e.text,
        extractions: e.extractions.map(ex => ({
          extraction_class: ex.extraction_class,
          extraction_text: ex.extraction_text,
          attributes: ex.attributes,
        })),
      })),
      model_id: modelId,
    }));

    const results = await sidecarExtractBatch(sidecarRequests, requestId);

    return reply.send({
      results: results.map(r => ({
        extractions: r.extractions,
        metadata: toResponseMetadata(r.metadata),
      })),
      requestId,
    });
  });

  /**
   * GET /extract/models — List available model providers.
   */
  app.get('/extract/models', async (_req, reply) => {
    return reply.send({
      models: [
        { id: 'gemini-2.5-flash', provider: 'google', description: 'Gemini 2.5 Flash (default)' },
        { id: 'gemini-2.5-pro', provider: 'google', description: 'Gemini 2.5 Pro' },
      ],
    });
  });

  /**
   * GET /extract/sidecar-health — Check Python sidecar status.
   * Responds 503 with the error message when the health call throws.
   */
  app.get('/extract/sidecar-health', async (_req, reply) => {
    try {
      const health = await sidecarHealth();
      return reply.send({ status: 'ok', sidecar: health });
    } catch (err) {
      const message = err instanceof Error ? err.message : 'Sidecar unavailable';
      return reply.status(503).send({ status: 'error', error: message });
    }
  });

  /**
   * GET /extract/usage — Per-user extraction usage (admin).
   * Requires a `userId` query parameter; `plan` defaults to 'free'.
   *
   * NOTE(review): no authorization check is visible in this handler — confirm
   * access is restricted upstream.
   */
  app.get('/extract/usage', async (req, reply) => {
    const userId = (req.query as Record<string, string>).userId;
    const plan = (req.query as Record<string, string>).plan || 'free';
    if (!userId) {
      throw new BadRequestError('userId query parameter is required');
    }
    return reply.send(getUsageSummary(userId, plan));
  });

  /**
   * GET /extract/cache-stats — Cache statistics.
   * `hitRate` is hits / (hits + misses), rounded to three decimals; 0 when
   * no lookups have happened yet.
   */
  app.get('/extract/cache-stats', async (_req, reply) => {
    const total = cacheHits + cacheMisses;
    return reply.send({
      size: cache.size,
      maxSize: CACHE_MAX,
      ttlMs: CACHE_TTL_MS,
      hits: cacheHits,
      misses: cacheMisses,
      hitRate: total > 0 ? Math.round((cacheHits / total) * 1000) / 1000 : 0,
    });
  });
}