feat(extraction): add rate limiting + 21 schema tests

- Rate limiting on extract routes (30 req/min per IP via @fastify/rate-limit)
- 13 tests for ExtractionExampleSchema, ExtractionResultSchema, ExtractRequestSchema, BatchExtractRequestSchema
- 8 tests for ExtractionTaskSchema, CreateTaskSchema, UpdateTaskSchema
- All 21 tests passing, pnpm build clean
2026-02-14 13:34:26 -08:00 · 2026-02-14 13:34:26 -08:00 · 0a87d1937b
commit 0a87d1937b
parent 4b4720aebd
3 changed files with 226 additions and 0 deletions
--- a/services/extraction-service/src/modules/extract/routes.ts
+++ b/services/extraction-service/src/modules/extract/routes.ts
@ -1,10 +1,17 @@
 import type { FastifyInstance } from 'fastify';
 import rateLimit from '@fastify/rate-limit';
 import { ExtractRequestSchema, BatchExtractRequestSchema } from './types.js';
 import { sidecarExtract, sidecarExtractBatch, sidecarHealth } from '../../lib/python-bridge.js';
 import { BadRequestError } from '../../lib/errors.js';
 export async function extractRoutes(app: FastifyInstance) {
  // Rate limiting for extraction endpoints — 30 req/min per IP (configurable)
  await app.register(rateLimit, {
    max: 30,
    timeWindow: '1 minute',
    keyGenerator: req => req.ip,
  });
  /**
   * POST /extract — Single document extraction.
   */
--- a/services/extraction-service/src/modules/extract/types.test.ts
+++ b/services/extraction-service/src/modules/extract/types.test.ts
@ -0,0 +1,132 @@
 import { describe, it, expect } from 'vitest';
 import {
  ExtractRequestSchema,
  BatchExtractRequestSchema,
  ExtractionExampleSchema,
  ExtractionResultSchema,
 } from './types.js';
 /**
  * Validation checks for ExtractionExampleSchema: an example pairs a `text`
  * with a list of `extractions`.
  */
 describe('ExtractionExampleSchema', () => {
  it('accepts valid example', () => {
    // One extraction carrying only the class and the extracted text.
    const example = {
      text: 'John said ship by Friday',
      extractions: [{ extraction_class: 'deadline', extraction_text: 'ship by Friday' }],
    };
    const { success } = ExtractionExampleSchema.safeParse(example);
    expect(success).toBe(true);
  });
  it('rejects empty text', () => {
    // An empty `text` string (with an empty extraction list) must not validate.
    const example = { text: '', extractions: [] };
    const { success } = ExtractionExampleSchema.safeParse(example);
    expect(success).toBe(false);
  });
  it('accepts extractions with attributes', () => {
    // Same shape as a plain extraction, plus an `attributes` map.
    const extractionWithAttributes = {
      extraction_class: 'emotion',
      extraction_text: 'stressed',
      attributes: { valence: 'negative' },
    };
    const { success } = ExtractionExampleSchema.safeParse({
      text: 'test',
      extractions: [extractionWithAttributes],
    });
    expect(success).toBe(true);
  });
 });
 /**
  * Validation checks for ExtractionResultSchema, with and without the
  * character-offset fields.
  */
 describe('ExtractionResultSchema', () => {
  it('accepts result with offsets', () => {
    const withOffsets = {
      extraction_class: 'action_item',
      extraction_text: 'call the dentist',
      start_offset: 10,
      end_offset: 26,
    };
    const { success } = ExtractionResultSchema.safeParse(withOffsets);
    expect(success).toBe(true);
  });
  it('accepts result without optional fields', () => {
    // Only the class and text are supplied; no offsets.
    const bareResult = {
      extraction_class: 'topic',
      extraction_text: 'meeting',
    };
    const { success } = ExtractionResultSchema.safeParse(bareResult);
    expect(success).toBe(true);
  });
 });
 /**
  * Validation checks for ExtractRequestSchema, the payload for a
  * single-document extraction request.
  */
 describe('ExtractRequestSchema', () => {
  // Shorthand so each case reads as "payload in, verdict out".
  const parse = (payload: unknown) => ExtractRequestSchema.safeParse(payload);

  it('accepts minimal valid request', () => {
    // `text` alone is enough.
    const outcome = parse({ text: 'Hello world' });
    expect(outcome.success).toBe(true);
  });
  it('accepts full request with all options', () => {
    const fewShotExample = {
      text: 'example text',
      extractions: [{ extraction_class: 'person', extraction_text: 'John' }],
    };
    const tuning = {
      extractionPasses: 2,
      maxWorkers: 5,
      maxCharBuffer: 1000,
    };
    const outcome = parse({
      text: 'John said ship by Friday. Sarah will test.',
      taskId: 'transcript-extraction',
      modelId: 'gemini-2.5-flash',
      examples: [fewShotExample],
      options: tuning,
      productId: 'lysnrai',
    });
    expect(outcome.success).toBe(true);
  });
  it('rejects empty text', () => {
    const outcome = parse({ text: '' });
    expect(outcome.success).toBe(false);
  });
  it('rejects text exceeding 50,000 chars', () => {
    // One character past the 50,000 limit named in the test title.
    const oversizedText = 'a'.repeat(50_001);
    const outcome = parse({ text: oversizedText });
    expect(outcome.success).toBe(false);
  });
  it('rejects invalid options', () => {
    // extractionPasses of 10 is expected to fail validation
    // (presumably above the schema's allowed maximum — see types.ts).
    const outcome = parse({
      text: 'test',
      options: { extractionPasses: 10 },
    });
    expect(outcome.success).toBe(false);
  });
 });
 /**
  * Validation checks for BatchExtractRequestSchema, which wraps a list of
  * extraction inputs under `inputs`.
  */
 describe('BatchExtractRequestSchema', () => {
  it('accepts valid batch', () => {
    // Two entries; the second also names a task.
    const firstDoc = { text: 'first document' };
    const secondDoc = { text: 'second document', taskId: 'triage' };
    const { success } = BatchExtractRequestSchema.safeParse({ inputs: [firstDoc, secondDoc] });
    expect(success).toBe(true);
  });
  it('rejects empty inputs', () => {
    const { success } = BatchExtractRequestSchema.safeParse({ inputs: [] });
    expect(success).toBe(false);
  });
  it('rejects batch exceeding 50 inputs', () => {
    // Build 51 entries: one more than the limit named in the test title.
    const inputs: { text: string }[] = [];
    for (let index = 0; index < 51; index += 1) {
      inputs.push({ text: `document ${index}` });
    }
    const { success } = BatchExtractRequestSchema.safeParse({ inputs });
    expect(success).toBe(false);
  });
 });
--- a/services/extraction-service/src/modules/tasks/types.test.ts
+++ b/services/extraction-service/src/modules/tasks/types.test.ts
@ -0,0 +1,87 @@
 import { describe, it, expect } from 'vitest';
 import { ExtractionTaskSchema, CreateTaskSchema, UpdateTaskSchema } from './types.js';
 /**
  * Validation checks for ExtractionTaskSchema, including the default applied
  * to `builtIn` when the field is omitted.
  */
 describe('ExtractionTaskSchema', () => {
  it('accepts valid task', () => {
    const fullTask = {
      id: 'transcript-extraction',
      name: 'Transcript Extraction',
      prompt: 'Extract entities from transcripts.',
      classes: ['action_item', 'decision', 'person'],
      builtIn: true,
      productId: 'lysnrai',
    };
    const parsed = ExtractionTaskSchema.safeParse(fullTask);
    expect(parsed.success).toBe(true);
  });
  it('rejects task without required fields', () => {
    // Only `id` is provided; everything else is missing.
    const parsed = ExtractionTaskSchema.safeParse({ id: 'test' });
    expect(parsed.success).toBe(false);
  });
  it('defaults builtIn to false', () => {
    // `builtIn` is deliberately left out of the input.
    const taskWithoutBuiltIn = {
      id: 'custom-task',
      name: 'Custom',
      prompt: 'Extract stuff.',
      classes: ['thing'],
      productId: 'lysnrai',
    };
    const parsed = ExtractionTaskSchema.safeParse(taskWithoutBuiltIn);
    expect(parsed.success).toBe(true);
    // Guard narrows the result type so `data` is accessible; the assertion
    // above has already failed the test if parsing did not succeed.
    if (!parsed.success) {
      return;
    }
    expect(parsed.data.builtIn).toBe(false);
  });
 });
 /**
  * Validation checks for CreateTaskSchema, the payload used to create a task.
  */
 describe('CreateTaskSchema', () => {
  // Smallest payload exercised by these tests; reused as the base for the
  // examples case.
  const minimalInput = {
    id: 'my-task',
    name: 'My Task',
    prompt: 'Extract things.',
    classes: ['thing'],
  };

  it('accepts minimal create input', () => {
    const parsed = CreateTaskSchema.safeParse({ ...minimalInput });
    expect(parsed.success).toBe(true);
  });
  it('accepts create with examples', () => {
    const sampleExample = {
      text: 'sample text',
      extractions: [{ extraction_class: 'thing', extraction_text: 'sample' }],
    };
    const parsed = CreateTaskSchema.safeParse({
      ...minimalInput,
      examples: [sampleExample],
    });
    expect(parsed.success).toBe(true);
  });
 });
 /**
  * Validation checks for UpdateTaskSchema, the payload used to patch a task.
  */
 describe('UpdateTaskSchema', () => {
  it('accepts partial update', () => {
    // A single field is enough for an update.
    const patch = { name: 'Updated Name' };
    const { success } = UpdateTaskSchema.safeParse(patch);
    expect(success).toBe(true);
  });
  it('accepts empty update', () => {
    // An object with no fields at all is still accepted.
    const { success } = UpdateTaskSchema.safeParse({});
    expect(success).toBe(true);
  });
  it('rejects invalid classes', () => {
    // A class list containing an empty string must not validate.
    const patch = { classes: [''] };
    const { success } = UpdateTaskSchema.safeParse(patch);
    expect(success).toBe(false);
  });
 });