feat(extraction): add rate limiting + 21 schema tests

- Rate limiting on extract routes (30 req/min per IP via @fastify/rate-limit)
- 13 tests for ExtractionExampleSchema, ExtractionResultSchema, ExtractRequestSchema, BatchExtractRequestSchema
- 8 tests for ExtractionTaskSchema, CreateTaskSchema, UpdateTaskSchema
- All 21 tests passing, pnpm build clean
2026-02-14 13:34:26 -08:00 · 2026-02-14 13:34:26 -08:00 · 0a87d1937b
commit 0a87d1937b
parent 4b4720aebd
3 changed files with 226 additions and 0 deletions
--- a/services/extraction-service/src/modules/extract/routes.ts
+++ b/services/extraction-service/src/modules/extract/routes.ts
@@ -1,10 +1,17 @@
 import type { FastifyInstance } from 'fastify';
+import rateLimit from '@fastify/rate-limit';

 import { ExtractRequestSchema, BatchExtractRequestSchema } from './types.js';
 import { sidecarExtract, sidecarExtractBatch, sidecarHealth } from '../../lib/python-bridge.js';
 import { BadRequestError } from '../../lib/errors.js';

 export async function extractRoutes(app: FastifyInstance) {
+  // Rate limiting for extraction endpoints — 30 req/min per IP (limit and window are hard-coded below)
+  await app.register(rateLimit, {
+    max: 30,
+    timeWindow: '1 minute',
+    keyGenerator: req => req.ip,
+  });
  /**
   * POST /extract — Single document extraction.
   */
--- a/services/extraction-service/src/modules/extract/types.test.ts
+++ b/services/extraction-service/src/modules/extract/types.test.ts
@@ -0,0 +1,132 @@
+import { describe, it, expect } from 'vitest';
+
+import {
+  ExtractRequestSchema,
+  BatchExtractRequestSchema,
+  ExtractionExampleSchema,
+  ExtractionResultSchema,
+} from './types.js';
+
+describe('ExtractionExampleSchema', () => {
+  it('accepts valid example', () => {
+    const result = ExtractionExampleSchema.safeParse({
+      text: 'John said ship by Friday',
+      extractions: [{ extraction_class: 'deadline', extraction_text: 'ship by Friday' }],
+    });
+    expect(result.success).toBe(true);
+  });
+
+  it('rejects empty text', () => {
+    const result = ExtractionExampleSchema.safeParse({
+      text: '',
+      extractions: [],
+    });
+    expect(result.success).toBe(false);
+  });
+
+  it('accepts extractions with attributes', () => {
+    const result = ExtractionExampleSchema.safeParse({
+      text: 'test',
+      extractions: [
+        {
+          extraction_class: 'emotion',
+          extraction_text: 'stressed',
+          attributes: { valence: 'negative' },
+        },
+      ],
+    });
+    expect(result.success).toBe(true);
+  });
+});
+
+describe('ExtractionResultSchema', () => {
+  it('accepts result with offsets', () => {
+    const result = ExtractionResultSchema.safeParse({
+      extraction_class: 'action_item',
+      extraction_text: 'call the dentist',
+      start_offset: 10,
+      end_offset: 26,
+    });
+    expect(result.success).toBe(true);
+  });
+
+  it('accepts result without optional fields', () => {
+    const result = ExtractionResultSchema.safeParse({
+      extraction_class: 'topic',
+      extraction_text: 'meeting',
+    });
+    expect(result.success).toBe(true);
+  });
+});
+
+describe('ExtractRequestSchema', () => {
+  it('accepts minimal valid request', () => {
+    const result = ExtractRequestSchema.safeParse({
+      text: 'Hello world',
+    });
+    expect(result.success).toBe(true);
+  });
+
+  it('accepts full request with all options', () => {
+    const result = ExtractRequestSchema.safeParse({
+      text: 'John said ship by Friday. Sarah will test.',
+      taskId: 'transcript-extraction',
+      modelId: 'gemini-2.5-flash',
+      examples: [
+        {
+          text: 'example text',
+          extractions: [{ extraction_class: 'person', extraction_text: 'John' }],
+        },
+      ],
+      options: {
+        extractionPasses: 2,
+        maxWorkers: 5,
+        maxCharBuffer: 1000,
+      },
+      productId: 'lysnrai',
+    });
+    expect(result.success).toBe(true);
+  });
+
+  it('rejects empty text', () => {
+    const result = ExtractRequestSchema.safeParse({ text: '' });
+    expect(result.success).toBe(false);
+  });
+
+  it('rejects text exceeding 50,000 chars', () => {
+    const result = ExtractRequestSchema.safeParse({
+      text: 'a'.repeat(50_001),
+    });
+    expect(result.success).toBe(false);
+  });
+
+  it('rejects invalid options', () => {
+    const result = ExtractRequestSchema.safeParse({
+      text: 'test',
+      options: { extractionPasses: 10 },
+    });
+    expect(result.success).toBe(false);
+  });
+});
+
+describe('BatchExtractRequestSchema', () => {
+  it('accepts valid batch', () => {
+    const result = BatchExtractRequestSchema.safeParse({
+      inputs: [{ text: 'first document' }, { text: 'second document', taskId: 'triage' }],
+    });
+    expect(result.success).toBe(true);
+  });
+
+  it('rejects empty inputs', () => {
+    const result = BatchExtractRequestSchema.safeParse({ inputs: [] });
+    expect(result.success).toBe(false);
+  });
+
+  it('rejects batch exceeding 50 inputs', () => {
+    const inputs = Array.from({ length: 51 }, (_, i) => ({
+      text: `document ${i}`,
+    }));
+    const result = BatchExtractRequestSchema.safeParse({ inputs });
+    expect(result.success).toBe(false);
+  });
+});
--- a/services/extraction-service/src/modules/tasks/types.test.ts
+++ b/services/extraction-service/src/modules/tasks/types.test.ts
@@ -0,0 +1,87 @@
+import { describe, it, expect } from 'vitest';
+
+import { ExtractionTaskSchema, CreateTaskSchema, UpdateTaskSchema } from './types.js';
+
+describe('ExtractionTaskSchema', () => {
+  it('accepts valid task', () => {
+    const result = ExtractionTaskSchema.safeParse({
+      id: 'transcript-extraction',
+      name: 'Transcript Extraction',
+      prompt: 'Extract entities from transcripts.',
+      classes: ['action_item', 'decision', 'person'],
+      builtIn: true,
+      productId: 'lysnrai',
+    });
+    expect(result.success).toBe(true);
+  });
+
+  it('rejects task without required fields', () => {
+    const result = ExtractionTaskSchema.safeParse({
+      id: 'test',
+    });
+    expect(result.success).toBe(false);
+  });
+
+  it('defaults builtIn to false', () => {
+    const result = ExtractionTaskSchema.safeParse({
+      id: 'custom-task',
+      name: 'Custom',
+      prompt: 'Extract stuff.',
+      classes: ['thing'],
+      productId: 'lysnrai',
+    });
+    expect(result.success).toBe(true);
+    if (result.success) {
+      expect(result.data.builtIn).toBe(false);
+    }
+  });
+});
+
+describe('CreateTaskSchema', () => {
+  it('accepts minimal create input', () => {
+    const result = CreateTaskSchema.safeParse({
+      id: 'my-task',
+      name: 'My Task',
+      prompt: 'Extract things.',
+      classes: ['thing'],
+    });
+    expect(result.success).toBe(true);
+  });
+
+  it('accepts create with examples', () => {
+    const result = CreateTaskSchema.safeParse({
+      id: 'my-task',
+      name: 'My Task',
+      prompt: 'Extract things.',
+      classes: ['thing'],
+      examples: [
+        {
+          text: 'sample text',
+          extractions: [{ extraction_class: 'thing', extraction_text: 'sample' }],
+        },
+      ],
+    });
+    expect(result.success).toBe(true);
+  });
+});
+
+describe('UpdateTaskSchema', () => {
+  it('accepts partial update', () => {
+    const result = UpdateTaskSchema.safeParse({
+      name: 'Updated Name',
+    });
+    expect(result.success).toBe(true);
+  });
+
+  it('accepts empty update', () => {
+    const result = UpdateTaskSchema.safeParse({});
+    expect(result.success).toBe(true);
+  });
+
+  it('rejects invalid classes', () => {
+    const result = UpdateTaskSchema.safeParse({
+      classes: [''],
+    });
+    expect(result.success).toBe(false);
+  });
+});