diff --git a/services/extraction-service/src/modules/extract/routes.ts b/services/extraction-service/src/modules/extract/routes.ts index 5ddc74dc..a0371bfb 100644 --- a/services/extraction-service/src/modules/extract/routes.ts +++ b/services/extraction-service/src/modules/extract/routes.ts @@ -1,10 +1,17 @@ import type { FastifyInstance } from 'fastify'; +import rateLimit from '@fastify/rate-limit'; import { ExtractRequestSchema, BatchExtractRequestSchema } from './types.js'; import { sidecarExtract, sidecarExtractBatch, sidecarHealth } from '../../lib/python-bridge.js'; import { BadRequestError } from '../../lib/errors.js'; export async function extractRoutes(app: FastifyInstance) { + // Rate limiting for extraction endpoints — 30 req/min per IP (limits are hard-coded in the options below) + await app.register(rateLimit, { + max: 30, + timeWindow: '1 minute', + keyGenerator: req => req.ip, + }); /** * POST /extract — Single document extraction. */ diff --git a/services/extraction-service/src/modules/extract/types.test.ts b/services/extraction-service/src/modules/extract/types.test.ts new file mode 100644 index 00000000..940367f5 --- /dev/null +++ b/services/extraction-service/src/modules/extract/types.test.ts @@ -0,0 +1,132 @@ +import { describe, it, expect } from 'vitest'; + +import { + ExtractRequestSchema, + BatchExtractRequestSchema, + ExtractionExampleSchema, + ExtractionResultSchema, +} from './types.js'; + +describe('ExtractionExampleSchema', () => { + it('accepts valid example', () => { + const result = ExtractionExampleSchema.safeParse({ + text: 'John said ship by Friday', + extractions: [{ extraction_class: 'deadline', extraction_text: 'ship by Friday' }], + }); + expect(result.success).toBe(true); + }); + + it('rejects empty text', () => { + const result = ExtractionExampleSchema.safeParse({ + text: '', + extractions: [], + }); + expect(result.success).toBe(false); + }); + + it('accepts extractions with attributes', () => { + const result = ExtractionExampleSchema.safeParse({ + 
text: 'test', + extractions: [ + { + extraction_class: 'emotion', + extraction_text: 'stressed', + attributes: { valence: 'negative' }, + }, + ], + }); + expect(result.success).toBe(true); + }); +}); + +describe('ExtractionResultSchema', () => { + it('accepts result with offsets', () => { + const result = ExtractionResultSchema.safeParse({ + extraction_class: 'action_item', + extraction_text: 'call the dentist', + start_offset: 10, + end_offset: 26, + }); + expect(result.success).toBe(true); + }); + + it('accepts result without optional fields', () => { + const result = ExtractionResultSchema.safeParse({ + extraction_class: 'topic', + extraction_text: 'meeting', + }); + expect(result.success).toBe(true); + }); +}); + +describe('ExtractRequestSchema', () => { + it('accepts minimal valid request', () => { + const result = ExtractRequestSchema.safeParse({ + text: 'Hello world', + }); + expect(result.success).toBe(true); + }); + + it('accepts full request with all options', () => { + const result = ExtractRequestSchema.safeParse({ + text: 'John said ship by Friday. 
Sarah will test.', + taskId: 'transcript-extraction', + modelId: 'gemini-2.5-flash', + examples: [ + { + text: 'example text', + extractions: [{ extraction_class: 'person', extraction_text: 'John' }], + }, + ], + options: { + extractionPasses: 2, + maxWorkers: 5, + maxCharBuffer: 1000, + }, + productId: 'lysnrai', + }); + expect(result.success).toBe(true); + }); + + it('rejects empty text', () => { + const result = ExtractRequestSchema.safeParse({ text: '' }); + expect(result.success).toBe(false); + }); + + it('rejects text exceeding 50,000 chars', () => { + const result = ExtractRequestSchema.safeParse({ + text: 'a'.repeat(50_001), + }); + expect(result.success).toBe(false); + }); + + it('rejects invalid options', () => { + const result = ExtractRequestSchema.safeParse({ + text: 'test', + options: { extractionPasses: 10 }, + }); + expect(result.success).toBe(false); + }); +}); + +describe('BatchExtractRequestSchema', () => { + it('accepts valid batch', () => { + const result = BatchExtractRequestSchema.safeParse({ + inputs: [{ text: 'first document' }, { text: 'second document', taskId: 'triage' }], + }); + expect(result.success).toBe(true); + }); + + it('rejects empty inputs', () => { + const result = BatchExtractRequestSchema.safeParse({ inputs: [] }); + expect(result.success).toBe(false); + }); + + it('rejects batch exceeding 50 inputs', () => { + const inputs = Array.from({ length: 51 }, (_, i) => ({ + text: `document ${i}`, + })); + const result = BatchExtractRequestSchema.safeParse({ inputs }); + expect(result.success).toBe(false); + }); +}); diff --git a/services/extraction-service/src/modules/tasks/types.test.ts b/services/extraction-service/src/modules/tasks/types.test.ts new file mode 100644 index 00000000..c206f01e --- /dev/null +++ b/services/extraction-service/src/modules/tasks/types.test.ts @@ -0,0 +1,87 @@ +import { describe, it, expect } from 'vitest'; + +import { ExtractionTaskSchema, CreateTaskSchema, UpdateTaskSchema } from './types.js'; + 
+describe('ExtractionTaskSchema', () => { + it('accepts valid task', () => { + const result = ExtractionTaskSchema.safeParse({ + id: 'transcript-extraction', + name: 'Transcript Extraction', + prompt: 'Extract entities from transcripts.', + classes: ['action_item', 'decision', 'person'], + builtIn: true, + productId: 'lysnrai', + }); + expect(result.success).toBe(true); + }); + + it('rejects task without required fields', () => { + const result = ExtractionTaskSchema.safeParse({ + id: 'test', + }); + expect(result.success).toBe(false); + }); + + it('defaults builtIn to false', () => { + const result = ExtractionTaskSchema.safeParse({ + id: 'custom-task', + name: 'Custom', + prompt: 'Extract stuff.', + classes: ['thing'], + productId: 'lysnrai', + }); + expect(result.success).toBe(true); + if (result.success) { + expect(result.data.builtIn).toBe(false); + } + }); +}); + +describe('CreateTaskSchema', () => { + it('accepts minimal create input', () => { + const result = CreateTaskSchema.safeParse({ + id: 'my-task', + name: 'My Task', + prompt: 'Extract things.', + classes: ['thing'], + }); + expect(result.success).toBe(true); + }); + + it('accepts create with examples', () => { + const result = CreateTaskSchema.safeParse({ + id: 'my-task', + name: 'My Task', + prompt: 'Extract things.', + classes: ['thing'], + examples: [ + { + text: 'sample text', + extractions: [{ extraction_class: 'thing', extraction_text: 'sample' }], + }, + ], + }); + expect(result.success).toBe(true); + }); +}); + +describe('UpdateTaskSchema', () => { + it('accepts partial update', () => { + const result = UpdateTaskSchema.safeParse({ + name: 'Updated Name', + }); + expect(result.success).toBe(true); + }); + + it('accepts empty update', () => { + const result = UpdateTaskSchema.safeParse({}); + expect(result.success).toBe(true); + }); + + it('rejects invalid classes', () => { + const result = UpdateTaskSchema.safeParse({ + classes: [''], + }); + expect(result.success).toBe(false); + }); +});