learning_ai_common_plat/services/extraction-service/src/modules/extract/routes.test.ts
saravanakumardb1 c9d5c0caed feat(extraction): integration tests + Python tests + fix langextract API
- 6 route integration tests (mock sidecar via vitest vi.mock)
- 12 task CRUD route tests (mock repository)
- 29 Python tests: 10 extractor, 12 models, 7 app endpoints
- Fix extractor.py: correct lx.extract() API (text_or_documents positional, prompt_description)
- Mock fallback when no GEMINI_API_KEY or USE_MOCK_EXTRACTOR=true
- 46 TS tests + 29 Python tests = 75 total
2026-02-14 13:49:18 -08:00

131 lines
4.2 KiB
TypeScript

import { describe, it, expect, vi, beforeEach } from 'vitest';
/**
 * Tests for the extract routes' use of the Python sidecar bridge.
 *
 * The python-bridge module is swapped for bare `vi.fn()` stubs so the suite
 * runs without a live sidecar process.
 */
vi.mock('../../lib/python-bridge.js', () => {
  // One independent stub per bridge export; tests install the outcome they need.
  const extractStub = vi.fn();
  const extractBatchStub = vi.fn();
  const healthStub = vi.fn();
  return {
    sidecarExtract: extractStub,
    sidecarExtractBatch: extractBatchStub,
    sidecarHealth: healthStub,
  };
});
import { sidecarExtract, sidecarExtractBatch, sidecarHealth } from '../../lib/python-bridge.js';

// Mock-typed handles onto the stubbed exports, giving access to
// mockResolvedValue / mockRejectedValue and the recorded calls.
const mockSidecarExtract = vi.mocked(sidecarExtract);
const mockSidecarExtractBatch = vi.mocked(sidecarExtractBatch);
const mockSidecarHealth = vi.mocked(sidecarHealth);
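// These tests call the mocked python-bridge functions directly and check the
// stubbed results, rather than spinning up a full Fastify instance (this avoids
// the @bytelyst/fastify-core dependency in tests).
// NOTE(review): no Zod schema or route handler is referenced in this file, so
// request validation is not covered here — confirm it is tested elsewhere.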
/**
 * Contract checks for the mocked python-bridge functions.
 *
 * Each case stubs one bridge function, invokes it, and verifies both the
 * arguments recorded by the mock and the value (or error) handed back.
 */
describe('extract route logic (via mocks)', () => {
  beforeEach(() => {
    // resetAllMocks (unlike clearAllMocks) also drops stubbed implementations,
    // so a mockResolvedValue/mockRejectedValue from one test cannot leak into
    // the next one.
    vi.resetAllMocks();
  });

  it('sidecarExtract is called with correct shape', async () => {
    const mockResponse = {
      extractions: [{ extraction_class: 'action_item', extraction_text: 'call John' }],
      metadata: {
        model_id: 'gemini-2.5-flash',
        duration_ms: 150,
        char_count: 20,
      },
    };
    mockSidecarExtract.mockResolvedValue(mockResponse);

    const result = await sidecarExtract({
      text: 'Need to call John tomorrow',
      task_id: 'transcript-extraction',
      model_id: 'gemini-2.5-flash',
    });

    expect(mockSidecarExtract).toHaveBeenCalledTimes(1);
    expect(mockSidecarExtract).toHaveBeenCalledWith({
      text: 'Need to call John tomorrow',
      task_id: 'transcript-extraction',
      model_id: 'gemini-2.5-flash',
    });
    expect(result.extractions).toHaveLength(1);
    expect(result.extractions[0].extraction_class).toBe('action_item');
    expect(result.metadata.model_id).toBe('gemini-2.5-flash');
  });

  it('sidecarExtractBatch handles multiple inputs', async () => {
    const mockResponses = [
      {
        extractions: [{ extraction_class: 'topic', extraction_text: 'meeting' }],
        metadata: { model_id: 'gemini-2.5-flash', duration_ms: 100, char_count: 10 },
      },
      {
        extractions: [{ extraction_class: 'person', extraction_text: 'Sarah' }],
        metadata: { model_id: 'gemini-2.5-flash', duration_ms: 120, char_count: 15 },
      },
    ];
    mockSidecarExtractBatch.mockResolvedValue(mockResponses);

    const result = await sidecarExtractBatch([
      { text: 'first doc' },
      { text: 'second doc with Sarah' },
    ]);

    // Verify the inputs reached the bridge as a single batch call.
    expect(mockSidecarExtractBatch).toHaveBeenCalledTimes(1);
    expect(mockSidecarExtractBatch).toHaveBeenCalledWith([
      { text: 'first doc' },
      { text: 'second doc with Sarah' },
    ]);
    expect(result).toHaveLength(2);
    expect(result[0].extractions[0].extraction_class).toBe('topic');
    expect(result[1].extractions[0].extraction_class).toBe('person');
  });

  it('sidecarHealth returns status', async () => {
    mockSidecarHealth.mockResolvedValue({ status: 'ok', version: '0.1.0' });

    const health = await sidecarHealth();

    expect(mockSidecarHealth).toHaveBeenCalledTimes(1);
    expect(health.status).toBe('ok');
  });

  it('sidecarHealth throws when sidecar is down', async () => {
    mockSidecarHealth.mockRejectedValue(new Error('Sidecar health check failed: 503'));

    await expect(sidecarHealth()).rejects.toThrow('Sidecar health check failed');
  });

  it('sidecarExtract propagates errors', async () => {
    mockSidecarExtract.mockRejectedValue(new Error('Sidecar error 500: Model timeout'));

    await expect(sidecarExtract({ text: 'test' })).rejects.toThrow('Sidecar error 500');
  });

  it('sidecarExtract with all optional params', async () => {
    const mockResponse = {
      extractions: [],
      metadata: { model_id: 'gemini-2.5-pro', duration_ms: 200, char_count: 50 },
    };
    mockSidecarExtract.mockResolvedValue(mockResponse);

    const result = await sidecarExtract({
      text: 'complex document with lots of text here',
      task_id: 'triage',
      task_prompt: 'Custom extraction prompt',
      examples: [
        {
          text: 'example',
          extractions: [{ extraction_class: 'topic', extraction_text: 'example topic' }],
        },
      ],
      model_id: 'gemini-2.5-pro',
      extraction_passes: 2,
      max_workers: 5,
      max_char_buffer: 1000,
    });

    // Check every parameter that was supplied, not just a subset, so a dropped
    // optional field is caught.
    expect(mockSidecarExtract).toHaveBeenCalledTimes(1);
    expect(mockSidecarExtract).toHaveBeenCalledWith(
      expect.objectContaining({
        text: 'complex document with lots of text here',
        task_id: 'triage',
        task_prompt: 'Custom extraction prompt',
        examples: [
          {
            text: 'example',
            extractions: [{ extraction_class: 'topic', extraction_text: 'example topic' }],
          },
        ],
        model_id: 'gemini-2.5-pro',
        extraction_passes: 2,
        max_workers: 5,
        max_char_buffer: 1000,
      })
    );
    expect(result.extractions).toHaveLength(0);
    expect(result.metadata.model_id).toBe('gemini-2.5-pro');
  });
});