learning_ai_common_plat/services/extraction-service/src/modules/extract/routes.ts
saravanakumardb1 c292bb5cc1 feat(extraction): scaffold extraction-service + @bytelyst/extraction package
- extraction-service: Fastify scaffold (port 4005) with extract/tasks modules
- src/lib/: config, errors, cosmos, product-config, python-bridge
- src/modules/extract/: types (Zod schemas), routes (POST /extract, batch, models)
- src/modules/tasks/: types, repository (Cosmos CRUD), routes (CRUD endpoints)
- Python sidecar: FastAPI app, LangExtract wrapper, models, task registry
- @bytelyst/extraction package: types, client factory, index exports
- Both pnpm build pass clean
2026-02-14 13:31:40 -08:00

130 lines
4.1 KiB
TypeScript

import type { FastifyInstance } from 'fastify';
import { ExtractRequestSchema, BatchExtractRequestSchema } from './types.js';
import { sidecarExtract, sidecarExtractBatch, sidecarHealth } from '../../lib/python-bridge.js';
import { BadRequestError } from '../../lib/errors.js';
/** Fields of one example extraction that are forwarded to the Python sidecar. */
interface ExampleExtractionFields {
  extraction_class?: unknown;
  extraction_text?: unknown;
  attributes?: unknown;
}

/** Fields of the sidecar's result metadata that are exposed to API clients. */
interface SidecarMetadataFields {
  model_id?: unknown;
  duration_ms?: unknown;
  token_count?: unknown;
  char_count?: unknown;
}

/** Static model catalogue served by GET /extract/models. */
const AVAILABLE_MODELS = [
  { id: 'gemini-2.5-flash', provider: 'google', description: 'Gemini 2.5 Flash (default)' },
  { id: 'gemini-2.5-pro', provider: 'google', description: 'Gemini 2.5 Pro' },
];

/**
 * Narrows a raw header value to a single string.
 *
 * Node types header values as `string | string[] | undefined`; narrowing at
 * runtime (instead of asserting with `as`) guarantees the declared type holds.
 * When an array is received, its first entry is used.
 */
function readRequestId(header: string | string[] | undefined): string | undefined {
  return Array.isArray(header) ? header[0] : header;
}

/** Joins the messages of all validation issues into one `; `-separated string. */
function joinIssueMessages(issues: ReadonlyArray<{ message: string }>): string {
  return issues.map(issue => issue.message).join('; ');
}

/**
 * Copies the caller-supplied examples into the payload shape sent to the sidecar,
 * keeping only `text` and, per extraction, `extraction_class`, `extraction_text`
 * and `attributes`. Returns `undefined` when no examples were supplied.
 */
function toSidecarExamples<T, X extends ExampleExtractionFields>(
  examples: ReadonlyArray<{ text: T; extractions: ReadonlyArray<X> }> | undefined
) {
  return examples?.map(example => ({
    text: example.text,
    extractions: example.extractions.map(item => ({
      extraction_class: item.extraction_class,
      extraction_text: item.extraction_text,
      attributes: item.attributes,
    })),
  }));
}

/**
 * Converts one sidecar result into the API response shape: extractions are
 * passed through untouched and the snake_case metadata keys become camelCase.
 */
function toResponseBody<X, M extends SidecarMetadataFields>(result: {
  extractions: X;
  metadata: M;
}) {
  return {
    extractions: result.extractions,
    metadata: {
      modelId: result.metadata.model_id,
      durationMs: result.metadata.duration_ms,
      tokenCount: result.metadata.token_count,
      charCount: result.metadata.char_count,
    },
  };
}

/**
 * Registers the extraction endpoints on the given Fastify instance:
 *
 * - `POST /extract`             — single document extraction
 * - `POST /extract/batch`       — several inputs sharing examples and model
 * - `GET  /extract/models`      — static list of selectable models
 * - `GET  /extract/sidecar-health` — status of the Python sidecar
 *
 * Both POST handlers throw `BadRequestError` when the body fails schema
 * validation (presumably turned into an HTTP 400 by the application's error
 * handler — that mapping is not defined in this module). Errors raised by the
 * sidecar calls in the POST handlers are not caught here and propagate to Fastify.
 *
 * @param app Fastify instance the routes are attached to.
 */
export async function extractRoutes(app: FastifyInstance): Promise<void> {
  /**
   * POST /extract — Single document extraction.
   */
  app.post('/extract', async (req, reply) => {
    const parsed = ExtractRequestSchema.safeParse(req.body);
    if (!parsed.success) {
      throw new BadRequestError(joinIssueMessages(parsed.error.issues));
    }

    const { text, taskId, taskPrompt, examples, modelId, options } = parsed.data;
    const requestId = readRequestId(req.headers['x-request-id']);

    // Only the text length is logged, never the document text itself.
    req.log.info({ taskId, modelId, textLength: text.length }, 'extraction request');

    const result = await sidecarExtract(
      {
        text,
        task_id: taskId,
        task_prompt: taskPrompt,
        examples: toSidecarExamples(examples),
        model_id: modelId,
        extraction_passes: options?.extractionPasses,
        max_workers: options?.maxWorkers,
        max_char_buffer: options?.maxCharBuffer,
      },
      requestId
    );

    req.log.info(
      { entityCount: result.extractions.length, durationMs: result.metadata.duration_ms },
      'extraction complete'
    );

    // requestId is echoed back so clients can correlate the response.
    return reply.send({ ...toResponseBody(result), requestId });
  });

  /**
   * POST /extract/batch — Batch extraction (multiple inputs, shared config).
   */
  app.post('/extract/batch', async (req, reply) => {
    const parsed = BatchExtractRequestSchema.safeParse(req.body);
    if (!parsed.success) {
      throw new BadRequestError(joinIssueMessages(parsed.error.issues));
    }

    const { inputs, examples, modelId } = parsed.data;
    const requestId = readRequestId(req.headers['x-request-id']);

    req.log.info({ inputCount: inputs.length, modelId }, 'batch extraction request');

    // The examples are identical for every input, so they are converted once
    // and the same array is attached to each sidecar request.
    const sharedExamples = toSidecarExamples(examples);

    // NOTE(review): unlike POST /extract, no extraction options (passes,
    // workers, char buffer) are forwarded here — confirm this is intentional.
    const sidecarRequests = inputs.map(input => ({
      text: input.text,
      task_id: input.taskId,
      task_prompt: input.taskPrompt,
      examples: sharedExamples,
      model_id: modelId,
    }));

    const results = await sidecarExtractBatch(sidecarRequests, requestId);

    req.log.info({ resultCount: results.length }, 'batch extraction complete');

    return reply.send({
      results: results.map(item => toResponseBody(item)),
      requestId,
    });
  });

  /**
   * GET /extract/models — List the selectable models (static list; the sidecar
   * is not consulted).
   */
  app.get('/extract/models', async (_req, reply) => {
    return reply.send({ models: AVAILABLE_MODELS });
  });

  /**
   * GET /extract/sidecar-health — Check Python sidecar status.
   * Replies 503 with the failure message when the health call throws.
   */
  app.get('/extract/sidecar-health', async (_req, reply) => {
    try {
      const health = await sidecarHealth();
      return reply.send({ status: 'ok', sidecar: health });
    } catch (err: unknown) {
      const message = err instanceof Error ? err.message : 'Sidecar unavailable';
      return reply.status(503).send({ status: 'error', error: message });
    }
  });
}