- extraction-service: Fastify scaffold (port 4005) with extract/tasks modules
- src/lib/: config, errors, cosmos, product-config, python-bridge
- src/modules/extract/: types (Zod schemas), routes (POST /extract, batch, models)
- src/modules/tasks/: types, repository (Cosmos CRUD), routes (CRUD endpoints)
- Python sidecar: FastAPI app, LangExtract wrapper, models, task registry
- @bytelyst/extraction package: types, client factory, index exports
- Both pnpm builds pass cleanly
146 lines
3.5 KiB
TypeScript
146 lines
3.5 KiB
TypeScript
/**
 * HTTP bridge to the Python sidecar running LangExtract.
 *
 * The sidecar exposes FastAPI endpoints on an internal port (default 4006).
 * This module provides a typed client with per-request timeouts, readiness
 * polling of /health (see `waitForSidecar`), and mapping of non-2xx responses
 * to thrown errors.
 */
import { config } from './config.js';
|
|
|
|
/** Base URL of the Python sidecar, read once from the service configuration. */
const SIDECAR_URL = config.PYTHON_SIDECAR_URL;

/** Timeout (ms) used by the extract and batch calls when the caller passes none. */
const DEFAULT_TIMEOUT_MS = 120_000;
/**
 * Request payload for the sidecar's extract endpoints.
 *
 * The object is serialized as-is with `JSON.stringify`, so the snake_case
 * property names are the wire format and must not be renamed.
 *
 * NOTE(review): the optional fields presumably mirror LangExtract options on
 * the Python side; confirm their exact meaning against the sidecar models.
 */
export interface SidecarExtractRequest {
  text: string;
  task_id?: string;
  task_prompt?: string;
  examples?: SidecarExample[];
  model_id?: string;
  extraction_passes?: number;
  max_workers?: number;
  max_char_buffer?: number;
}
/**
 * One example entry of `SidecarExtractRequest.examples`: a text paired with
 * the extractions listed for it.
 */
export interface SidecarExample {
  text: string;
  extractions: SidecarExtraction[];
}
/**
 * A single extraction item, used both inside request examples and in the
 * sidecar's response. Property names are the snake_case wire format.
 */
export interface SidecarExtraction {
  extraction_class: string;
  extraction_text: string;
  attributes?: Record<string, string>;
}
/**
 * Response body the extract endpoints are expected to return.
 *
 * This shape is only asserted at compile time; the functions in this module
 * cast the parsed JSON to it without runtime validation.
 */
export interface SidecarExtractResponse {
  extractions: SidecarExtraction[];
  metadata: {
    model_id: string;
    duration_ms: number;
    token_count?: number;
    char_count: number;
  };
}
/** Response body expected from the sidecar's GET /health endpoint. */
export interface SidecarHealthResponse {
  status: string;
  version?: string;
}
/**
 * POST /extract on the Python sidecar.
 *
 * @param req - Extraction request; sent as the JSON request body.
 * @param requestId - Optional id forwarded in the `x-request-id` header.
 * @param timeoutMs - Milliseconds after which the request is aborted.
 * @returns The parsed JSON response body (cast, not validated at runtime).
 * @throws Error with message `Sidecar error <status>: <reason>` when the
 *   sidecar answers with a non-2xx status. Rejections from `fetch` itself
 *   (network failure, timeout abort) propagate unchanged.
 */
export async function sidecarExtract(
  req: SidecarExtractRequest,
  requestId?: string,
  timeoutMs: number = DEFAULT_TIMEOUT_MS
): Promise<SidecarExtractResponse> {
  const headers: Record<string, string> = {
    'Content-Type': 'application/json',
  };
  if (requestId) {
    headers['x-request-id'] = requestId;
  }

  const res = await fetch(`${SIDECAR_URL}/extract`, {
    method: 'POST',
    headers,
    body: JSON.stringify(req),
    signal: AbortSignal.timeout(timeoutMs),
  });

  if (!res.ok) {
    // The error body may be missing, non-JSON, or JSON that is not an object
    // (e.g. `null`); none of those may mask the HTTP error itself.
    const body: unknown = await res.json().catch(() => null);
    const fields: Record<string, unknown> =
      typeof body === 'object' && body !== null ? (body as Record<string, unknown>) : {};
    // `||` (not `??`) on purpose: an empty `error` falls through to `detail`.
    const reason = fields.error || fields.detail || res.statusText;
    // `detail` can be structured (FastAPI validation errors are an array of
    // objects); serialize it instead of printing "[object Object]".
    const reasonText = typeof reason === 'string' ? reason : JSON.stringify(reason);
    throw new Error(`Sidecar error ${res.status}: ${reasonText}`);
  }

  return res.json() as Promise<SidecarExtractResponse>;
}
/**
 * POST /extract/batch on the Python sidecar.
 *
 * @param requests - Extraction requests; sent as `{ requests: [...] }` in the
 *   JSON request body.
 * @param requestId - Optional id forwarded in the `x-request-id` header.
 * @param timeoutMs - Milliseconds after which the whole batch call is aborted.
 * @returns The parsed JSON response body (cast, not validated at runtime).
 * @throws Error with message `Sidecar batch error <status>: <reason>` when the
 *   sidecar answers with a non-2xx status. Rejections from `fetch` itself
 *   (network failure, timeout abort) propagate unchanged.
 */
export async function sidecarExtractBatch(
  requests: SidecarExtractRequest[],
  requestId?: string,
  timeoutMs: number = DEFAULT_TIMEOUT_MS
): Promise<SidecarExtractResponse[]> {
  const headers: Record<string, string> = {
    'Content-Type': 'application/json',
  };
  if (requestId) {
    headers['x-request-id'] = requestId;
  }

  const res = await fetch(`${SIDECAR_URL}/extract/batch`, {
    method: 'POST',
    headers,
    body: JSON.stringify({ requests }),
    signal: AbortSignal.timeout(timeoutMs),
  });

  if (!res.ok) {
    // The error body may be missing, non-JSON, or JSON that is not an object
    // (e.g. `null`); none of those may mask the HTTP error itself.
    const body: unknown = await res.json().catch(() => null);
    const fields: Record<string, unknown> =
      typeof body === 'object' && body !== null ? (body as Record<string, unknown>) : {};
    // `||` (not `??`) on purpose: an empty `error` falls through to `detail`.
    const reason = fields.error || fields.detail || res.statusText;
    // `detail` can be structured (FastAPI validation errors are an array of
    // objects); serialize it instead of printing "[object Object]".
    const reasonText = typeof reason === 'string' ? reason : JSON.stringify(reason);
    throw new Error(`Sidecar batch error ${res.status}: ${reasonText}`);
  }

  return res.json() as Promise<SidecarExtractResponse[]>;
}
/**
 * GET /health on the Python sidecar.
 *
 * @param timeoutMs - Milliseconds after which the probe is aborted. Defaults
 *   to 5 seconds, so existing zero-argument callers behave as before.
 * @returns The parsed JSON response body (cast, not validated at runtime).
 * @throws Error with message `Sidecar health check failed: <status>` on a
 *   non-2xx status. Rejections from `fetch` itself propagate unchanged.
 */
export async function sidecarHealth(timeoutMs: number = 5_000): Promise<SidecarHealthResponse> {
  const res = await fetch(`${SIDECAR_URL}/health`, {
    signal: AbortSignal.timeout(timeoutMs),
  });

  if (!res.ok) {
    throw new Error(`Sidecar health check failed: ${res.status}`);
  }

  return res.json() as Promise<SidecarHealthResponse>;
}
/**
 * Wait for the Python sidecar to become ready (poll /health).
 *
 * @param maxRetries - Maximum number of health checks to attempt.
 * @param intervalMs - Pause between two consecutive attempts, in milliseconds.
 * @returns `true` as soon as one health check succeeds, `false` once every
 *   attempt has failed. Never rejects because of a failed health check.
 */
export async function waitForSidecar(
  maxRetries: number = 30,
  intervalMs: number = 2_000
): Promise<boolean> {
  for (let i = 0; i < maxRetries; i++) {
    try {
      await sidecarHealth();
      return true;
    } catch {
      // A failed probe is expected while the sidecar is still starting.
      // Sleep only between attempts, never after the last one.
      if (i < maxRetries - 1) {
        await new Promise(resolve => globalThis.setTimeout(resolve, intervalMs));
      }
    }
  }
  return false;
}