diff --git a/services/mcp-server/src/lib/extraction-client.ts b/services/mcp-server/src/lib/extraction-client.ts index 5df3534d..e4809be8 100644 --- a/services/mcp-server/src/lib/extraction-client.ts +++ b/services/mcp-server/src/lib/extraction-client.ts @@ -70,3 +70,163 @@ export async function extractionSidecarHealth(opts: { requestId?: string }): Pro const res = await fetch(url, { headers, signal: AbortSignal.timeout(10_000) }); return res.json(); } + +export async function extractionExtractBatch( + params: { + inputs: Array<{ + text: string; + taskId?: string; + taskPrompt?: string; + }>; + examples?: Array<{ + text: string; + extractions: ExtractionItem[]; + }>; + modelId?: string; + }, + opts: { requestId?: string } +): Promise { + const url = `${config.EXTRACTION_SERVICE_URL}/api/extract/batch`; + const headers: Record = { + 'Content-Type': 'application/json', + ...(opts.requestId ? { 'x-request-id': opts.requestId } : {}), + }; + const res = await fetch(url, { + method: 'POST', + headers, + body: JSON.stringify(params), + signal: AbortSignal.timeout(30_000), + }); + if (!res.ok) { + const body = await res.text().catch(() => ''); + throw new Error(`extraction-service POST /api/extract/batch → ${res.status}: ${body}`); + } + return res.json(); +} + +export async function extractionSubmitJob( + params: { + inputs: Array<{ + text: string; + taskId?: string; + taskPrompt?: string; + }>; + examples?: Array<{ + text: string; + extractions: ExtractionItem[]; + }>; + modelId?: string; + productId?: string; + webhookUrl?: string; + webhookSecret?: string; + webhookRetryAttempts?: number; + }, + opts: { requestId?: string } +): Promise { + const url = `${config.EXTRACTION_SERVICE_URL}/api/extract/jobs`; + const headers: Record = { + 'Content-Type': 'application/json', + ...(opts.requestId ? { 'x-request-id': opts.requestId } : {}), + }; + const res = await fetch(url, { + method: 'POST', + headers, + body: JSON.stringify(params), + signal: AbortSignal.timeout(30_000), + }); + if (!res.ok) { + const body = await res.text().catch(() => ''); + throw new Error(`extraction-service POST /api/extract/jobs → ${res.status}: ${body}`); + } + return res.json(); +} + +export async function extractionGetJob( + jobId: string, + opts: { requestId?: string } +): Promise { + const url = `${config.EXTRACTION_SERVICE_URL}/api/extract/jobs/${jobId}`; + const headers: Record = { + ...(opts.requestId ? { 'x-request-id': opts.requestId } : {}), + }; + const res = await fetch(url, { headers, signal: AbortSignal.timeout(10_000) }); + if (!res.ok) { + const body = await res.text().catch(() => ''); + throw new Error(`extraction-service GET /api/extract/jobs/${jobId} → ${res.status}: ${body}`); + } + return res.json(); +} + +export async function extractionListJobs(opts: { requestId?: string }): Promise { + const url = `${config.EXTRACTION_SERVICE_URL}/api/extract/jobs`; + const headers: Record = { + ...(opts.requestId ? { 'x-request-id': opts.requestId } : {}), + }; + const res = await fetch(url, { headers, signal: AbortSignal.timeout(10_000) }); + if (!res.ok) { + const body = await res.text().catch(() => ''); + throw new Error(`extraction-service GET /api/extract/jobs → ${res.status}: ${body}`); + } + return res.json(); +} + +export async function extractionGetProductRateLimitStatus( + productId?: string, + opts?: { requestId?: string } +): Promise { + const url = productId + ? `${config.EXTRACTION_SERVICE_URL}/api/extract/rate-limits/product?productId=${encodeURIComponent(productId)}` + : `${config.EXTRACTION_SERVICE_URL}/api/extract/rate-limits/product`; + const headers: Record = { + ...(opts?.requestId ? { 'x-request-id': opts.requestId } : {}), + }; + const res = await fetch(url, { headers, signal: AbortSignal.timeout(10_000) }); + if (!res.ok) { + const body = await res.text().catch(() => ''); + throw new Error( + `extraction-service GET /api/extract/rate-limits/product → ${res.status}: ${body}` + ); + } + return res.json(); +} + +export async function extractionResetProductRateLimit( + productId: string, + opts: { requestId?: string } +): Promise { + const url = `${config.EXTRACTION_SERVICE_URL}/api/extract/rate-limits/product/reset`; + const headers: Record = { + 'Content-Type': 'application/json', + ...(opts.requestId ? { 'x-request-id': opts.requestId } : {}), + }; + const res = await fetch(url, { + method: 'POST', + headers, + body: JSON.stringify({ productId }), + signal: AbortSignal.timeout(10_000), + }); + if (!res.ok) { + const body = await res.text().catch(() => ''); + throw new Error( + `extraction-service POST /api/extract/rate-limits/product/reset → ${res.status}: ${body}` + ); + } + return res.json(); +} + +export async function extractionSidecarMonitoringState(opts: { + requestId?: string; +}): Promise { + const url = `${config.EXTRACTION_SERVICE_URL}/api/extract/monitoring/sidecar`; + const headers: Record = { + ...(opts.requestId ? { 'x-request-id': opts.requestId } : {}), + }; + const res = await fetch(url, { headers, signal: AbortSignal.timeout(10_000) }); + if (!res.ok) { + const body = await res.text().catch(() => ''); + throw new Error( + `extraction-service GET /api/extract/monitoring/sidecar → ${res.status}: ${body}` + ); + } + return res.json(); +} diff --git a/services/mcp-server/src/modules/extraction/extraction-tools.ts b/services/mcp-server/src/modules/extraction/extraction-tools.ts index 711fbce3..5b6379eb 100644 --- a/services/mcp-server/src/modules/extraction/extraction-tools.ts +++ b/services/mcp-server/src/modules/extraction/extraction-tools.ts @@ -5,6 +5,13 @@ import { extractionModels, extractionCacheStats, extractionSidecarHealth, + extractionExtractBatch, + extractionSubmitJob, + extractionGetJob, + extractionListJobs, + extractionGetProductRateLimitStatus, + extractionResetProductRateLimit, + extractionSidecarMonitoringState, } from '../../lib/extraction-client.js'; registerTool({ @@ -55,3 +62,147 @@ registerTool({ return extractionSidecarHealth({ requestId: req.id }); }, }); + +registerTool({ + name: 'extraction.extractBatch', + description: + 'Run batch extraction on multiple inputs with shared configuration. Returns array of extraction results. Requires admin role.', + requiredRole: 'admin', + inputSchema: z.object({ + inputs: z + .array( + z.object({ + text: z.string().min(1).describe('Text to extract from'), + taskId: z.string().optional().describe('Extraction task ID'), + taskPrompt: z.string().optional().describe('Custom task prompt'), + }) + ) + .min(1) + .describe('Array of extraction inputs'), + examples: z + .array( + z.object({ + text: z.string().min(1).describe('Example text'), + extractions: z.array( + z.object({ + extraction_class: z.string(), + extraction_text: z.string(), + attributes: z.record(z.string()).optional(), + }) + ), + }) + ) + .optional() + .describe('Few-shot examples'), + modelId: z.string().optional().describe('Override model ID'), + }), + async execute(args, req) { + return extractionExtractBatch(args, { requestId: req.id }); + }, +}); + +registerTool({ + name: 'extraction.submitJob', + description: + 'Submit an async batch extraction job. Returns jobId for polling. Supports webhook callbacks. Requires admin role.', + requiredRole: 'admin', + inputSchema: z.object({ + inputs: z + .array( + z.object({ + text: z.string().min(1).describe('Text to extract from'), + taskId: z.string().optional().describe('Extraction task ID'), + taskPrompt: z.string().optional().describe('Custom task prompt'), + }) + ) + .min(1) + .describe('Array of extraction inputs'), + examples: z + .array( + z.object({ + text: z.string().min(1).describe('Example text'), + extractions: z.array( + z.object({ + extraction_class: z.string(), + extraction_text: z.string(), + attributes: z.record(z.string()).optional(), + }) + ), + }) + ) + .optional() + .describe('Few-shot examples'), + modelId: z.string().optional().describe('Override model ID'), + productId: z.string().optional().describe('Product ID for rate limiting'), + webhookUrl: z.string().url().optional().describe('Webhook URL for job completion'), + webhookSecret: z.string().optional().describe('Webhook secret for HMAC validation'), + webhookRetryAttempts: z + .number() + .int() + .min(0) + .max(10) + .optional() + .describe('Webhook retry attempts'), + }), + async execute(args, req) { + return extractionSubmitJob(args, { requestId: req.id }); + }, +}); + +registerTool({ + name: 'extraction.getJob', + description: 'Get status and results of an async extraction job by ID. Requires admin role.', + requiredRole: 'admin', + inputSchema: z.object({ + jobId: z.string().min(1).describe('Job ID to retrieve'), + }), + async execute(args, req) { + return extractionGetJob(args.jobId, { requestId: req.id }); + }, +}); + +registerTool({ + name: 'extraction.listJobs', + description: 'List recent async extraction jobs. Requires admin role.', + requiredRole: 'admin', + inputSchema: z.object({}), + async execute(_args, req) { + return extractionListJobs({ requestId: req.id }); + }, +}); + +registerTool({ + name: 'extraction.getProductRateLimitStatus', + description: + 'Get product rate limit status. Pass productId for specific product, omit for summary of all products. Requires admin role.', + requiredRole: 'admin', + inputSchema: z.object({ + productId: z.string().optional().describe('Product ID to check (omit for all products)'), + }), + async execute(args, req) { + return extractionGetProductRateLimitStatus(args.productId, { requestId: req.id }); + }, +}); + +registerTool({ + name: 'extraction.resetProductRateLimit', + description: 'Reset rate limit for a specific product (admin operation). Requires admin role.', + requiredRole: 'admin', + inputSchema: z.object({ + productId: z.string().min(1).describe('Product ID to reset rate limit for'), + }), + async execute(args, req) { + return extractionResetProductRateLimit(args.productId, { requestId: req.id }); + }, +}); + +registerTool({ + name: 'extraction.sidecarMonitoringState', + description: + 'Get detailed sidecar health monitoring state and circuit breaker information. Requires admin role.', + requiredRole: 'admin', + inputSchema: z.object({}), + async execute(_args, req) { + return extractionSidecarMonitoringState({ requestId: req.id }); + }, +});