learning_ai_common_plat/services/mcp-server/src/modules/extraction/extraction-tools.ts
saravanakumardb1 c8fafbb564 feat(mcp-server): Add 7 missing extraction async jobs tools
- extraction.extractBatch: batch extraction with shared config
- extraction.submitJob: async job submission with webhook support
- extraction.getJob: get job status/results by ID
- extraction.listJobs: list recent async jobs
- extraction.getProductRateLimitStatus: per-product or summary rate limits
- extraction.resetProductRateLimit: admin rate limit reset
- extraction.sidecarMonitoringState: detailed sidecar circuit breaker state

All tools require admin role and map to existing extraction-service endpoints.
Fixes TypeScript optional parameter error in extractionGetProductRateLimitStatus.
2026-03-05 22:05:00 -08:00

209 lines
6.4 KiB
TypeScript

import { z } from 'zod';
import { registerTool } from '../tools/registry.js';
import {
extractionRun,
extractionModels,
extractionCacheStats,
extractionSidecarHealth,
extractionExtractBatch,
extractionSubmitJob,
extractionGetJob,
extractionListJobs,
extractionGetProductRateLimitStatus,
extractionResetProductRateLimit,
extractionSidecarMonitoringState,
} from '../../lib/extraction-client.js';
/**
 * Input schema for the `extraction.run` tool.
 *
 * - `text`    required, at least one character.
 * - `taskId`  optional extraction task identifier.
 * - `modelId` optional model override.
 */
const extractionRunInput = z.object({
  text: z.string().min(1).describe('Text to extract from'),
  taskId: z
    .string()
    .optional()
    .describe('Extraction task ID (e.g. triage, memory-insight, reflection-enrichment)'),
  modelId: z.string().optional().describe('Override model ID'),
});

// Registers `extraction.run` with requiredRole 'admin'. The handler hands the
// arguments object to `extractionRun` untouched, together with the id of the
// incoming request as `requestId`.
registerTool({
  name: 'extraction.run',
  description:
    'Run text extraction using the extraction-service. Returns typed ExtractionItem[] array. Requires admin role.',
  requiredRole: 'admin',
  inputSchema: extractionRunInput,
  async execute(input, request) {
    const context = { requestId: request.id };
    return extractionRun(input, context);
  },
});
// Registers `extraction.models` with requiredRole 'admin'. The tool declares
// an empty input object; the handler ignores its first argument and calls
// `extractionModels` with `{ requestId }` taken from the request's id.
registerTool({
  name: 'extraction.models',
  description: 'List available extraction model providers. Requires admin role.',
  requiredRole: 'admin',
  inputSchema: z.object({}),
  async execute(_input, request) {
    const context = { requestId: request.id };
    return extractionModels(context);
  },
});
/**
 * Tool `extraction.cacheStats` (requiredRole: 'admin').
 *
 * Declares no input fields. The handler discards the arguments object and
 * delegates to `extractionCacheStats`, passing the request id as `requestId`.
 */
registerTool({
  name: 'extraction.cacheStats',
  description: 'Get extraction result cache statistics. Requires admin role.',
  requiredRole: 'admin',
  inputSchema: z.object({}),
  async execute(_unused, incoming) {
    const { id } = incoming;
    return extractionCacheStats({ requestId: id });
  },
});
/**
 * Tool `extraction.sidecarHealth` (requiredRole: 'admin').
 *
 * Declares no input fields. The handler delegates to
 * `extractionSidecarHealth`, passing only `{ requestId }` built from the
 * request's id.
 */
registerTool({
  name: 'extraction.sidecarHealth',
  description:
    'Check the health of the Python extraction sidecar process. Returns status and last-seen timestamp. Requires admin role.',
  requiredRole: 'admin',
  inputSchema: z.object({}),
  async execute(_input, request) {
    const callContext = { requestId: request.id };
    return extractionSidecarHealth(callContext);
  },
});
/**
 * Builds the schema fields common to the two batch-style tools,
 * `extraction.extractBatch` and `extraction.submitJob`.
 *
 * Both tools accept exactly the same `inputs`, `examples` and `modelId`
 * fields. Defining them once keeps the two tool schemas from drifting apart
 * when one of them is edited.
 *
 * A new set of schema objects is created on every call, so the two tool
 * schemas never share schema instances.
 *
 * @returns an object shape (field name -> zod schema) with, in this order:
 *   - `inputs`   non-empty array of `{ text, taskId?, taskPrompt? }`
 *   - `examples` optional array of `{ text, extractions[] }` few-shot examples
 *   - `modelId`  optional model override
 */
function buildBatchExtractionShape() {
  return {
    inputs: z
      .array(
        z.object({
          text: z.string().min(1).describe('Text to extract from'),
          taskId: z.string().optional().describe('Extraction task ID'),
          taskPrompt: z.string().optional().describe('Custom task prompt'),
        })
      )
      .min(1)
      .describe('Array of extraction inputs'),
    examples: z
      .array(
        z.object({
          text: z.string().min(1).describe('Example text'),
          extractions: z.array(
            z.object({
              extraction_class: z.string(),
              extraction_text: z.string(),
              attributes: z.record(z.string()).optional(),
            })
          ),
        })
      )
      .optional()
      .describe('Few-shot examples'),
    modelId: z.string().optional().describe('Override model ID'),
  };
}

// Registers `extraction.extractBatch` with requiredRole 'admin'. The handler
// forwards the arguments object unchanged to `extractionExtractBatch`, along
// with the request id as `requestId`.
registerTool({
  name: 'extraction.extractBatch',
  description:
    'Run batch extraction on multiple inputs with shared configuration. Returns array of extraction results. Requires admin role.',
  requiredRole: 'admin',
  inputSchema: z.object(buildBatchExtractionShape()),
  async execute(args, req) {
    return extractionExtractBatch(args, { requestId: req.id });
  },
});

// Registers `extraction.submitJob` with requiredRole 'admin'. Same batch
// fields as `extraction.extractBatch`, followed by the job-specific options
// (`productId` and the webhook settings). The shared fields are spread first
// so the field order of the schema is inputs, examples, modelId, productId,
// webhookUrl, webhookSecret, webhookRetryAttempts.
registerTool({
  name: 'extraction.submitJob',
  description:
    'Submit an async batch extraction job. Returns jobId for polling. Supports webhook callbacks. Requires admin role.',
  requiredRole: 'admin',
  inputSchema: z.object({
    ...buildBatchExtractionShape(),
    productId: z.string().optional().describe('Product ID for rate limiting'),
    // NOTE(review): `.url()` is the only constraint on this field here; any
    // scheme/host restriction would have to happen downstream — confirm.
    webhookUrl: z.string().url().optional().describe('Webhook URL for job completion'),
    webhookSecret: z.string().optional().describe('Webhook secret for HMAC validation'),
    // Integer in the inclusive range 0..10.
    webhookRetryAttempts: z
      .number()
      .int()
      .min(0)
      .max(10)
      .optional()
      .describe('Webhook retry attempts'),
  }),
  async execute(args, req) {
    return extractionSubmitJob(args, { requestId: req.id });
  },
});
/**
 * Tool `extraction.getJob` (requiredRole: 'admin').
 *
 * Input: `jobId`, a string of at least one character.
 * The handler calls `extractionGetJob(jobId, { requestId })`, where
 * `requestId` is the id of the incoming request.
 */
registerTool({
  name: 'extraction.getJob',
  description: 'Get status and results of an async extraction job by ID. Requires admin role.',
  requiredRole: 'admin',
  inputSchema: z.object({
    jobId: z.string().min(1).describe('Job ID to retrieve'),
  }),
  async execute(input, request) {
    const targetJobId = input.jobId;
    const context = { requestId: request.id };
    return extractionGetJob(targetJobId, context);
  },
});
// Registers `extraction.listJobs` with requiredRole 'admin'. The tool declares
// an empty input object; the handler calls `extractionListJobs` with only
// `{ requestId }` taken from the request's id.
registerTool({
  name: 'extraction.listJobs',
  description: 'List recent async extraction jobs. Requires admin role.',
  requiredRole: 'admin',
  inputSchema: z.object({}),
  async execute(_input, request) {
    const context = { requestId: request.id };
    return extractionListJobs(context);
  },
});
/**
 * Tool `extraction.getProductRateLimitStatus` (requiredRole: 'admin').
 *
 * Input: optional `productId`. The handler passes it (possibly `undefined`)
 * as the first argument of `extractionGetProductRateLimitStatus`, followed by
 * `{ requestId }` built from the request's id.
 *
 * NOTE(review): the schema places no minimum length on `productId`, so an
 * empty string is accepted and forwarded as-is — confirm how the client
 * treats `''` (specific product vs. summary).
 */
registerTool({
  name: 'extraction.getProductRateLimitStatus',
  description:
    'Get product rate limit status. Pass productId for specific product, omit for summary of all products. Requires admin role.',
  requiredRole: 'admin',
  inputSchema: z.object({
    productId: z.string().optional().describe('Product ID to check (omit for all products)'),
  }),
  async execute(input, request) {
    const requestedProduct = input.productId;
    const context = { requestId: request.id };
    return extractionGetProductRateLimitStatus(requestedProduct, context);
  },
});
/**
 * Tool `extraction.resetProductRateLimit` (requiredRole: 'admin').
 *
 * Input: `productId`, a string of at least one character.
 * The handler calls `extractionResetProductRateLimit(productId, { requestId })`,
 * where `requestId` is the id of the incoming request.
 */
registerTool({
  name: 'extraction.resetProductRateLimit',
  description: 'Reset rate limit for a specific product (admin operation). Requires admin role.',
  requiredRole: 'admin',
  inputSchema: z.object({
    productId: z.string().min(1).describe('Product ID to reset rate limit for'),
  }),
  async execute(input, request) {
    const targetProduct = input.productId;
    const context = { requestId: request.id };
    return extractionResetProductRateLimit(targetProduct, context);
  },
});
// Registers `extraction.sidecarMonitoringState` with requiredRole 'admin'.
// The tool declares an empty input object; the handler ignores its first
// argument and calls `extractionSidecarMonitoringState` with `{ requestId }`
// taken from the request's id.
registerTool({
  name: 'extraction.sidecarMonitoringState',
  description:
    'Get detailed sidecar health monitoring state and circuit breaker information. Requires admin role.',
  requiredRole: 'admin',
  inputSchema: z.object({}),
  async execute(_input, request) {
    const context = { requestId: request.id };
    return extractionSidecarMonitoringState(context);
  },
});