/** * TranscriptExtractionPipelineAgent — A2A pipeline for LysnrAI transcript enrichment. * * Agent roster (3 steps): * 1. TranscriptCollectorAgent — list transcripts, filter where extractedAt is null * 2. ExtractionBatchAgent — run extraction on each unprocessed transcript (serial, best-effort) * 3. ExtractionReportAgent — assemble report with counts, errors, sample entities * * MCP tools: * lysnrai.transcripts.runExtractionPipeline(limit?, dryRun?) — run pipeline */ import { randomUUID } from 'node:crypto'; import { z } from 'zod'; import { registerTool } from '../tools/registry.js'; import type { McpToolRequest } from '../tools/types.js'; import { lysnraiTranscriptsList, lysnraiTranscriptRunExtraction, type TranscriptDoc, } from '../../lib/lysnrai-client.js'; import { config } from '../../lib/config.js'; import { trackRunCompleted, trackRunFailed, trackRunStarted, trackStepCompleted, trackStepFailed, trackStepStarted, } from './run-tracker.js'; import { createSupportCaseForRun, recordBudgetSpend } from './governance.js'; // ── Types ────────────────────────────────────────────────────────────────────── interface CollectionResult { totalFetched: number; unextractedIds: string[]; sampleTranscripts: TranscriptDoc[]; } interface BatchResult { processed: number; succeeded: string[]; failed: Array<{ id: string; error: string }>; skipped: boolean; } export interface TranscriptExtractionReport { runId: string; productId: 'lysnrai'; dryRun: boolean; totalFetched: number; unextractedCount: number; processed: number; succeeded: number; failed: number; failedIds: string[]; sampleExtractedIds: string[]; summary: string; generatedAt: string; } // ── Step 1: TranscriptCollectorAgent ────────────────────────────────────────── async function collectUnextracted( limit: number, opts: { token?: string; requestId?: string } ): Promise { const result = await lysnraiTranscriptsList({ limit }, opts); const transcripts = result.transcripts; const unextracted = transcripts.filter(t => !t.extractedAt); return { totalFetched: transcripts.length, unextractedIds: unextracted.map(t => t.id), sampleTranscripts: unextracted.slice(0, 5), }; } // ── Step 2: ExtractionBatchAgent ────────────────────────────────────────────── async function runExtractionBatch( transcriptIds: string[], dryRun: boolean, opts: { token?: string; requestId?: string } ): Promise { if (dryRun || transcriptIds.length === 0) { return { processed: 0, succeeded: [], failed: [], skipped: dryRun, }; } const succeeded: string[] = []; const failed: Array<{ id: string; error: string }> = []; for (const id of transcriptIds) { try { await lysnraiTranscriptRunExtraction(id, opts); succeeded.push(id); } catch (err) { failed.push({ id, error: err instanceof Error ? err.message : String(err) }); } } return { processed: transcriptIds.length, succeeded, failed, skipped: false }; } // ── Step 3: ExtractionReportAgent ───────────────────────────────────────────── function buildReport( runId: string, dryRun: boolean, collection: CollectionResult, batch: BatchResult ): TranscriptExtractionReport { const now = new Date().toISOString(); const unextractedCount = collection.unextractedIds.length; let summary: string; if (dryRun) { summary = `DRY RUN: Found ${unextractedCount} unextracted transcripts out of ${collection.totalFetched} fetched. No extraction was run.`; } else if (unextractedCount === 0) { summary = `All ${collection.totalFetched} transcripts are already extracted. Nothing to do.`; } else { const failNote = batch.failed.length > 0 ? ` ${batch.failed.length} failed.` : ''; summary = `Extracted ${batch.succeeded.length}/${unextractedCount} transcripts.${failNote}`; } return { runId, productId: 'lysnrai', dryRun, totalFetched: collection.totalFetched, unextractedCount, processed: batch.processed, succeeded: batch.succeeded.length, failed: batch.failed.length, failedIds: batch.failed.map(f => f.id), sampleExtractedIds: batch.succeeded.slice(0, 5), summary, generatedAt: now, }; } // ── Pipeline runner ──────────────────────────────────────────────────────────── export async function runTranscriptExtractionPipeline( limit: number, dryRun: boolean, req: McpToolRequest ): Promise { const runId = randomUUID(); const opts = { token: req.headers.authorization?.slice(7), requestId: req.id, }; let currentStep: { stepName: 'collect' | 'batch' | 'report'; order: number } | undefined; await safeTrack(() => trackRunStarted({ runId, productId: 'lysnrai', name: 'transcript-extraction-pipeline', requestId: req.id, token: opts.token, input: { limit, dryRun }, }) ); try { currentStep = { stepName: 'collect', order: 1 }; const collectStep = currentStep; await safeTrack(() => trackStepStarted({ runId, productId: 'lysnrai', stepName: collectStep.stepName, order: collectStep.order, token: opts.token, requestId: req.id, input: { limit, dryRun }, }) ); req.log.info({ runId, stepId: 'collect', limit, dryRun }, 'TranscriptCollectorAgent start'); const collection = await collectUnextracted(limit, opts); await safeTrack(() => trackStepCompleted({ runId, productId: 'lysnrai', stepName: collectStep.stepName, order: collectStep.order, token: opts.token, requestId: req.id, output: { totalFetched: collection.totalFetched, unextractedCount: collection.unextractedIds.length, }, }) ); req.log.info( { runId, stepId: 'collect', totalFetched: collection.totalFetched, unextractedCount: collection.unextractedIds.length, }, 'TranscriptCollectorAgent done' ); currentStep = { stepName: 'batch', order: 2 }; const batchStep = currentStep; await safeTrack(() => trackStepStarted({ runId, productId: 'lysnrai', stepName: batchStep.stepName, order: batchStep.order, token: opts.token, requestId: req.id, input: { count: collection.unextractedIds.length, dryRun }, }) ); req.log.info( { runId, stepId: 'batch', count: collection.unextractedIds.length, dryRun }, 'ExtractionBatchAgent start' ); const batch = await runExtractionBatch(collection.unextractedIds, dryRun, opts); await safeTrack(() => trackStepCompleted({ runId, productId: 'lysnrai', stepName: batchStep.stepName, order: batchStep.order, token: opts.token, requestId: req.id, output: { succeeded: batch.succeeded.length, failed: batch.failed.length }, }) ); await recordBudgetSpend( { productId: 'lysnrai', runId, source: 'a2a.transcript_extraction', costUsd: Number((batch.succeeded.length * 0.002).toFixed(6)), tokensUsed: batch.succeeded.length * 1000, }, opts ); req.log.info( { runId, stepId: 'batch', succeeded: batch.succeeded.length, failed: batch.failed.length }, 'ExtractionBatchAgent done' ); currentStep = { stepName: 'report', order: 3 }; const reportStep = currentStep; await safeTrack(() => trackStepStarted({ runId, productId: 'lysnrai', stepName: reportStep.stepName, order: reportStep.order, token: opts.token, requestId: req.id, }) ); req.log.info({ runId, stepId: 'report' }, 'ExtractionReportAgent start'); const report = buildReport(runId, dryRun, collection, batch); await safeTrack(() => trackStepCompleted({ runId, productId: 'lysnrai', stepName: reportStep.stepName, order: reportStep.order, token: opts.token, requestId: req.id, output: { succeeded: report.succeeded, failed: report.failed }, }) ); await safeTrack(() => trackRunCompleted({ runId, productId: 'lysnrai', name: 'transcript-extraction-pipeline', requestId: req.id, token: opts.token, output: { succeeded: report.succeeded, failed: report.failed }, }) ); if (report.failed > 0) { await createSupportCaseForRun( { productId: 'lysnrai', runId, title: 'Transcript extraction pipeline reported failures', description: report.summary, priority: report.failed >= 5 ? 'high' : 'medium', tags: ['a2a', 'transcripts', 'extraction'], }, opts ); } req.log.info( { runId, stepId: 'report', summary: report.summary }, 'ExtractionReportAgent done' ); return report; } catch (error) { const message = error instanceof Error ? error.message : String(error); if (currentStep) { const failedStep = currentStep; await safeTrack(() => trackStepFailed({ runId, productId: 'lysnrai', stepName: failedStep.stepName, order: failedStep.order, token: opts.token, requestId: req.id, error: message, }) ); } await safeTrack(() => trackRunFailed({ runId, productId: 'lysnrai', name: 'transcript-extraction-pipeline', requestId: req.id, token: opts.token, error: message, }) ); throw error; } } async function safeTrack(fn: () => Promise): Promise { try { await fn(); } catch { // Tracking must never fail the pipeline itself. } } // ── MCP tool registration ───────────────────────────────────────────────────── registerTool({ name: 'lysnrai.transcripts.runExtractionPipeline', description: 'A2A pipeline: fetches LysnrAI transcripts missing extraction data, runs the extraction service on each, and returns a report with counts and failures. Use dryRun=true to preview without running extraction. Requires admin role.', requiredRole: 'admin', inputSchema: z.object({ limit: z.coerce .number() .min(1) .max(config.QUERY_MAX_LIMIT) .default(config.QUERY_DEFAULT_LIMIT) .describe('Max transcripts to fetch and process per run'), dryRun: z .boolean() .default(false) .describe('If true, only collect and count — do not run extraction'), }), async execute(args, req) { return runTranscriptExtractionPipeline(args.limit, args.dryRun, req); }, });