From f8e15880d2e3c16d65bf3096fddf3d4906218d47 Mon Sep 17 00:00:00 2001 From: saravanakumardb1 Date: Mon, 6 Apr 2026 11:11:17 -0700 Subject: [PATCH] =?UTF-8?q?feat(extraction):=20add=20transcribe()=20to=20E?= =?UTF-8?q?xtractionClient=20=E2=80=94=20reusable=20speech-to-text=20clien?= =?UTF-8?q?t?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - TranscribeRequest/TranscribeResponse types exported from @bytelyst/extraction - transcribe() method on ExtractionClient calls POST /api/transcribe - 3 new client tests (correct body, optional fields, error propagation) - Package test count: 16 → 19 --- .../src/__tests__/extraction.test.ts | 75 +++++++++++++++++-- packages/extraction/src/client.ts | 12 +++ packages/extraction/src/index.ts | 2 + packages/extraction/src/types.ts | 32 ++++++++ 4 files changed, 114 insertions(+), 7 deletions(-) diff --git a/packages/extraction/src/__tests__/extraction.test.ts b/packages/extraction/src/__tests__/extraction.test.ts index 25e31cf5..5c15c986 100644 --- a/packages/extraction/src/__tests__/extraction.test.ts +++ b/packages/extraction/src/__tests__/extraction.test.ts @@ -41,9 +41,7 @@ describe('createExtractionClient', () => { describe('extract', () => { it('calls POST /api/extract with correct body', async () => { const mockResponse: ExtractResponse = { - extractions: [ - { extraction_class: 'person', extraction_text: 'John' }, - ], + extractions: [{ extraction_class: 'person', extraction_text: 'John' }], metadata: { modelId: 'gemini-1.5', durationMs: 150, charCount: 35 }, }; mockApiFetch.mockResolvedValue(mockResponse); @@ -73,7 +71,9 @@ describe('createExtractionClient', () => { modelId: 'gpt-4', productId: 'lysnrai', options: { extractionPasses: 2, maxWorkers: 4, maxCharBuffer: 1000 }, - examples: [{ text: 'Hi Bob', extractions: [{ extraction_class: 'person', extraction_text: 'Bob' }] }], + examples: [ + { text: 'Hi Bob', extractions: [{ extraction_class: 'person', extraction_text: 'Bob' }] }, + ], }; await client.extract(req); @@ -94,9 +94,7 @@ describe('createExtractionClient', () => { describe('extractBatch', () => { it('calls POST /api/extract/batch with correct body', async () => { const mockResponse: BatchExtractResponse = { - results: [ - { extractions: [], metadata: { modelId: 'test', durationMs: 10, charCount: 5 } }, - ], + results: [{ extractions: [], metadata: { modelId: 'test', durationMs: 10, charCount: 5 } }], requestId: 'req-123', }; mockApiFetch.mockResolvedValue(mockResponse); @@ -199,6 +197,69 @@ describe('createExtractionClient', () => { }); }); + describe('transcribe', () => { + it('calls POST /api/transcribe with correct body', async () => { + const mockResponse = { + text: 'Hello, this is a test recording.', + language: 'en', + durationSeconds: 5.2, + model: 'whisper-1', + durationMs: 1200, + }; + mockApiFetch.mockResolvedValue(mockResponse); + + const client = createExtractionClient({ baseUrl: 'http://localhost:4005' }); + const req = { + audioUrl: 'https://blob.example.com/audio.mp3', + language: 'en', + productId: 'notelett', + }; + + const result = await client.transcribe(req); + + expect(mockApiFetch).toHaveBeenCalledWith('/api/transcribe', { + method: 'POST', + body: JSON.stringify(req), + }); + expect(result.text).toBe('Hello, this is a test recording.'); + expect(result.language).toBe('en'); + expect(result.durationSeconds).toBe(5.2); + }); + + it('passes optional model and prompt fields', async () => { + mockApiFetch.mockResolvedValue({ + text: 'test', + language: null, + durationSeconds: null, + model: 'whisper-1', + durationMs: 100, + }); + const client = createExtractionClient({ baseUrl: 'http://localhost:4005' }); + + const req = { + audioUrl: 'https://blob.example.com/meeting.wav', + model: 'whisper-1', + prompt: 'Technical meeting about software architecture.', + responseFormat: 'verbose_json' as const, + }; + + await client.transcribe(req); + expect(mockApiFetch).toHaveBeenCalledWith('/api/transcribe', { + method: 'POST', + body: JSON.stringify(req), + }); + }); + + it('propagates errors from api client', async () => { + mockApiFetch.mockRejectedValue(new Error('Service unavailable')); + const client = createExtractionClient({ baseUrl: 'http://localhost:4005' }); + + await expect( + client.transcribe({ audioUrl: 'https://blob.example.com/audio.mp3' }) + ).rejects.toThrow('Service unavailable'); + }); + }); + describe('config options', () => { it('passes getToken to createApiClient', async () => { const { createApiClient } = await import('@bytelyst/api-client'); diff --git a/packages/extraction/src/client.ts b/packages/extraction/src/client.ts index 8d037a27..c9de8e48 100644 --- a/packages/extraction/src/client.ts +++ b/packages/extraction/src/client.ts @@ -12,6 +12,8 @@ import type { BatchExtractRequest, BatchExtractResponse, ExtractionTask, + TranscribeRequest, + TranscribeResponse, } from './types.js'; export interface ExtractionClient { @@ -26,6 +28,9 @@ export interface ExtractionClient { /** Get a single task by ID. */ getTask(id: string, productId?: string): Promise; + + /** Transcribe audio from a URL via OpenAI Whisper API. */ + transcribe(req: TranscribeRequest): Promise; } /** @@ -74,5 +79,12 @@ export function createExtractionClient(config: ExtractionClientConfig): Extracti const qs = productId ? `?productId=${encodeURIComponent(productId)}` : ''; return api.fetch(`/api/tasks/${encodeURIComponent(id)}${qs}`); }, + + async transcribe(req: TranscribeRequest): Promise { + return api.fetch('/api/transcribe', { + method: 'POST', + body: JSON.stringify(req), + }); + }, }; } diff --git a/packages/extraction/src/index.ts b/packages/extraction/src/index.ts index 4d8eff76..e8dbffb7 100644 --- a/packages/extraction/src/index.ts +++ b/packages/extraction/src/index.ts @@ -8,5 +8,7 @@ export type { ExtractResponse, BatchExtractRequest, BatchExtractResponse, + TranscribeRequest, + TranscribeResponse, ExtractionClientConfig, } from './types.js'; diff --git a/packages/extraction/src/types.ts b/packages/extraction/src/types.ts index 46aa0be2..a3fbab39 100644 --- a/packages/extraction/src/types.ts +++ b/packages/extraction/src/types.ts @@ -70,6 +70,38 @@ export interface BatchExtractResponse { requestId?: string; } +// ── Transcription types ───────────────────────────────────────── + +export interface TranscribeRequest { + /** URL of the audio file (e.g. Azure Blob SAS URL). */ + audioUrl: string; + /** Override the Whisper model (default: whisper-1). */ + model?: string; + /** ISO 639-1 language hint (e.g. 'en', 'es'). Improves accuracy. */ + language?: string; + /** Optional prompt to guide the transcription style. */ + prompt?: string; + /** Response format: 'text' | 'json' | 'verbose_json'. */ + responseFormat?: 'text' | 'json' | 'verbose_json'; + /** Product ID for scoping / rate limiting. */ + productId?: string; +} + +export interface TranscribeResponse { + /** The transcribed text. */ + text: string; + /** Detected or specified language code. */ + language: string | null; + /** Duration of the audio in seconds (when available). */ + durationSeconds: number | null; + /** Whisper model used. */ + model: string; + /** Processing time in milliseconds. */ + durationMs: number; + /** Request ID for tracing. */ + requestId?: string; +} + // ── Client config ─────────────────────────────────────────────── export interface ExtractionClientConfig {