feat(extraction): add transcribe() to ExtractionClient — reusable speech-to-text client

- TranscribeRequest/TranscribeResponse types exported from @bytelyst/extraction
- transcribe() method on ExtractionClient calls POST /api/transcribe
- 3 new client tests (correct body, optional fields, error propagation)
- Package test count: 16 → 19
2026-04-06 11:11:17 -07:00 · 2026-04-06 11:11:17 -07:00 · f8e15880d2
commit f8e15880d2
parent cc3fbf8187
4 changed files with 114 additions and 7 deletions
--- a/packages/extraction/src/tests/extraction.test.ts
+++ b/packages/extraction/src/tests/extraction.test.ts
@ -41,9 +41,7 @@ describe('createExtractionClient', () => {
  describe('extract', () => {
    it('calls POST /api/extract with correct body', async () => {
      const mockResponse: ExtractResponse = {
-        extractions: [
-          { extraction_class: 'person', extraction_text: 'John' },
-        ],
+        extractions: [{ extraction_class: 'person', extraction_text: 'John' }],
        metadata: { modelId: 'gemini-1.5', durationMs: 150, charCount: 35 },
      };
      mockApiFetch.mockResolvedValue(mockResponse);
@ -73,7 +71,9 @@ describe('createExtractionClient', () => {
        modelId: 'gpt-4',
        productId: 'lysnrai',
        options: { extractionPasses: 2, maxWorkers: 4, maxCharBuffer: 1000 },
-        examples: [{ text: 'Hi Bob', extractions: [{ extraction_class: 'person', extraction_text: 'Bob' }] }],
+        examples: [
+          { text: 'Hi Bob', extractions: [{ extraction_class: 'person', extraction_text: 'Bob' }] },
+        ],
      };

      await client.extract(req);
@ -94,9 +94,7 @@ describe('createExtractionClient', () => {
  describe('extractBatch', () => {
    it('calls POST /api/extract/batch with correct body', async () => {
      const mockResponse: BatchExtractResponse = {
-        results: [
-          { extractions: [], metadata: { modelId: 'test', durationMs: 10, charCount: 5 } },
-        ],
+        results: [{ extractions: [], metadata: { modelId: 'test', durationMs: 10, charCount: 5 } }],
        requestId: 'req-123',
      };
      mockApiFetch.mockResolvedValue(mockResponse);
@ -199,6 +197,69 @@ describe('createExtractionClient', () => {
    });
  });

+  describe('transcribe', () => { // covers client.transcribe(): request forwarding, optional fields, error propagation
+    it('calls POST /api/transcribe with correct body', async () => {
+      const mockResponse = {
+        text: 'Hello, this is a test recording.',
+        language: 'en',
+        durationSeconds: 5.2,
+        model: 'whisper-1',
+        durationMs: 1200,
+      };
+      mockApiFetch.mockResolvedValue(mockResponse); // the mocked fetch resolves with the canned response
+
+      const client = createExtractionClient({ baseUrl: 'http://localhost:4005' });
+      const req = {
+        audioUrl: 'https://blob.example.com/audio.mp3',
+        language: 'en',
+        productId: 'notelett',
+      };
+
+      const result = await client.transcribe(req);
+
+      expect(mockApiFetch).toHaveBeenCalledWith('/api/transcribe', {
+        method: 'POST',
+        body: JSON.stringify(req), // the request object must be sent unchanged, JSON-serialized
+      });
+      expect(result.text).toBe('Hello, this is a test recording.'); // the mocked response is handed back to the caller
+      expect(result.language).toBe('en');
+      expect(result.durationSeconds).toBe(5.2);
+    });
+
+    it('passes optional model and prompt fields', async () => {
+      mockApiFetch.mockResolvedValue({
+        text: 'test',
+        language: null, // nullable response fields are null in this fixture
+        durationSeconds: null,
+        model: 'whisper-1',
+        durationMs: 100,
+      });
+      const client = createExtractionClient({ baseUrl: 'http://localhost:4005' });
+
+      const req = {
+        audioUrl: 'https://blob.example.com/meeting.wav',
+        model: 'whisper-1',
+        prompt: 'Technical meeting about software architecture.',
+        responseFormat: 'verbose_json' as const, // 'as const' keeps the literal type so req fits the responseFormat union
+      };
+
+      await client.transcribe(req);
+      expect(mockApiFetch).toHaveBeenCalledWith('/api/transcribe', {
+        method: 'POST',
+        body: JSON.stringify(req), // optional fields must appear in the serialized body as given
+      });
+    });
+
+    it('propagates errors from api client', async () => {
+      mockApiFetch.mockRejectedValue(new Error('Service unavailable')); // make the mocked fetch reject
+      const client = createExtractionClient({ baseUrl: 'http://localhost:4005' });
+
+      await expect(
+        client.transcribe({ audioUrl: 'https://blob.example.com/audio.mp3' })
+      ).rejects.toThrow('Service unavailable'); // the same error must reach the caller
+    });
+  });
+
  describe('config options', () => {
    it('passes getToken to createApiClient', async () => {
      const { createApiClient } = await import('@bytelyst/api-client');
--- a/packages/extraction/src/client.ts
+++ b/packages/extraction/src/client.ts
@ -12,6 +12,8 @@ import type {
  BatchExtractRequest,
  BatchExtractResponse,
  ExtractionTask,
+  TranscribeRequest,
+  TranscribeResponse,
 } from './types.js';

 export interface ExtractionClient {
@ -26,6 +28,9 @@ export interface ExtractionClient {

  /** Get a single task by ID. */
  getTask(id: string, productId?: string): Promise<ExtractionTask>;
+
+  /** Transcribe audio from a URL via OpenAI Whisper API. */
+  transcribe(req: TranscribeRequest): Promise<TranscribeResponse>;
 }

 /**
@ -74,5 +79,12 @@ export function createExtractionClient(config: ExtractionClientConfig): Extracti
      const qs = productId ? `?productId=${encodeURIComponent(productId)}` : '';
      return api.fetch<ExtractionTask>(`/api/tasks/${encodeURIComponent(id)}${qs}`);
    },
+
+    async transcribe(req: TranscribeRequest): Promise<TranscribeResponse> { // forwards req to POST /api/transcribe
+      return api.fetch<TranscribeResponse>('/api/transcribe', {
+        method: 'POST',
+        body: JSON.stringify(req), // the entire request object is serialized as the JSON body
+      });
+    },
  };
 }
--- a/packages/extraction/src/index.ts
+++ b/packages/extraction/src/index.ts
@ -8,5 +8,7 @@ export type {
  ExtractResponse,
  BatchExtractRequest,
  BatchExtractResponse,
+  TranscribeRequest,
+  TranscribeResponse,
  ExtractionClientConfig,
 } from './types.js';
--- a/packages/extraction/src/types.ts
+++ b/packages/extraction/src/types.ts
@ -70,6 +70,38 @@ export interface BatchExtractResponse {
  requestId?: string;
 }

+// ── Transcription types ─────────────────────────────────────────
+
+export interface TranscribeRequest {
+  /** URL of the audio file to transcribe (e.g. Azure Blob SAS URL). The only required field. */
+  audioUrl: string;
+  /** Override the Whisper model (default: whisper-1). */
+  model?: string;
+  /** ISO 639-1 language hint (e.g. 'en', 'es'). Improves accuracy. */
+  language?: string;
+  /** Optional prompt to guide the transcription style. */
+  prompt?: string;
+  /** Output format requested for the transcription. */
+  responseFormat?: 'text' | 'json' | 'verbose_json';
+  /** Product ID for scoping / rate limiting. */
+  productId?: string;
+}
+
+export interface TranscribeResponse {
+  /** The transcribed text. */
+  text: string;
+  /** Detected or specified language code; may be null. */
+  language: string | null;
+  /** Duration of the audio in seconds, or null when not available. */
+  durationSeconds: number | null;
+  /** Whisper model used. */
+  model: string;
+  /** Processing time in milliseconds. */
+  durationMs: number;
+  /** Request ID for tracing; may be absent. */
+  requestId?: string;
+}
+
 // ── Client config ───────────────────────────────────────────────

 export interface ExtractionClientConfig {