learning_ai_common_plat/packages/extraction/src/client.ts
saravanakumardb1 031e910607 fix(extraction-service): review fixes — locale mapping, model passthrough, content-type validation
BUG 1: Azure locale derivation produced 'en-EN' (invalid) for 2-letter codes.
  → Added toAzureLocale() with 28-language mapping table (en→en-US, pt→pt-BR, etc.)
  → Exported for testing; falls back to code-CODE for unmapped languages.

BUG 2: model field from request schema was silently dropped after provider refactor.
  → Added optional model field to TranscriptionInput interface.
  → OpenAI provider now uses input.model override (falls back to config.model).
  → Route passes model through to provider.transcribe().

GAP 4: SUPPORTED_AUDIO_TYPES was defined but never validated against.
  → Route now rejects unsupported content-types with a clear error message.
  → Allows application/octet-stream (Azure Blob SAS URLs often return this).

GAP 5: Client JSDoc still said 'via OpenAI Whisper API' — now 'via configured STT provider'.

GAP 8: Azure WAV content-type hardcoded samplerate=16000 — now generic audio/wav.

Tests: 42 transcription tests (was 35), 178 total passing.
  → toAzureLocale: 4 tests (locale mapping, passthrough, fallback, case-insensitive)
  → setSTT: 1 test (singleton override)
  → model passthrough: 2 tests (mock ignores, input accepts)
2026-04-06 11:40:27 -07:00

91 lines
2.6 KiB
TypeScript

/**
* Extraction service client factory.
* Uses @bytelyst/api-client under the hood for consistent auth token injection.
*/
import { createApiClient } from '@bytelyst/api-client';
import type {
ExtractionClientConfig,
ExtractRequest,
ExtractResponse,
BatchExtractRequest,
BatchExtractResponse,
ExtractionTask,
TranscribeRequest,
TranscribeResponse,
} from './types.js';
/**
* Typed surface of the extraction service as seen by callers.
* Every method is asynchronous and resolves with the response type named in its signature.
*/
export interface ExtractionClient {
/**
* Single document extraction.
* @param req - Extraction request describing one input.
* @returns Promise resolving to the extraction response for that request.
*/
extract(req: ExtractRequest): Promise<ExtractResponse>;
/**
* Batch extraction (multiple inputs, shared config).
* @param req - Batch request carrying all inputs.
* @returns Promise resolving to the batch response.
*/
extractBatch(req: BatchExtractRequest): Promise<BatchExtractResponse>;
/**
* List available extraction tasks.
* @param productId - Optional product identifier.
* NOTE(review): presumably narrows the list to that product — confirm against the service.
* @returns Promise resolving to an array of tasks.
*/
listTasks(productId?: string): Promise<ExtractionTask[]>;
/**
* Get a single task by ID.
* @param id - Identifier of the task to fetch.
* @param productId - Optional product identifier sent alongside the lookup.
* @returns Promise resolving to the matching task.
*/
getTask(id: string, productId?: string): Promise<ExtractionTask>;
/**
* Transcribe audio from a URL via the configured STT provider.
* @param req - Transcription request.
* @returns Promise resolving to the transcription response.
*/
transcribe(req: TranscribeRequest): Promise<TranscribeResponse>;
}
/**
 * Build an {@link ExtractionClient} bound to a single extraction-service base URL.
 *
 * One `@bytelyst/api-client` instance is created from `config.baseUrl` and
 * `config.getToken`, and every method on the returned client sends its request
 * through that instance.
 *
 * @param config - Supplies the `baseUrl` and `getToken` forwarded to `createApiClient`.
 * @returns A client object exposing `extract`, `extractBatch`, `listTasks`,
 *   `getTask`, and `transcribe`.
 *
 * @example
 * ```ts
 * const client = createExtractionClient({
 *   baseUrl: "http://localhost:4005",
 *   getToken: () => localStorage.getItem("access_token"),
 * });
 *
 * const result = await client.extract({
 *   text: "John said we should ship by Friday.",
 *   taskId: "transcript-extraction",
 * });
 * ```
 */
export function createExtractionClient(config: ExtractionClientConfig): ExtractionClient {
  const { baseUrl, getToken } = config;
  const api = createApiClient({ baseUrl, getToken });

  /** POST `payload` to `path`, serialized as a JSON string body. */
  const postJson = <TResponse>(path: string, payload: unknown): Promise<TResponse> =>
    api.fetch<TResponse>(path, { method: 'POST', body: JSON.stringify(payload) });

  /** `?productId=…` suffix when a non-empty product id is given, otherwise an empty string. */
  const productSuffix = (productId?: string): string => {
    if (!productId) {
      return '';
    }
    return `?productId=${encodeURIComponent(productId)}`;
  };

  return {
    async extract(req: ExtractRequest): Promise<ExtractResponse> {
      return postJson<ExtractResponse>('/api/extract', req);
    },

    async extractBatch(req: BatchExtractRequest): Promise<BatchExtractResponse> {
      return postJson<BatchExtractResponse>('/api/extract/batch', req);
    },

    async listTasks(productId?: string): Promise<ExtractionTask[]> {
      const path = '/api/tasks' + productSuffix(productId);
      return api.fetch<ExtractionTask[]>(path);
    },

    async getTask(id: string, productId?: string): Promise<ExtractionTask> {
      // Build the query suffix before encoding the id, so both pieces are
      // evaluated in the same order as they appear in the request pipeline.
      const suffix = productSuffix(productId);
      const path = '/api/tasks/' + encodeURIComponent(id) + suffix;
      return api.fetch<ExtractionTask>(path);
    },

    async transcribe(req: TranscribeRequest): Promise<TranscribeResponse> {
      return postJson<TranscribeResponse>('/api/transcribe', req);
    },
  };
}