BUG 1: Azure locale derivation produced 'en-EN' (invalid) for 2-letter codes. → Added toAzureLocale() with 28-language mapping table (en→en-US, pt→pt-BR, etc.) → Exported for testing; falls back to code-CODE for unmapped languages. BUG 2: model field from request schema was silently dropped after provider refactor. → Added optional model field to TranscriptionInput interface. → OpenAI provider now uses input.model override (falls back to config.model). → Route passes model through to provider.transcribe(). GAP 4: SUPPORTED_AUDIO_TYPES was defined but never validated against. → Route now rejects unsupported content-types with a clear error message. → Allows application/octet-stream (Azure Blob SAS URLs often return this). GAP 5: Client JSDoc still said 'via OpenAI Whisper API' — now 'via configured STT provider'. GAP 8: Azure WAV content-type hardcoded samplerate=16000 — now generic audio/wav. Tests: 42 transcription tests (was 35), 178 total passing. → toAzureLocale: 4 tests (locale mapping, passthrough, fallback, case-insensitive) → setSTT: 1 test (singleton override) → model passthrough: 2 tests (mock ignores, input accepts)
91 lines
2.6 KiB
TypeScript
91 lines
2.6 KiB
TypeScript
/**
|
|
* Extraction service client factory.
|
|
* Uses @bytelyst/api-client under the hood for consistent auth token injection.
|
|
*/
|
|
|
|
import { createApiClient } from '@bytelyst/api-client';
|
|
|
|
import type {
|
|
ExtractionClientConfig,
|
|
ExtractRequest,
|
|
ExtractResponse,
|
|
BatchExtractRequest,
|
|
BatchExtractResponse,
|
|
ExtractionTask,
|
|
TranscribeRequest,
|
|
TranscribeResponse,
|
|
} from './types.js';
|
|
|
|
/**
 * Typed surface of the extraction service client.
 *
 * Every method is asynchronous and resolves with the response type declared
 * in `./types.js`.
 */
export interface ExtractionClient {
  /**
   * Extract from a single document.
   *
   * @param req - The extraction request.
   * @returns The extraction response for that one input.
   */
  extract(req: ExtractRequest): Promise<ExtractResponse>;

  /**
   * Extract from several inputs in one call; the inputs share one config.
   *
   * @param req - The batch extraction request.
   * @returns The batch extraction response.
   */
  extractBatch(req: BatchExtractRequest): Promise<BatchExtractResponse>;

  /**
   * Enumerate the extraction tasks that are available.
   *
   * @param productId - Optional product identifier passed along with the lookup.
   * @returns The available tasks.
   */
  listTasks(productId?: string): Promise<ExtractionTask[]>;

  /**
   * Look up one extraction task by its ID.
   *
   * @param id - Identifier of the task to fetch.
   * @param productId - Optional product identifier passed along with the lookup.
   * @returns The matching task.
   */
  getTask(id: string, productId?: string): Promise<ExtractionTask>;

  /**
   * Transcribe audio located at a URL using the configured STT provider.
   *
   * @param req - The transcription request.
   * @returns The transcription response.
   */
  transcribe(req: TranscribeRequest): Promise<TranscribeResponse>;
}
|
|
|
|
/**
 * Build a typed client for the extraction service.
 *
 * All requests are issued through the API client returned by
 * `createApiClient`, which is configured with `config.baseUrl` and
 * `config.getToken`.
 *
 * @param config - Base URL of the service and the token getter to forward.
 * @returns An object implementing {@link ExtractionClient}.
 *
 * @example
 * ```ts
 * const client = createExtractionClient({
 *   baseUrl: "http://localhost:4005",
 *   getToken: () => localStorage.getItem("access_token"),
 * });
 *
 * const result = await client.extract({
 *   text: "John said we should ship by Friday.",
 *   taskId: "transcript-extraction",
 * });
 * ```
 */
export function createExtractionClient(config: ExtractionClientConfig): ExtractionClient {
  const { baseUrl, getToken } = config;
  const api = createApiClient({ baseUrl, getToken });

  // The three write endpoints all POST the request object serialized as JSON.
  // Kept synchronous on purpose: it is only invoked from inside async methods,
  // so anything thrown here still surfaces as a rejected promise.
  function postJson<TResponse>(path: string, payload: unknown) {
    return api.fetch<TResponse>(path, {
      method: 'POST',
      body: JSON.stringify(payload),
    });
  }

  // Query-string suffix for the task endpoints; empty when no (or a falsy)
  // product ID is supplied.
  function productQuery(productId?: string): string {
    if (!productId) {
      return '';
    }
    return `?productId=${encodeURIComponent(productId)}`;
  }

  const client: ExtractionClient = {
    async extract(req: ExtractRequest): Promise<ExtractResponse> {
      return postJson<ExtractResponse>('/api/extract', req);
    },

    async extractBatch(req: BatchExtractRequest): Promise<BatchExtractResponse> {
      return postJson<BatchExtractResponse>('/api/extract/batch', req);
    },

    async listTasks(productId?: string): Promise<ExtractionTask[]> {
      const suffix = productQuery(productId);
      return api.fetch<ExtractionTask[]>(`/api/tasks${suffix}`);
    },

    async getTask(id: string, productId?: string): Promise<ExtractionTask> {
      const suffix = productQuery(productId);
      // The task ID is a path segment, so it is percent-encoded as well.
      const taskPath = `/api/tasks/${encodeURIComponent(id)}`;
      return api.fetch<ExtractionTask>(`${taskPath}${suffix}`);
    },

    async transcribe(req: TranscribeRequest): Promise<TranscribeResponse> {
      return postJson<TranscribeResponse>('/api/transcribe', req);
    },
  };

  return client;
}
|