learning_ai_common_plat/services/extraction-service/src/lib/config.ts
saravanakumardb1 a77b3ff931 refactor(extraction-service): provider-agnostic transcription — OpenAI + Azure Speech + Mock
- TranscriptionProvider interface with transcribe() + isConfigured()
- OpenAITranscriptionProvider: Whisper API (existing behavior)
- AzureTranscriptionProvider: Azure Speech REST API for short audio
- MockTranscriptionProvider: deterministic results for testing
- Factory: getSTT() singleton with env-driven auto-detection
  - STT_PROVIDER=openai|azure|mock (explicit)
  - Auto-detect: AZURE_SPEECH_KEY → azure, OPENAI_API_KEY → openai, else mock
- Config: add STT_PROVIDER, AZURE_SPEECH_KEY, AZURE_SPEECH_REGION env vars
- Route refactored: audio download (common) → provider.transcribe() (swappable)
- deriveFilename() extracted to types.ts (shared by route + providers)
- 35 transcription tests (was 12), 171 total passing
- Follows same pattern as @bytelyst/llm provider abstraction
2026-04-06 11:30:22 -07:00

31 lines
1.4 KiB
TypeScript

import { z } from 'zod';
/**
 * Builds the schema for a numeric environment variable with a fallback value.
 *
 * `z.coerce.number()` converts with `Number(value)`, and `Number('')` is `0`,
 * so a variable that is present but blank (e.g. `PORT=`) would be accepted as
 * `0` instead of falling back to the default. Blank strings are mapped to
 * `undefined` first so the default applies; every other value is coerced and
 * validated exactly as before.
 */
const numberFromEnv = (fallback: number) =>
  z.preprocess(
    (raw) => (typeof raw === 'string' && raw.trim() === '' ? undefined : raw),
    z.coerce.number().default(fallback),
  );

/**
 * Shape of the environment variables this service reads.
 *
 * Fields with `.default(...)` fall back to that value when unset, fields with
 * `.optional()` may be absent, and the `.min(1, ...)` fields must be provided
 * and non-empty.
 */
const envSchema = z.object({
  // HTTP server
  PORT: numberFromEnv(4005),
  HOST: z.string().default('0.0.0.0'),
  NODE_ENV: z.enum(['development', 'production', 'test']).default('development'),
  CORS_ORIGIN: z.string().optional(),
  SERVICE_NAME: z.string().default('extraction-service'),

  // Cosmos DB (endpoint and key are mandatory)
  COSMOS_ENDPOINT: z.string().min(1, 'COSMOS_ENDPOINT is required'),
  COSMOS_KEY: z.string().min(1, 'COSMOS_KEY is required'),
  COSMOS_DATABASE: z.string().default('lysnrai'),

  // Auth / product defaults
  JWT_SECRET: z.string().min(1, 'JWT_SECRET is required'),
  DEFAULT_PRODUCT_ID: z.string().default('lysnrai'),

  // Extraction backend
  PYTHON_SIDECAR_URL: z.string().default('http://localhost:4006'),
  DEFAULT_MODEL_ID: z.string().default('gemini-2.5-flash'),

  // Queue and rate limiting
  EXTRACTION_QUEUE_BACKEND: z.enum(['memory', 'file']).default('file'),
  PRODUCT_RATE_LIMIT_STORE: z.enum(['memory', 'valkey']).default('memory'),
  VALKEY_URL: z.string().optional(),
  EXTRACTION_QUEUE_FILE: z.string().optional(),
  EXTRACTION_QUEUE_POLL_MS: numberFromEnv(100),
  EXTRACTION_QUEUE_LEASE_MS: numberFromEnv(30000),

  // Speech-to-text providers
  STT_PROVIDER: z.enum(['openai', 'azure', 'mock']).optional(),
  OPENAI_API_KEY: z.string().optional(),
  OPENAI_BASE_URL: z.string().default('https://api.openai.com/v1'),
  WHISPER_MODEL: z.string().default('whisper-1'),
  AZURE_SPEECH_KEY: z.string().optional(),
  AZURE_SPEECH_REGION: z.string().optional(),
});

/**
 * Validated service configuration, parsed from `process.env` when this module
 * is first imported.
 *
 * @throws ZodError at import time if a required variable is missing or a
 *   value fails validation.
 */
export const config = envSchema.parse(process.env);