learning_ai_common_plat/services/extraction-service/python/src/models.py
saravanakumardb1 5c1744d3a4 feat(extraction): Phase 6 advanced features (6.1-6.8)
- 6.1-6.2: Entity visualization components (bar chart, pie chart, timeline) [in LysnrAI repo]
- 6.3-6.4: Async job queue — POST /extract/jobs, GET /extract/jobs/:id, GET /extract/jobs
- 6.5-6.6: Model registry with tier (standard/premium/free/mock) + GET /extract/models
- 6.7-6.8: Multi-language detection (es/fr/de/pt/ja/zh/ko/ar) + prompt enrichment
- ExtractMetadata.language field added to Python models
- 46 TS tests passing, build clean
2026-02-14 14:08:02 -08:00

54 lines
1.3 KiB
Python

"""
Pydantic models for the extraction sidecar API.
These mirror the Zod schemas in the TypeScript service.
"""
from __future__ import annotations
from pydantic import BaseModel, Field
class Extraction(BaseModel):
    # A single extraction record exchanged through the sidecar API.
    # (Kept as a comment, not a docstring: Pydantic copies a model's
    # docstring into the generated JSON schema.)

    # Required string; presumably the category/label of the extracted
    # span -- confirm against the TypeScript schema.
    extraction_class: str

    # Required string; presumably the extracted text itself -- confirm.
    extraction_text: str

    # Optional string-to-string mapping; None when not supplied.
    attributes: dict[str, str] | None = None
class ExtractionExample(BaseModel):
    # Pairs a piece of text with a list of Extraction records.
    # (Kept as a comment, not a docstring: Pydantic copies a model's
    # docstring into the generated JSON schema.)

    # Required string holding the example's text.
    text: str

    # Required list of Extraction records associated with `text`
    # (an empty list is accepted; no length constraint is declared).
    extractions: list[Extraction]
class ExtractRequest(BaseModel):
    # Parameters for one extraction request.
    # (Kept as a comment, not a docstring: Pydantic copies a model's
    # docstring into the generated JSON schema.)

    # Required input text, 1 to 50,000 characters inclusive.
    text: str = Field(default=..., min_length=1, max_length=50_000)

    # Optional strings, each defaulting to None when omitted.
    task_id: str | None = None
    task_prompt: str | None = None

    # Optional list of ExtractionExample records; None when omitted.
    examples: list[ExtractionExample] | None = None

    # Optional model identifier string; None when omitted.
    model_id: str | None = None

    # Optional integer knobs. Each defaults to None; a supplied value
    # must fall inside the inclusive range declared by ge/le.
    extraction_passes: int | None = Field(default=None, ge=1, le=5)
    max_workers: int | None = Field(default=None, ge=1, le=50)
    max_char_buffer: int | None = Field(default=None, ge=100, le=10_000)
class BatchExtractRequest(BaseModel):
    # Wrapper carrying several ExtractRequest items in one payload.
    # (Kept as a comment, not a docstring: Pydantic copies a model's
    # docstring into the generated JSON schema.)

    # Required list of 1 to 50 ExtractRequest items (bounds inclusive).
    requests: list[ExtractRequest] = Field(
        default=...,
        min_length=1,
        max_length=50,
    )
class ExtractMetadata(BaseModel):
    # Metadata returned alongside a set of extractions.
    # (Kept as a comment, not a docstring: Pydantic copies a model's
    # docstring into the generated JSON schema.)

    # Required model identifier string.
    model_id: str

    # Required float; presumably elapsed time in milliseconds -- confirm.
    duration_ms: float

    # Optional integer; None when no token count is available.
    token_count: int | None = None

    # Required integer; presumably the input's character count -- confirm.
    char_count: int

    # Optional language string; None when not set.
    language: str | None = None
class ExtractResponse(BaseModel):
    # Response payload: the extractions plus their ExtractMetadata.
    # (Kept as a comment, not a docstring: Pydantic copies a model's
    # docstring into the generated JSON schema.)

    # Required list of Extraction records (may be empty; no length
    # constraint is declared).
    extractions: list[Extraction]

    # Required ExtractMetadata describing this response.
    metadata: ExtractMetadata
class HealthResponse(BaseModel):
    # Health payload; every field has a default, so HealthResponse()
    # can be built with no arguments.
    # (Kept as a comment, not a docstring: Pydantic copies a model's
    # docstring into the generated JSON schema.)

    # Status string, "ok" unless overridden.
    status: str = "ok"

    # Version string, "0.1.0" unless overridden.
    version: str = "0.1.0"

    # Sidecar name string, "langextract" unless overridden.
    sidecar: str = "langextract"