learning_ai_common_plat/services/extraction-service/python/tests/test_models.py
saravanakumardb1 c9d5c0caed feat(extraction): integration tests + Python tests + fix langextract API
- 6 route integration tests (mock sidecar via vitest vi.mock)
- 12 task CRUD route tests (mock repository)
- 29 Python tests: 10 extractor, 12 models, 7 app endpoints
- Fix extractor.py: correct lx.extract() API (text_or_documents positional, prompt_description)
- Mock fallback when no GEMINI_API_KEY or USE_MOCK_EXTRACTOR=true
- 46 TS tests + 29 Python tests = 75 total
2026-02-14 13:49:18 -08:00

125 lines
2.9 KiB
Python

"""
Unit tests for Pydantic models.
"""
import pytest
from pydantic import ValidationError
from src.models import (
Extraction,
ExtractionExample,
ExtractRequest,
BatchExtractRequest,
ExtractMetadata,
ExtractResponse,
HealthResponse,
)
def test_extraction_basic():
    """An Extraction built from class and text alone has no attributes."""
    extraction = Extraction(
        extraction_class="topic",
        extraction_text="meeting",
    )
    assert extraction.extraction_class == "topic"
    # No attributes were passed, so the field is expected to be None.
    assert extraction.attributes is None
def test_extraction_with_attributes():
    """Attributes supplied at construction can be read back by key."""
    supplied_attributes = {"valence": "negative"}
    extraction = Extraction(
        extraction_class="emotion",
        extraction_text="stressed",
        attributes=supplied_attributes,
    )
    assert extraction.attributes["valence"] == "negative"
def test_extraction_example():
    """An ExtractionExample holds the single extraction it was given."""
    only_extraction = Extraction(
        extraction_class="topic",
        extraction_text="sample",
    )
    example = ExtractionExample(
        text="sample text",
        extractions=[only_extraction],
    )
    assert len(example.extractions) == 1
def test_extract_request_minimal():
    """A request given only text reports task_id and model_id as None."""
    request = ExtractRequest(text="Hello world")
    assert request.text == "Hello world"
    # Neither optional identifier was supplied.
    assert request.task_id is None
    assert request.model_id is None
def test_extract_request_full():
    """A request built with every option keeps the values it was given.

    In addition to extraction_passes and max_workers, the text, task_id and
    model_id values are checked so that a silently dropped or altered field
    is caught by this "full" test.
    """
    req = ExtractRequest(
        text="Test text",
        task_id="triage",
        task_prompt="Extract entities",
        model_id="gemini-2.5-flash",
        extraction_passes=2,
        max_workers=5,
        max_char_buffer=500,
    )
    assert req.text == "Test text"
    assert req.task_id == "triage"
    assert req.model_id == "gemini-2.5-flash"
    assert req.extraction_passes == 2
    assert req.max_workers == 5
    # NOTE(review): task_prompt and max_char_buffer are passed above but not
    # asserted here; confirm their attribute names on the model before
    # adding round-trip checks for them.
def test_extract_request_rejects_empty_text():
    """Building a request from an empty string raises ValidationError."""
    empty_text = ""
    with pytest.raises(ValidationError):
        ExtractRequest(text=empty_text)
def test_extract_request_rejects_oversized_text():
    """A 50,001-character text raises ValidationError."""
    # Presumably one character past the model's maximum length; verify
    # against the field definition in src.models.
    oversized_text = "a" * 50_001
    with pytest.raises(ValidationError):
        ExtractRequest(text=oversized_text)
def test_extract_request_rejects_invalid_passes():
    """Passing extraction_passes=10 raises ValidationError."""
    invalid_kwargs = {"text": "test", "extraction_passes": 10}
    with pytest.raises(ValidationError):
        ExtractRequest(**invalid_kwargs)
def test_batch_extract_request():
    """A batch built from two requests exposes both of them."""
    documents = ("doc 1", "doc 2")
    batch = BatchExtractRequest(
        requests=[ExtractRequest(text=document) for document in documents],
    )
    assert len(batch.requests) == 2
def test_batch_extract_request_rejects_empty():
    """A batch with an empty request list raises ValidationError."""
    no_requests = []
    with pytest.raises(ValidationError):
        BatchExtractRequest(requests=no_requests)
def test_extract_metadata():
    """Metadata built without a token count reports None for it."""
    metadata = ExtractMetadata(
        model_id="gemini-2.5-flash",
        duration_ms=150.5,
        char_count=42,
    )
    # token_count was not supplied at construction.
    assert metadata.token_count is None
    assert metadata.duration_ms == 150.5
def test_extract_response():
    """A response exposes its extractions list and nested metadata."""
    topic = Extraction(extraction_class="topic", extraction_text="AI")
    metadata = ExtractMetadata(
        model_id="gemini-2.5-flash",
        duration_ms=100,
        char_count=20,
    )
    response = ExtractResponse(extractions=[topic], metadata=metadata)
    assert len(response.extractions) == 1
    assert response.metadata.model_id == "gemini-2.5-flash"
def test_health_response_defaults():
    """A HealthResponse built with no arguments carries the expected defaults."""
    health = HealthResponse()
    assert health.status == "ok"
    assert health.version == "0.1.0"
    assert health.sidecar == "langextract"