From b69cf33e23080480afd6d468bfbe97de3181c7e9 Mon Sep 17 00:00:00 2001 From: LZL0 <12474488+LZL0@users.noreply.github.com> Date: Wed, 10 Dec 2025 14:39:39 +0100 Subject: [PATCH 1/3] Add pydantic exports. --- README.md | 20 ++ src/l0/pydantic/__init__.py | 258 ++++++++++++++++ src/l0/pydantic/consensus.py | 172 +++++++++++ src/l0/pydantic/drift.py | 47 +++ src/l0/pydantic/errors.py | 125 ++++++++ src/l0/pydantic/event_sourcing.py | 111 +++++++ src/l0/pydantic/events.py | 202 +++++++++++++ src/l0/pydantic/guardrails.py | 130 +++++++++ src/l0/pydantic/metrics.py | 25 ++ src/l0/pydantic/parallel.py | 58 ++++ src/l0/pydantic/pipeline.py | 82 ++++++ src/l0/pydantic/pool.py | 35 +++ src/l0/pydantic/state_machine.py | 36 +++ src/l0/pydantic/types.py | 268 +++++++++++++++++ src/l0/pydantic/window.py | 95 ++++++ tests/test_pydantic_models.py | 469 ++++++++++++++++++++++++++++++ 16 files changed, 2133 insertions(+) create mode 100644 src/l0/pydantic/__init__.py create mode 100644 src/l0/pydantic/consensus.py create mode 100644 src/l0/pydantic/drift.py create mode 100644 src/l0/pydantic/errors.py create mode 100644 src/l0/pydantic/event_sourcing.py create mode 100644 src/l0/pydantic/events.py create mode 100644 src/l0/pydantic/guardrails.py create mode 100644 src/l0/pydantic/metrics.py create mode 100644 src/l0/pydantic/parallel.py create mode 100644 src/l0/pydantic/pipeline.py create mode 100644 src/l0/pydantic/pool.py create mode 100644 src/l0/pydantic/state_machine.py create mode 100644 src/l0/pydantic/types.py create mode 100644 src/l0/pydantic/window.py create mode 100644 tests/test_pydantic_models.py diff --git a/README.md b/README.md index 71a7d21..d5e6c37 100644 --- a/README.md +++ b/README.md @@ -63,6 +63,7 @@ L0 includes 1,800+ tests covering all major reliability features. | **📍 Last-Known-Good Token Resumption** | `continue_from_last_good_token` resumes from the last checkpoint on timeout or failure. No lost tokens. | | **🧠 Drift Detection** | Detects repetition, stalls, and format drift before corruption propagates. | | **🧱 Structured Output** | Guaranteed-valid JSON with Pydantic. Auto-corrects missing braces, commas, and markdown fences. | +| **📋 Pydantic Validation Models** | All L0 types available as Pydantic models for runtime validation, JSON serialization, and schema generation via `l0.pydantic`. | | **🩹 JSON Auto-Healing** | Automatic correction of truncated or malformed JSON (missing braces, brackets, quotes), and repair of broken Markdown code fences. | | **🛡️ Guardrails** | JSON, Markdown, and pattern validation with fast streaming checks. Delta-only checks run sync; full-content scans defer to async. | | **⚡ Race: Fastest-Model Wins** | Run multiple models or providers in parallel and return the fastest valid stream. Ideal for ultra-low-latency chat. | @@ -258,6 +259,25 @@ for prompt, result in zip(prompts, results): print(f"{prompt}: {result.state.content.strip()}") ``` +### Pydantic Validation Models + +L0 provides Pydantic models for all its types, enabling runtime validation, JSON serialization, and schema generation: + +```python +from l0.pydantic import StateModel, RetryModel, DriftResultModel + +# Validate external data +state = StateModel(content="Hello", token_count=5, completed=True) + +# Serialize to JSON +json_data = state.model_dump_json() + +# Generate JSON schema for documentation or APIs +schema = StateModel.model_json_schema() +``` + +All L0 types have corresponding Pydantic models: `StateModel`, `RetryModel`, `TimeoutModel`, `ConsensusResultModel`, `DriftResultModel`, `MetricsSnapshotModel`, and more. + ## Philosophy - **No magic** - Everything is explicit and predictable diff --git a/src/l0/pydantic/__init__.py b/src/l0/pydantic/__init__.py new file mode 100644 index 0000000..1590cad --- /dev/null +++ b/src/l0/pydantic/__init__.py @@ -0,0 +1,258 @@ +"""Pydantic models for L0 types. + +This module provides Pydantic BaseModel equivalents for all L0 dataclasses, +enabling runtime validation, JSON serialization, and schema generation. + +Usage: + from l0.pydantic import StateModel, RetryModel, TimeoutModel + + # Validate data + state = StateModel(content="Hello", token_count=5) + + # Serialize to JSON + json_data = state.model_dump_json() + + # Generate JSON schema + schema = StateModel.model_json_schema() +""" + +# Core types +# Consensus types +from .consensus import ( + AgreementModel, + AgreementTypeModel, + ConflictResolutionModel, + ConsensusAnalysisModel, + ConsensusOutputModel, + ConsensusPresetModel, + ConsensusResultModel, + DisagreementModel, + DisagreementSeverityModel, + DisagreementValueModel, + FieldAgreementModel, + FieldConsensusModel, + StrategyModel, +) + +# Drift types +from .drift import ( + DriftConfigModel, + DriftResultModel, +) + +# Error types +from .errors import ( + ErrorCodeModel, + ErrorContextModel, + FailureTypeModel, + NetworkErrorAnalysisModel, + NetworkErrorTypeModel, + RecoveryPolicyModel, + RecoveryStrategyModel, + SerializedErrorModel, +) + +# Event sourcing types +from .event_sourcing import ( + EventEnvelopeModel, + RecordedEventModel, + RecordedEventTypeModel, + ReplayComparisonModel, + ReplayedStateModel, + SnapshotModel, + StreamMetadataModel, +) +from .event_sourcing import ( + SerializedErrorModel as EventSourcingSerializedErrorModel, +) + +# Event/observability types +from .events import ( + CompleteEventModel, + ErrorEventModel, + ObservabilityEventModel, + ObservabilityEventTypeModel, + RetryAttemptEventModel, + SessionEndEventModel, + SessionStartEventModel, +) + +# Guardrail types +from .guardrails import ( + GuardrailConfigModel, + GuardrailContextModel, + GuardrailResultModel, + GuardrailResultSummaryModel, + GuardrailStateModel, + GuardrailViolationModel, + JsonAnalysisModel, + LatexAnalysisModel, + MarkdownAnalysisModel, +) + +# Metrics types +from .metrics import ( + MetricsSnapshotModel, +) + +# Parallel types +from .parallel import ( + AggregatedTelemetryModel, + ParallelOptionsModel, + ParallelResultModel, + RaceResultModel, +) + +# Pipeline types +from .pipeline import ( + PipelineOptionsModel, + PipelineResultModel, + PipelineStepModel, + StepContextModel, + StepResultModel, +) + +# Pool types +from .pool import ( + PoolOptionsModel, + PoolStatsModel, +) + +# State machine types +from .state_machine import ( + RuntimeStateModel, + StateTransitionModel, +) +from .types import ( + BackoffStrategyModel, + CheckIntervalsModel, + ContentTypeModel, + DataPayloadModel, + ErrorCategoryModel, + ErrorTypeDelaysModel, + EventModel, + EventTypeModel, + ProgressModel, + RetryableErrorTypeModel, + RetryModel, + StateModel, + TelemetryContinuationModel, + TelemetryDriftModel, + TelemetryGuardrailsModel, + TelemetryMetricsModel, + TelemetryModel, + TelemetryNetworkModel, + TimeoutModel, +) + +# Window types +from .window import ( + ChunkResultModel, + ContextRestorationOptionsModel, + DocumentChunkModel, + ProcessingStatsModel, + WindowConfigModel, + WindowStatsModel, +) + +__all__ = [ + # Core types + "BackoffStrategyModel", + "CheckIntervalsModel", + "ContentTypeModel", + "DataPayloadModel", + "ErrorCategoryModel", + "ErrorTypeDelaysModel", + "EventModel", + "EventTypeModel", + "ProgressModel", + "RetryableErrorTypeModel", + "RetryModel", + "StateModel", + "TelemetryContinuationModel", + "TelemetryDriftModel", + "TelemetryGuardrailsModel", + "TelemetryMetricsModel", + "TelemetryModel", + "TelemetryNetworkModel", + "TimeoutModel", + # Error types + "ErrorCodeModel", + "ErrorContextModel", + "FailureTypeModel", + "NetworkErrorAnalysisModel", + "NetworkErrorTypeModel", + "RecoveryPolicyModel", + "RecoveryStrategyModel", + "SerializedErrorModel", + # Guardrail types + "GuardrailConfigModel", + "GuardrailContextModel", + "GuardrailResultModel", + "GuardrailResultSummaryModel", + "GuardrailStateModel", + "GuardrailViolationModel", + "JsonAnalysisModel", + "LatexAnalysisModel", + "MarkdownAnalysisModel", + # Event/observability types + "CompleteEventModel", + "ErrorEventModel", + "ObservabilityEventModel", + "ObservabilityEventTypeModel", + "RetryAttemptEventModel", + "SessionEndEventModel", + "SessionStartEventModel", + # Consensus types + "AgreementModel", + "AgreementTypeModel", + "ConflictResolutionModel", + "ConsensusAnalysisModel", + "ConsensusOutputModel", + "ConsensusPresetModel", + "ConsensusResultModel", + "DisagreementModel", + "DisagreementSeverityModel", + "DisagreementValueModel", + "FieldAgreementModel", + "FieldConsensusModel", + "StrategyModel", + # Parallel types + "AggregatedTelemetryModel", + "ParallelOptionsModel", + "ParallelResultModel", + "RaceResultModel", + # Pipeline types + "PipelineOptionsModel", + "PipelineResultModel", + "PipelineStepModel", + "StepContextModel", + "StepResultModel", + # Pool types + "PoolOptionsModel", + "PoolStatsModel", + # Window types + "ChunkResultModel", + "ContextRestorationOptionsModel", + "DocumentChunkModel", + "ProcessingStatsModel", + "WindowConfigModel", + "WindowStatsModel", + # Drift types + "DriftConfigModel", + "DriftResultModel", + # State machine types + "RuntimeStateModel", + "StateTransitionModel", + # Event sourcing types + "EventEnvelopeModel", + "EventSourcingSerializedErrorModel", + "RecordedEventModel", + "RecordedEventTypeModel", + "ReplayComparisonModel", + "ReplayedStateModel", + "SnapshotModel", + "StreamMetadataModel", + # Metrics types + "MetricsSnapshotModel", +] diff --git a/src/l0/pydantic/consensus.py b/src/l0/pydantic/consensus.py new file mode 100644 index 0000000..11e93cf --- /dev/null +++ b/src/l0/pydantic/consensus.py @@ -0,0 +1,172 @@ +"""Pydantic models for L0 consensus types. + +These models mirror the dataclasses in l0.consensus for runtime validation. +""" + +from __future__ import annotations + +from enum import Enum +from typing import Any, Literal + +from pydantic import BaseModel, ConfigDict, Field + + +class StrategyModel(str, Enum): + """Consensus strategy.""" + + UNANIMOUS = "unanimous" + MAJORITY = "majority" + WEIGHTED = "weighted" + BEST = "best" + + +class ConflictResolutionModel(str, Enum): + """Conflict resolution strategy.""" + + VOTE = "vote" + MERGE = "merge" + BEST = "best" + FAIL = "fail" + + +class AgreementTypeModel(str, Enum): + """Type of agreement.""" + + EXACT = "exact" + SIMILAR = "similar" + STRUCTURAL = "structural" + SEMANTIC = "semantic" + + +class DisagreementSeverityModel(str, Enum): + """Severity of disagreement.""" + + MINOR = "minor" + MODERATE = "moderate" + MAJOR = "major" + CRITICAL = "critical" + + +class AgreementModel(BaseModel): + """Pydantic model for what outputs agreed on.""" + + model_config = ConfigDict(extra="forbid") + + content: Any + path: str | None = None + count: int = 0 + ratio: float = 0.0 + indices: list[int] = Field(default_factory=list) + type: AgreementTypeModel = AgreementTypeModel.EXACT + + +class DisagreementValueModel(BaseModel): + """Pydantic model for a single value in a disagreement.""" + + model_config = ConfigDict(extra="forbid") + + value: Any + count: int + indices: list[int] + + +class DisagreementModel(BaseModel): + """Pydantic model for where outputs differed.""" + + model_config = ConfigDict(extra="forbid") + + path: str | None = None + values: list[DisagreementValueModel] = Field(default_factory=list) + severity: DisagreementSeverityModel = DisagreementSeverityModel.MINOR + resolution: str | None = None + resolution_confidence: float | None = None + + +class ConsensusAnalysisModel(BaseModel): + """Pydantic model for detailed consensus statistics.""" + + model_config = ConfigDict(extra="forbid") + + total_outputs: int = 0 + successful_outputs: int = 0 + failed_outputs: int = 0 + identical_outputs: int = 0 + similarity_matrix: list[list[float]] = Field(default_factory=list) + average_similarity: float = 0.0 + min_similarity: float = 0.0 + max_similarity: float = 0.0 + total_agreements: int = 0 + total_disagreements: int = 0 + strategy: str = "" + conflict_resolution: str = "" + duration_ms: float = 0.0 + + +class FieldAgreementModel(BaseModel): + """Pydantic model for per-field consensus information.""" + + model_config = ConfigDict(extra="forbid") + + path: str + value: Any + agreement: float + votes: dict[str, int] + values: list[Any] + unanimous: bool + confidence: float + + +class FieldConsensusModel(BaseModel): + """Pydantic model for field-by-field consensus.""" + + model_config = ConfigDict(extra="forbid") + + fields: dict[str, FieldAgreementModel] = Field(default_factory=dict) + overall_agreement: float = 0.0 + agreed_fields: list[str] = Field(default_factory=list) + disagreed_fields: list[str] = Field(default_factory=list) + + +class ConsensusOutputModel(BaseModel): + """Pydantic model for individual output from a stream.""" + + model_config = ConfigDict(extra="forbid") + + index: int + text: str + value: Any + success: bool + data: Any = None + l0_result: Any = None + structured_result: Any = None + error: str | None = None + duration_ms: float = 0.0 + weight: float = 1.0 + similarities: list[float] | None = None + + +class ConsensusResultModel(BaseModel): + """Pydantic model for result of consensus operation.""" + + model_config = ConfigDict(extra="forbid") + + consensus: Any + confidence: float + outputs: list[ConsensusOutputModel] + agreements: list[AgreementModel] + disagreements: list[DisagreementModel] + analysis: ConsensusAnalysisModel + type: Literal["text", "structured"] = "text" + field_consensus: FieldConsensusModel | None = None + status: Literal["success", "partial", "failed"] = "success" + + +class ConsensusPresetModel(BaseModel): + """Pydantic model for preset consensus configuration.""" + + model_config = ConfigDict(extra="forbid") + + strategy: StrategyModel + threshold: float + resolve_conflicts: ConflictResolutionModel + minimum_agreement: float diff --git a/src/l0/pydantic/drift.py b/src/l0/pydantic/drift.py new file mode 100644 index 0000000..21ad25d --- /dev/null +++ b/src/l0/pydantic/drift.py @@ -0,0 +1,47 @@ +"""Pydantic models for L0 drift detection types. + +These models mirror the dataclasses in l0.drift for runtime validation. +""" + +from __future__ import annotations + +from enum import Enum +from typing import Literal + +from pydantic import BaseModel, ConfigDict, Field + +DriftType = Literal[ + "tone_shift", + "meta_commentary", + "format_collapse", + "repetition", + "entropy_spike", + "markdown_collapse", + "hedging", +] + + +class DriftResultModel(BaseModel): + """Pydantic model for drift detection result.""" + + model_config = ConfigDict(extra="forbid") + + detected: bool + confidence: float + types: list[DriftType] + details: str | None = None + + +class DriftConfigModel(BaseModel): + """Pydantic model for drift detection configuration.""" + + model_config = ConfigDict(extra="forbid") + + detect_tone_shift: bool = True + detect_meta_commentary: bool = True + detect_repetition: bool = True + detect_entropy_spike: bool = True + repetition_threshold: int = 3 + entropy_threshold: float = 2.5 + entropy_window: int = 50 + sliding_window_size: int = 500 diff --git a/src/l0/pydantic/errors.py b/src/l0/pydantic/errors.py new file mode 100644 index 0000000..68bb4db --- /dev/null +++ b/src/l0/pydantic/errors.py @@ -0,0 +1,125 @@ +"""Pydantic models for L0 error types.""" + +from __future__ import annotations + +from enum import Enum +from typing import Any + +from pydantic import BaseModel, ConfigDict, Field + + +class ErrorCodeModel(str, Enum): + """Error codes for programmatic handling.""" + + STREAM_ABORTED = "STREAM_ABORTED" + INITIAL_TOKEN_TIMEOUT = "INITIAL_TOKEN_TIMEOUT" + INTER_TOKEN_TIMEOUT = "INTER_TOKEN_TIMEOUT" + ZERO_OUTPUT = "ZERO_OUTPUT" + GUARDRAIL_VIOLATION = "GUARDRAIL_VIOLATION" + FATAL_GUARDRAIL_VIOLATION = "FATAL_GUARDRAIL_VIOLATION" + DRIFT_DETECTED = "DRIFT_DETECTED" + INVALID_STREAM = "INVALID_STREAM" + ADAPTER_NOT_FOUND = "ADAPTER_NOT_FOUND" + FEATURE_NOT_ENABLED = "FEATURE_NOT_ENABLED" + ALL_STREAMS_EXHAUSTED = "ALL_STREAMS_EXHAUSTED" + NETWORK_ERROR = "NETWORK_ERROR" + + +class FailureTypeModel(str, Enum): + """What actually went wrong - the root cause of the failure.""" + + NETWORK = "network" + MODEL = "model" + TOOL = "tool" + TIMEOUT = "timeout" + ABORT = "abort" + ZERO_OUTPUT = "zero_output" + UNKNOWN = "unknown" + + +class RecoveryStrategyModel(str, Enum): + """What L0 decided to do next after an error.""" + + RETRY = "retry" + FALLBACK = "fallback" + CONTINUE = "continue" + HALT = "halt" + + +class NetworkErrorTypeModel(str, Enum): + """Network error types that L0 can detect.""" + + CONNECTION_DROPPED = "connection_dropped" + FETCH_ERROR = "fetch_error" + ECONNRESET = "econnreset" + ECONNREFUSED = "econnrefused" + SSE_ABORTED = "sse_aborted" + NO_BYTES = "no_bytes" + PARTIAL_CHUNKS = "partial_chunks" + RUNTIME_KILLED = "runtime_killed" + BACKGROUND_THROTTLE = "background_throttle" + DNS_ERROR = "dns_error" + SSL_ERROR = "ssl_error" + TIMEOUT = "timeout" + UNKNOWN = "unknown" + + +class RecoveryPolicyModel(BaseModel): + """Pydantic model for recovery policy.""" + + model_config = ConfigDict(extra="forbid") + + retry_enabled: bool = True + fallback_enabled: bool = False + max_retries: int = 3 + max_fallbacks: int = 0 + attempt: int = 1 + fallback_index: int | None = None + + +class ErrorContextModel(BaseModel): + """Pydantic model for error context.""" + + model_config = ConfigDict(extra="forbid") + + code: ErrorCodeModel + checkpoint: str | None = None + token_count: int | None = None + content_length: int | None = None + model_retry_count: int | None = None + network_retry_count: int | None = None + fallback_index: int | None = None + metadata: dict[str, Any] | None = None + context: dict[str, Any] | None = None + + +class NetworkErrorAnalysisModel(BaseModel): + """Pydantic model for network error analysis.""" + + model_config = ConfigDict(extra="forbid") + + type: NetworkErrorTypeModel + retryable: bool + counts_toward_limit: bool + suggestion: str + context: dict[str, Any] = Field(default_factory=dict) + + +class SerializedErrorModel(BaseModel): + """Pydantic model for serialized error (for transport/storage).""" + + model_config = ConfigDict(extra="forbid") + + name: str + code: str + category: str + message: str + timestamp: float + has_checkpoint: bool = Field(alias="hasCheckpoint") + checkpoint: str | None = None + token_count: int | None = Field(default=None, alias="tokenCount") + model_retry_count: int | None = Field(default=None, alias="modelRetryCount") + network_retry_count: int | None = Field(default=None, alias="networkRetryCount") + fallback_index: int | None = Field(default=None, alias="fallbackIndex") + metadata: dict[str, Any] | None = None + context: dict[str, Any] | None = None diff --git a/src/l0/pydantic/event_sourcing.py b/src/l0/pydantic/event_sourcing.py new file mode 100644 index 0000000..c0e44d0 --- /dev/null +++ b/src/l0/pydantic/event_sourcing.py @@ -0,0 +1,111 @@ +"""Pydantic models for L0 event sourcing types. + +These models mirror the dataclasses in l0.event_sourcing for runtime validation. +""" + +from __future__ import annotations + +from enum import Enum +from typing import Any + +from pydantic import BaseModel, ConfigDict, Field + + +class RecordedEventTypeModel(str, Enum): + """Types of recorded events.""" + + START = "START" + TOKEN = "TOKEN" + CHECKPOINT = "CHECKPOINT" + GUARDRAIL = "GUARDRAIL" + DRIFT = "DRIFT" + RETRY = "RETRY" + FALLBACK = "FALLBACK" + CONTINUATION = "CONTINUATION" + COMPLETE = "COMPLETE" + ERROR = "ERROR" + + +class RecordedEventModel(BaseModel): + """Pydantic model for a recorded event from L0 execution.""" + + model_config = ConfigDict(extra="forbid") + + type: RecordedEventTypeModel + ts: float + data: dict[str, Any] = Field(default_factory=dict) + + +class EventEnvelopeModel(BaseModel): + """Pydantic model for wrapper around a recorded event.""" + + model_config = ConfigDict(extra="forbid") + + stream_id: str + seq: int + event: RecordedEventModel + + +class SnapshotModel(BaseModel): + """Pydantic model for state snapshot.""" + + model_config = ConfigDict(extra="forbid") + + stream_id: str + seq: int + state: dict[str, Any] + ts: float + + +class SerializedErrorModel(BaseModel): + """Pydantic model for serialized error.""" + + model_config = ConfigDict(extra="forbid") + + name: str + message: str + stack: str | None = None + code: str | None = None + + +class ReplayedStateModel(BaseModel): + """Pydantic model for state reconstructed from replay.""" + + model_config = ConfigDict(extra="forbid") + + content: str = "" + token_count: int = 0 + checkpoint: str = "" + violations: list[Any] = Field(default_factory=list) + drift_detected: bool = False + retry_attempts: int = 0 + network_retry_count: int = 0 + fallback_index: int = 0 + completed: bool = False + error: SerializedErrorModel | None = None + start_ts: float = 0 + end_ts: float = 0 + + +class StreamMetadataModel(BaseModel): + """Pydantic model for stream metadata.""" + + model_config = ConfigDict(extra="forbid") + + stream_id: str + event_count: int + token_count: int + start_ts: float + end_ts: float + completed: bool + has_error: bool + options: dict[str, Any] + + +class ReplayComparisonModel(BaseModel): + """Pydantic model for comparing two replays.""" + + model_config = ConfigDict(extra="forbid") + + identical: bool + differences: list[str] diff --git a/src/l0/pydantic/events.py b/src/l0/pydantic/events.py new file mode 100644 index 0000000..aa300b7 --- /dev/null +++ b/src/l0/pydantic/events.py @@ -0,0 +1,202 @@ +"""Pydantic models for L0 events/observability types.""" + +from __future__ import annotations + +from enum import Enum +from typing import Any + +from pydantic import BaseModel, ConfigDict, Field + + +class ObservabilityEventTypeModel(str, Enum): + """Observability event types.""" + + # Session events + SESSION_START = "SESSION_START" + SESSION_END = "SESSION_END" + SESSION_SUMMARY = "SESSION_SUMMARY" + ATTEMPT_START = "ATTEMPT_START" + + # Stream events + STREAM_INIT = "STREAM_INIT" + STREAM_READY = "STREAM_READY" + TOKEN = "TOKEN" + + # Adapter events + ADAPTER_DETECTED = "ADAPTER_DETECTED" + ADAPTER_WRAP_START = "ADAPTER_WRAP_START" + ADAPTER_WRAP_END = "ADAPTER_WRAP_END" + + # Timeout events + TIMEOUT_START = "TIMEOUT_START" + TIMEOUT_RESET = "TIMEOUT_RESET" + TIMEOUT_TRIGGERED = "TIMEOUT_TRIGGERED" + + # Network events + NETWORK_ERROR = "NETWORK_ERROR" + NETWORK_RECOVERY = "NETWORK_RECOVERY" + CONNECTION_DROPPED = "CONNECTION_DROPPED" + CONNECTION_RESTORED = "CONNECTION_RESTORED" + + # Abort events + ABORT_REQUESTED = "ABORT_REQUESTED" + ABORT_COMPLETED = "ABORT_COMPLETED" + + # Guardrail events + GUARDRAIL_PHASE_START = "GUARDRAIL_PHASE_START" + GUARDRAIL_RULE_START = "GUARDRAIL_RULE_START" + GUARDRAIL_RULE_RESULT = "GUARDRAIL_RULE_RESULT" + GUARDRAIL_RULE_END = "GUARDRAIL_RULE_END" + GUARDRAIL_PHASE_END = "GUARDRAIL_PHASE_END" + GUARDRAIL_CALLBACK_START = "GUARDRAIL_CALLBACK_START" + GUARDRAIL_CALLBACK_END = "GUARDRAIL_CALLBACK_END" + + # Drift events + DRIFT_CHECK_START = "DRIFT_CHECK_START" + DRIFT_CHECK_RESULT = "DRIFT_CHECK_RESULT" + DRIFT_CHECK_END = "DRIFT_CHECK_END" + DRIFT_CHECK_SKIPPED = "DRIFT_CHECK_SKIPPED" + + # Checkpoint events + CHECKPOINT_SAVED = "CHECKPOINT_SAVED" + RESUME_START = "RESUME_START" + + # Retry events + RETRY_START = "RETRY_START" + RETRY_ATTEMPT = "RETRY_ATTEMPT" + RETRY_END = "RETRY_END" + RETRY_GIVE_UP = "RETRY_GIVE_UP" + RETRY_FN_START = "RETRY_FN_START" + RETRY_FN_RESULT = "RETRY_FN_RESULT" + RETRY_FN_ERROR = "RETRY_FN_ERROR" + + # Fallback events + FALLBACK_START = "FALLBACK_START" + FALLBACK_MODEL_SELECTED = "FALLBACK_MODEL_SELECTED" + FALLBACK_END = "FALLBACK_END" + + # Structured events + STRUCTURED_PARSE_START = "STRUCTURED_PARSE_START" + STRUCTURED_PARSE_END = "STRUCTURED_PARSE_END" + STRUCTURED_PARSE_ERROR = "STRUCTURED_PARSE_ERROR" + STRUCTURED_VALIDATION_START = "STRUCTURED_VALIDATION_START" + STRUCTURED_VALIDATION_END = "STRUCTURED_VALIDATION_END" + STRUCTURED_VALIDATION_ERROR = "STRUCTURED_VALIDATION_ERROR" + STRUCTURED_AUTO_CORRECT_START = "STRUCTURED_AUTO_CORRECT_START" + STRUCTURED_AUTO_CORRECT_END = "STRUCTURED_AUTO_CORRECT_END" + + # Continuation events + CONTINUATION_START = "CONTINUATION_START" + + # Tool events + TOOL_REQUESTED = "TOOL_REQUESTED" + TOOL_START = "TOOL_START" + TOOL_RESULT = "TOOL_RESULT" + TOOL_ERROR = "TOOL_ERROR" + TOOL_COMPLETED = "TOOL_COMPLETED" + + # Completion events + COMPLETE = "COMPLETE" + ERROR = "ERROR" + + +class ObservabilityEventModel(BaseModel): + """Pydantic model for observability event.""" + + model_config = ConfigDict( + extra="allow" + ) # Allow extra fields for event-specific data + + type: ObservabilityEventTypeModel + ts: float = Field(alias="timestamp") + stream_id: str + context: dict[str, Any] = Field(default_factory=dict) + meta: dict[str, Any] | None = None + + # Common optional fields for specific event types + attempt: int | None = None + is_retry: bool | None = None + is_fallback: bool | None = None + duration_ms: float | None = None + success: bool | None = None + token_count: int | None = None + error: str | None = None + reason: str | None = None + + +class SessionStartEventModel(BaseModel): + """Pydantic model for session start event.""" + + model_config = ConfigDict(extra="forbid") + + type: ObservabilityEventTypeModel = ObservabilityEventTypeModel.SESSION_START + ts: float + stream_id: str + context: dict[str, Any] = Field(default_factory=dict) + attempt: int + is_retry: bool + is_fallback: bool + + +class SessionEndEventModel(BaseModel): + """Pydantic model for session end event.""" + + model_config = ConfigDict(extra="forbid") + + type: ObservabilityEventTypeModel = ObservabilityEventTypeModel.SESSION_END + ts: float + stream_id: str + context: dict[str, Any] = Field(default_factory=dict) + duration_ms: float + success: bool + token_count: int + + +class RetryAttemptEventModel(BaseModel): + """Pydantic model for retry attempt event.""" + + model_config = ConfigDict(extra="forbid") + + type: ObservabilityEventTypeModel = ObservabilityEventTypeModel.RETRY_ATTEMPT + ts: float + stream_id: str + context: dict[str, Any] = Field(default_factory=dict) + index: int | None = None + attempt: int + max_attempts: int + reason: str + delay_ms: float + counts_toward_limit: bool | None = None + is_network: bool | None = None + is_model_issue: bool | None = None + + +class CompleteEventModel(BaseModel): + """Pydantic model for complete event.""" + + model_config = ConfigDict(extra="forbid") + + type: ObservabilityEventTypeModel = ObservabilityEventTypeModel.COMPLETE + ts: float + stream_id: str + context: dict[str, Any] = Field(default_factory=dict) + token_count: int + content_length: int + duration_ms: float + state: dict[str, Any] | None = None + + +class ErrorEventModel(BaseModel): + """Pydantic model for error event.""" + + model_config = ConfigDict(extra="forbid") + + type: ObservabilityEventTypeModel = ObservabilityEventTypeModel.ERROR + ts: float + stream_id: str + context: dict[str, Any] = Field(default_factory=dict) + error: str + error_code: str | None = None + failure_type: str + recovery_strategy: str + policy: dict[str, Any] diff --git a/src/l0/pydantic/guardrails.py b/src/l0/pydantic/guardrails.py new file mode 100644 index 0000000..aaa101a --- /dev/null +++ b/src/l0/pydantic/guardrails.py @@ -0,0 +1,130 @@ +"""Pydantic models for L0 guardrails types.""" + +from __future__ import annotations + +from typing import Any, Literal + +from pydantic import BaseModel, ConfigDict, Field + +Severity = Literal["warning", "error", "fatal"] + + +class GuardrailViolationModel(BaseModel): + """Pydantic model for guardrail violation.""" + + model_config = ConfigDict(extra="forbid") + + rule: str + message: str + severity: Severity + recoverable: bool = True + position: int | None = None + timestamp: float | None = None + context: dict[str, Any] | None = None + suggestion: str | None = None + + +class GuardrailContextModel(BaseModel): + """Pydantic model for guardrail context.""" + + model_config = ConfigDict(extra="forbid") + + content: str + completed: bool = False + checkpoint: str | None = None + delta: str | None = None + token_count: int = 0 + metadata: dict[str, Any] | None = None + previous_violations: list[GuardrailViolationModel] | None = None + + +class GuardrailResultSummaryModel(BaseModel): + """Pydantic model for guardrail result summary.""" + + model_config = ConfigDict(extra="forbid") + + total: int + fatal: int + errors: int + warnings: int + + +class GuardrailResultModel(BaseModel): + """Pydantic model for guardrail result.""" + + model_config = ConfigDict(extra="forbid") + + passed: bool + violations: list[GuardrailViolationModel] + should_retry: bool + should_halt: bool + summary: GuardrailResultSummaryModel + + +class GuardrailStateModel(BaseModel): + """Pydantic model for guardrail engine state.""" + + model_config = ConfigDict(extra="forbid") + + violations: list[GuardrailViolationModel] = Field(default_factory=list) + violations_by_rule: dict[str, list[GuardrailViolationModel]] = Field( + default_factory=dict + ) + has_fatal_violations: bool = False + has_error_violations: bool = False + violation_count: int = 0 + last_check_time: float | None = None + + +class GuardrailConfigModel(BaseModel): + """Pydantic model for guardrail configuration.""" + + model_config = ConfigDict(extra="forbid") + + rules: list[Any] = Field(default_factory=list) # GuardrailRule (has callable) + stop_on_fatal: bool = True + enable_streaming: bool = True + check_interval: int = 100 + + +class JsonAnalysisModel(BaseModel): + """Pydantic model for JSON structure analysis.""" + + model_config = ConfigDict(extra="forbid") + + is_balanced: bool + open_braces: int + close_braces: int + open_brackets: int + close_brackets: int + in_string: bool + unclosed_string: bool + issues: list[str] = Field(default_factory=list) + + +class MarkdownAnalysisModel(BaseModel): + """Pydantic model for Markdown structure analysis.""" + + model_config = ConfigDict(extra="forbid") + + is_balanced: bool + in_fence: bool + open_fences: int + close_fences: int + fence_languages: list[str] = Field(default_factory=list) + table_rows: int + inconsistent_columns: bool + issues: list[str] = Field(default_factory=list) + + +class LatexAnalysisModel(BaseModel): + """Pydantic model for LaTeX structure analysis.""" + + model_config = ConfigDict(extra="forbid") + + is_balanced: bool + open_environments: list[str] = Field(default_factory=list) + display_math_balanced: bool + inline_math_balanced: bool + bracket_math_balanced: bool + issues: list[str] = Field(default_factory=list) diff --git a/src/l0/pydantic/metrics.py b/src/l0/pydantic/metrics.py new file mode 100644 index 0000000..f640e5f --- /dev/null +++ b/src/l0/pydantic/metrics.py @@ -0,0 +1,25 @@ +"""Pydantic models for L0 metrics types. + +These models mirror the dataclasses in l0.metrics for runtime validation. +""" + +from __future__ import annotations + +from pydantic import BaseModel, ConfigDict + + +class MetricsSnapshotModel(BaseModel): + """Pydantic model for a snapshot of all metrics.""" + + model_config = ConfigDict(extra="forbid") + + requests: int + tokens: int + retries: int + network_retry_count: int + errors: int + violations: int + drift_detections: int + fallbacks: int + completions: int + timeouts: int diff --git a/src/l0/pydantic/parallel.py b/src/l0/pydantic/parallel.py new file mode 100644 index 0000000..af4566b --- /dev/null +++ b/src/l0/pydantic/parallel.py @@ -0,0 +1,58 @@ +"""Pydantic models for L0 parallel execution types. + +These models mirror the dataclasses in l0.parallel for runtime validation. +""" + +from __future__ import annotations + +from typing import Any, Generic, TypeVar + +from pydantic import BaseModel, ConfigDict, Field + +T = TypeVar("T") + + +class AggregatedTelemetryModel(BaseModel): + """Pydantic model for aggregated telemetry from parallel operations.""" + + model_config = ConfigDict(extra="forbid") + + total_tokens: int = 0 + total_duration: float = 0.0 + total_retries: int = 0 + total_network_errors: int = 0 + total_violations: int = 0 + avg_tokens_per_second: float = 0.0 + avg_time_to_first_token: float = 0.0 + + +class RaceResultModel(BaseModel): + """Pydantic model for result from race operation.""" + + model_config = ConfigDict(extra="forbid") + + value: Any + winner_index: int + + +class ParallelResultModel(BaseModel): + """Pydantic model for result of parallel execution.""" + + model_config = ConfigDict(extra="forbid") + + results: list[Any | None] + errors: list[str | None] # Serialized as strings + success_count: int = 0 + failure_count: int = 0 + duration: float = 0.0 + aggregated_telemetry: AggregatedTelemetryModel | None = None + + +class ParallelOptionsModel(BaseModel): + """Pydantic model for parallel execution options.""" + + model_config = ConfigDict(extra="forbid") + + concurrency: int = 5 + fail_fast: bool = False + # Callbacks are not serializable, so they are omitted diff --git a/src/l0/pydantic/pipeline.py b/src/l0/pydantic/pipeline.py new file mode 100644 index 0000000..047f9f9 --- /dev/null +++ b/src/l0/pydantic/pipeline.py @@ -0,0 +1,82 @@ +"""Pydantic models for L0 pipeline types. + +These models mirror the dataclasses in l0.pipeline for runtime validation. +""" + +from __future__ import annotations + +from typing import Any, Literal + +from pydantic import BaseModel, ConfigDict, Field + + +class StepContextModel(BaseModel): + """Pydantic model for context passed to each pipeline step.""" + + model_config = ConfigDict(extra="forbid") + + step_index: int + total_steps: int + previous_results: list["StepResultModel"] = Field(default_factory=list) + metadata: dict[str, Any] = Field(default_factory=dict) + cancelled: bool = False + + +class StepResultModel(BaseModel): + """Pydantic model for result from a single pipeline step.""" + + model_config = ConfigDict(extra="forbid") + + step_name: str + step_index: int + input: Any + output: Any | None + raw_content: str + status: Literal["success", "error", "skipped"] + error: str | None = None # Serialized as string + duration: int = 0 + startTime: int = 0 + endTime: int = 0 + token_count: int = 0 + + +# Forward reference update +StepContextModel.model_rebuild() + + +class PipelineStepModel(BaseModel): + """Pydantic model for pipeline step configuration.""" + + model_config = ConfigDict(extra="forbid") + + name: str + metadata: dict[str, Any] = Field(default_factory=dict) + # Functions (fn, transform, condition, etc.) are not serializable + + +class PipelineOptionsModel(BaseModel): + """Pydantic model for pipeline configuration options.""" + + model_config = ConfigDict(extra="forbid") + + name: str | None = None + stop_on_error: bool = True + timeout: float | None = None + metadata: dict[str, Any] = Field(default_factory=dict) + # Callbacks are not serializable + + +class PipelineResultModel(BaseModel): + """Pydantic model for result from pipeline execution.""" + + model_config = ConfigDict(extra="forbid") + + name: str | None + output: Any | None + steps: list[StepResultModel] + status: Literal["success", "error", "partial"] + error: str | None = None # Serialized as string + duration: int = 0 + startTime: int = 0 + endTime: int = 0 + metadata: dict[str, Any] = Field(default_factory=dict) diff --git a/src/l0/pydantic/pool.py b/src/l0/pydantic/pool.py new file mode 100644 index 0000000..ef92868 --- /dev/null +++ b/src/l0/pydantic/pool.py @@ -0,0 +1,35 @@ +"""Pydantic models for L0 pool types. + +These models mirror the dataclasses in l0.pool for runtime validation. +""" + +from __future__ import annotations + +from typing import Any + +from pydantic import BaseModel, ConfigDict + +from .types import RetryModel, TimeoutModel + + +class PoolOptionsModel(BaseModel): + """Pydantic model for operation pool configuration.""" + + model_config = ConfigDict(extra="forbid") + + shared_retry: RetryModel | None = None + shared_timeout: TimeoutModel | None = None + shared_guardrails: list[Any] | None = None # GuardrailRule not serializable + context: dict[str, Any] | None = None + # on_event callback is not serializable + + +class PoolStatsModel(BaseModel): + """Pydantic model for operation pool statistics.""" + + model_config = ConfigDict(extra="forbid") + + total_executed: int = 0 + total_succeeded: int = 0 + total_failed: int = 0 + total_duration: float = 0.0 diff --git a/src/l0/pydantic/state_machine.py b/src/l0/pydantic/state_machine.py new file mode 100644 index 0000000..7ae4628 --- /dev/null +++ b/src/l0/pydantic/state_machine.py @@ -0,0 +1,36 @@ +"""Pydantic models for L0 state machine types. + +These models mirror the dataclasses in l0.state_machine for runtime validation. +""" + +from __future__ import annotations + +from enum import Enum + +from pydantic import BaseModel, ConfigDict + + +class RuntimeStateModel(str, Enum): + """Runtime state constants.""" + + INIT = "init" + WAITING_FOR_TOKEN = "waiting_for_token" + STREAMING = "streaming" + TOOL_CALL_DETECTED = "tool_call_detected" + CONTINUATION_MATCHING = "continuation_matching" + CHECKPOINT_VERIFYING = "checkpoint_verifying" + RETRYING = "retrying" + FALLBACK = "fallback" + FINALIZING = "finalizing" + COMPLETE = "complete" + ERROR = "error" + + +class StateTransitionModel(BaseModel): + """Pydantic model for a state transition record.""" + + model_config = ConfigDict(extra="forbid") + + from_state: RuntimeStateModel + to_state: RuntimeStateModel + timestamp: float diff --git a/src/l0/pydantic/types.py b/src/l0/pydantic/types.py new file mode 100644 index 0000000..40f827b --- /dev/null +++ b/src/l0/pydantic/types.py @@ -0,0 +1,268 @@ +"""Pydantic models for L0 core types. + +These models mirror the dataclasses in l0.types for runtime validation. +""" + +from __future__ import annotations + +from enum import Enum +from typing import Any + +from pydantic import BaseModel, ConfigDict, Field + + +class EventTypeModel(str, Enum): + """Type of streaming event.""" + + TOKEN = "token" + MESSAGE = "message" + DATA = "data" + PROGRESS = "progress" + TOOL_CALL = "tool_call" + ERROR = "error" + COMPLETE = "complete" + + +class ContentTypeModel(str, Enum): + """Type of multimodal content.""" + + TEXT = "text" + IMAGE = "image" + AUDIO = "audio" + VIDEO = "video" + FILE = "file" + JSON = "json" + BINARY = "binary" + + +class ErrorCategoryModel(str, Enum): + """Category of error for retry decisions.""" + + NETWORK = "network" + TRANSIENT = "transient" + MODEL = "model" + CONTENT = "content" + PROVIDER = "provider" + FATAL = "fatal" + INTERNAL = "internal" + + +class BackoffStrategyModel(str, Enum): + """Backoff strategy for retries.""" + + EXPONENTIAL = "exponential" + LINEAR = "linear" + FIXED = "fixed" + FULL_JITTER = "full-jitter" + FIXED_JITTER = "fixed-jitter" + + +class RetryableErrorTypeModel(str, Enum): + """Error types that can be retried.""" + + ZERO_OUTPUT = "zero_output" + GUARDRAIL_VIOLATION = "guardrail_violation" + DRIFT = "drift" + INCOMPLETE = "incomplete" + NETWORK_ERROR = "network_error" + TIMEOUT = "timeout" + RATE_LIMIT = "rate_limit" + SERVER_ERROR = "server_error" + + +class DataPayloadModel(BaseModel): + """Pydantic model for multimodal data payload.""" + + model_config = ConfigDict(extra="forbid") + + content_type: ContentTypeModel + mime_type: str | None = None + base64: str | None = None + url: str | None = None + data: bytes | None = None + json_data: Any = Field(default=None, alias="json") + metadata: dict[str, Any] | None = None + + +class ProgressModel(BaseModel): + """Pydantic model for progress updates.""" + + model_config = ConfigDict(extra="forbid") + + percent: float | None = None + step: int | None = None + total_steps: int | None = None + message: str | None = None + eta: float | None = None + + +class EventModel(BaseModel): + """Pydantic model for unified stream events.""" + + model_config = ConfigDict(extra="forbid") + + type: EventTypeModel + text: str | None = None + data: dict[str, Any] | None = None + payload: DataPayloadModel | None = None + progress: ProgressModel | None = None + error: str | None = None # Serialized as string for Pydantic + usage: dict[str, int] | None = None + timestamp: float | None = None + + +class ErrorTypeDelaysModel(BaseModel): + """Pydantic model for per-error-type delay configuration.""" + + model_config = ConfigDict(extra="forbid") + + connection_dropped: float | None = 1.0 + fetch_error: float | None = 0.5 + econnreset: float | None = 1.0 + econnrefused: float | None = 2.0 + sse_aborted: float | None = 0.5 + no_bytes: float | None = 0.5 + partial_chunks: float | None = 0.5 + runtime_killed: float | None = 2.0 + background_throttle: float | None = 5.0 + dns_error: float | None = 3.0 + ssl_error: float | None = 0.0 + timeout: float | None = 1.0 + unknown: float | None = 1.0 + + +class RetryModel(BaseModel): + """Pydantic model for retry configuration.""" + + model_config = ConfigDict(extra="forbid") + + attempts: int | None = 3 + max_retries: int | None = 6 + base_delay: float | None = 1.0 + max_delay: float | None = 10.0 + strategy: BackoffStrategyModel | None = BackoffStrategyModel.FIXED_JITTER + error_type_delays: ErrorTypeDelaysModel | None = None + retry_on: list[RetryableErrorTypeModel] | None = None + + +class TimeoutModel(BaseModel): + """Pydantic model for timeout configuration.""" + + model_config = ConfigDict(extra="forbid") + + initial_token: int = 5000 + inter_token: int = 10000 + + +class CheckIntervalsModel(BaseModel): + """Pydantic model for check frequency configuration.""" + + model_config = ConfigDict(extra="forbid") + + guardrails: int = 15 + drift: int = 25 + checkpoint: int = 20 + + +class StateModel(BaseModel): + """Pydantic model for runtime state tracking.""" + + model_config = ConfigDict(extra="forbid") + + content: str = "" + checkpoint: str = "" + token_count: int = 0 + model_retry_count: int = 0 + network_retry_count: int = 0 + fallback_index: int = 0 + violations: list[Any] = Field(default_factory=list) # GuardrailViolation + drift_detected: bool = False + completed: bool = False + aborted: bool = False + first_token_at: float | None = None + last_token_at: float | None = None + duration: float | None = None + resumed: bool = False + network_errors: list[Any] = Field(default_factory=list) + data_outputs: list[DataPayloadModel] = Field(default_factory=list) + last_progress: ProgressModel | None = None + resume_point: str | None = None + resume_from: int | None = None + continuation_used: bool = False + deduplication_applied: bool = False + overlap_removed: str | None = None + + +class TelemetryMetricsModel(BaseModel): + """Pydantic model for telemetry metrics.""" + + model_config = ConfigDict(extra="forbid") + + time_to_first_token: float | None = None + avg_inter_token_time: float | None = None + tokens_per_second: float | None = None + total_tokens: int = 0 + total_retries: int = 0 + network_retry_count: int = 0 + model_retry_count: int = 0 + + +class TelemetryNetworkModel(BaseModel): + """Pydantic model for network telemetry.""" + + model_config = ConfigDict(extra="forbid") + + error_count: int = 0 + errors_by_type: dict[str, int] = Field(default_factory=dict) + errors: list[dict[str, Any]] | None = None + + +class TelemetryGuardrailsModel(BaseModel): + """Pydantic model for guardrail telemetry.""" + + model_config = ConfigDict(extra="forbid") + + violation_count: int = 0 + violations_by_rule: dict[str, int] = Field(default_factory=dict) + violations_by_rule_and_severity: dict[str, dict[str, int]] = Field( + default_factory=dict + ) + violations_by_severity: dict[str, int] = Field(default_factory=dict) + + +class TelemetryDriftModel(BaseModel): + """Pydantic model for drift telemetry.""" + + model_config = ConfigDict(extra="forbid") + + detected: bool = False + types: list[str] = Field(default_factory=list) + + +class TelemetryContinuationModel(BaseModel): + """Pydantic model for continuation telemetry.""" + + model_config = ConfigDict(extra="forbid") + + enabled: bool = False + used: bool = False + checkpoint_content: str | None = None + checkpoint_length: int | None = None + continuation_count: int | None = None + + +class TelemetryModel(BaseModel): + """Pydantic model for full telemetry.""" + + model_config = ConfigDict(extra="forbid") + + session_id: str + start_time: float + end_time: float | None = None + duration: float | None = None + metrics: TelemetryMetricsModel + network: TelemetryNetworkModel + guardrails: TelemetryGuardrailsModel | None = None + drift: TelemetryDriftModel | None = None + continuation: TelemetryContinuationModel | None = None + metadata: dict[str, Any] | None = None diff --git a/src/l0/pydantic/window.py b/src/l0/pydantic/window.py new file mode 100644 index 0000000..2a5518f --- /dev/null +++ b/src/l0/pydantic/window.py @@ -0,0 +1,95 @@ +"""Pydantic models for L0 window types. + +These models mirror the dataclasses in l0.window for runtime validation. +""" + +from __future__ import annotations + +from typing import Any, Literal + +from pydantic import BaseModel, ConfigDict, Field + +ChunkingStrategy = Literal["token", "char", "paragraph", "sentence"] +ContextRestorationStrategy = Literal["adjacent", "overlap", "full"] + + +class DocumentChunkModel(BaseModel): + """Pydantic model for a chunk of a document.""" + + model_config = ConfigDict(extra="forbid") + + index: int + content: str + start_pos: int + end_pos: int + token_count: int + char_count: int + is_first: bool + is_last: bool + total_chunks: int + metadata: dict[str, Any] | None = None + + +class WindowConfigModel(BaseModel): + """Pydantic model for document windowing configuration.""" + + model_config = ConfigDict(extra="forbid") + + size: int = 2000 + overlap: int = 200 + strategy: ChunkingStrategy = "token" + preserve_paragraphs: bool = True + preserve_sentences: bool = False + metadata: dict[str, Any] | None = None + # estimate_tokens callback is not serializable + + +class ContextRestorationOptionsModel(BaseModel): + """Pydantic model for context restoration options.""" + + model_config = ConfigDict(extra="forbid") + + enabled: bool = True + strategy: ContextRestorationStrategy = "adjacent" + max_attempts: int = 2 + # on_restore callback is not serializable + + +class WindowStatsModel(BaseModel): + """Pydantic model for document window statistics.""" + + model_config = ConfigDict(extra="forbid") + + total_chunks: int + total_chars: int + total_tokens: int + avg_chunk_size: int + avg_chunk_tokens: int + overlap_size: int + strategy: ChunkingStrategy + + +class ChunkResultModel(BaseModel): + """Pydantic model for result of processing a chunk.""" + + model_config = ConfigDict(extra="forbid") + + chunk: DocumentChunkModel + status: Literal["success", "error"] + content: str = "" + error: str | None = None + duration: float = 0.0 + # result (Stream) is not serializable + + +class ProcessingStatsModel(BaseModel): + """Pydantic model for processing statistics.""" + + model_config = ConfigDict(extra="forbid") + + total: int + successful: int + failed: int + success_rate: float + avg_duration: float + total_duration: float diff --git a/tests/test_pydantic_models.py b/tests/test_pydantic_models.py new file mode 100644 index 0000000..f6a1e44 --- /dev/null +++ b/tests/test_pydantic_models.py @@ -0,0 +1,469 @@ +"""Tests for L0 Pydantic models.""" + +import pytest +from pydantic import ValidationError + +from l0.pydantic import ( + AggregatedTelemetryModel, + AgreementModel, + BackoffStrategyModel, + ChunkResultModel, + ConsensusAnalysisModel, + # Consensus types + ConsensusOutputModel, + ConsensusResultModel, + DisagreementModel, + # Window types + DocumentChunkModel, + DriftConfigModel, + # Drift types + DriftResultModel, + # Error types + ErrorCodeModel, + ErrorContextModel, + EventEnvelopeModel, + EventModel, + EventTypeModel, + GuardrailResultModel, + GuardrailResultSummaryModel, + # Guardrail types + GuardrailViolationModel, + # Metrics types + MetricsSnapshotModel, + # Parallel types + ParallelResultModel, + PipelineResultModel, + # Pool types + PoolStatsModel, + RaceResultModel, + # Event sourcing types + RecordedEventModel, + RecordedEventTypeModel, + RecoveryPolicyModel, + RetryModel, + # State machine types + RuntimeStateModel, + # Core types + StateModel, + StateTransitionModel, + # Pipeline types + StepResultModel, + TimeoutModel, + WindowStatsModel, +) + + +class TestCoreTypes: + """Tests for core Pydantic models.""" + + def test_state_model_defaults(self): + """Test StateModel with default values.""" + state = StateModel() + assert state.content == "" + assert state.token_count == 0 + assert state.completed is False + assert state.violations == [] + + def test_state_model_with_values(self): + """Test StateModel with explicit values.""" + state = StateModel( + content="Hello world", + token_count=3, + completed=True, + ) + assert state.content == "Hello world" + assert state.token_count == 3 + assert state.completed is True + + def test_state_model_json_roundtrip(self): + """Test JSON serialization roundtrip.""" + state = StateModel(content="test", token_count=5) + json_str = state.model_dump_json() + restored = StateModel.model_validate_json(json_str) + assert restored.content == state.content + assert restored.token_count == state.token_count + + def test_retry_model_defaults(self): + """Test RetryModel with defaults.""" + retry = RetryModel() + assert retry.attempts == 3 + assert retry.base_delay == 1.0 + assert retry.strategy == BackoffStrategyModel.FIXED_JITTER + + def test_retry_model_custom(self): + """Test RetryModel with custom values.""" + retry = RetryModel( + attempts=5, + base_delay=2.0, + strategy=BackoffStrategyModel.EXPONENTIAL, + ) + assert retry.attempts == 5 + assert retry.base_delay == 2.0 + assert retry.strategy == BackoffStrategyModel.EXPONENTIAL + + def test_timeout_model(self): + """Test TimeoutModel.""" + timeout = TimeoutModel(initial_token=10000, inter_token=5000) + assert timeout.initial_token == 10000 + assert timeout.inter_token == 5000 + + def test_event_model(self): + """Test EventModel.""" + event = EventModel(type=EventTypeModel.TOKEN, text="hello") + assert event.type == EventTypeModel.TOKEN + assert event.text == "hello" + + +class TestErrorTypes: + """Tests for error Pydantic models.""" + + def test_error_code_enum(self): + """Test ErrorCodeModel enum values.""" + assert ErrorCodeModel.STREAM_ABORTED == "STREAM_ABORTED" + assert ErrorCodeModel.NETWORK_ERROR == "NETWORK_ERROR" + + def test_recovery_policy_model(self): + """Test RecoveryPolicyModel.""" + policy = RecoveryPolicyModel( + retry_enabled=True, + fallback_enabled=True, + max_retries=5, + ) + assert policy.retry_enabled is True + assert policy.max_retries == 5 + + def test_error_context_model(self): + """Test ErrorContextModel.""" + context = ErrorContextModel( + code=ErrorCodeModel.NETWORK_ERROR, + token_count=100, + ) + assert context.code == ErrorCodeModel.NETWORK_ERROR + assert context.token_count == 100 + + +class TestGuardrailTypes: + """Tests for guardrail Pydantic models.""" + + def test_guardrail_violation_model(self): + """Test GuardrailViolationModel.""" + violation = GuardrailViolationModel( + rule="max_length", + message="Content too long", + severity="error", + recoverable=True, + ) + assert violation.rule == "max_length" + assert violation.severity == "error" + assert violation.recoverable is True + + def test_guardrail_result_model(self): + """Test GuardrailResultModel.""" + result = GuardrailResultModel( + passed=False, + violations=[ + GuardrailViolationModel( + rule="test", + message="test violation", + severity="warning", + ) + ], + should_retry=True, + should_halt=False, + summary=GuardrailResultSummaryModel( + total=1, + fatal=0, + errors=0, + warnings=1, + ), + ) + assert result.passed is False + assert len(result.violations) == 1 + assert result.should_retry is True + + +class TestConsensusTypes: + """Tests for consensus Pydantic models.""" + + def test_consensus_output_model(self): + """Test ConsensusOutputModel.""" + output = ConsensusOutputModel( + index=0, + text="Hello", + value="Hello", + success=True, + duration_ms=100.0, + ) + assert output.index == 0 + assert output.success is True + + def test_agreement_model(self): + """Test AgreementModel.""" + agreement = AgreementModel( + content="agreed value", + count=3, + ratio=0.75, + indices=[0, 1, 2], + ) + assert agreement.count == 3 + assert agreement.ratio == 0.75 + + def test_consensus_result_model(self): + """Test ConsensusResultModel.""" + result = ConsensusResultModel( + consensus="final answer", + confidence=0.9, + outputs=[ + ConsensusOutputModel( + index=0, + text="answer", + value="answer", + success=True, + ) + ], + agreements=[], + disagreements=[], + analysis=ConsensusAnalysisModel( + total_outputs=1, + successful_outputs=1, + ), + status="success", + ) + assert result.confidence == 0.9 + assert result.status == "success" + + +class TestParallelTypes: + """Tests for parallel Pydantic models.""" + + def test_parallel_result_model(self): + """Test ParallelResultModel.""" + result = ParallelResultModel( + results=["a", "b", None], + errors=[None, None, "failed"], + success_count=2, + failure_count=1, + duration=1.5, + ) + assert result.success_count == 2 + assert result.failure_count == 1 + + def test_race_result_model(self): + """Test RaceResultModel.""" + result = RaceResultModel(value="winner", winner_index=0) + assert result.value == "winner" + assert result.winner_index == 0 + + def test_aggregated_telemetry_model(self): + """Test AggregatedTelemetryModel.""" + telemetry = AggregatedTelemetryModel( + total_tokens=1000, + total_duration=5.0, + avg_tokens_per_second=200.0, + ) + assert telemetry.total_tokens == 1000 + + +class TestPipelineTypes: + """Tests for pipeline Pydantic models.""" + + def test_step_result_model(self): + """Test StepResultModel.""" + result = StepResultModel( + step_name="summarize", + step_index=0, + input="long text", + output="summary", + raw_content="summary", + status="success", + duration=500, + ) + assert result.step_name == "summarize" + assert result.status == "success" + + def test_pipeline_result_model(self): + """Test PipelineResultModel.""" + result = PipelineResultModel( + name="my-pipeline", + output="final output", + steps=[], + status="success", + duration=1000, + ) + assert result.name == "my-pipeline" + assert result.status == "success" + + +class TestPoolTypes: + """Tests for pool Pydantic models.""" + + def test_pool_stats_model(self): + """Test PoolStatsModel.""" + stats = PoolStatsModel( + total_executed=10, + total_succeeded=8, + total_failed=2, + total_duration=5.0, + ) + assert stats.total_executed == 10 + assert stats.total_succeeded == 8 + + +class TestWindowTypes: + """Tests for window Pydantic models.""" + + def test_document_chunk_model(self): + """Test DocumentChunkModel.""" + chunk = DocumentChunkModel( + index=0, + content="First chunk", + start_pos=0, + end_pos=11, + token_count=3, + char_count=11, + is_first=True, + is_last=False, + total_chunks=5, + ) + assert chunk.index == 0 + assert chunk.is_first is True + + def test_window_stats_model(self): + """Test WindowStatsModel.""" + stats = WindowStatsModel( + total_chunks=5, + total_chars=5000, + total_tokens=1250, + avg_chunk_size=1000, + avg_chunk_tokens=250, + overlap_size=100, + strategy="token", + ) + assert stats.total_chunks == 5 + assert stats.strategy == "token" + + +class TestDriftTypes: + """Tests for drift Pydantic models.""" + + def test_drift_result_model_no_drift(self): + """Test DriftResultModel with no drift.""" + result = DriftResultModel( + detected=False, + confidence=0.0, + types=[], + ) + assert result.detected is False + assert result.types == [] + + def test_drift_result_model_with_drift(self): + """Test DriftResultModel with drift detected.""" + result = DriftResultModel( + detected=True, + confidence=0.85, + types=["meta_commentary", "tone_shift"], + details="Meta commentary and tone shift detected", + ) + assert result.detected is True + assert "meta_commentary" in result.types + + def test_drift_config_model(self): + """Test DriftConfigModel.""" + config = DriftConfigModel( + detect_tone_shift=True, + detect_meta_commentary=True, + repetition_threshold=5, + ) + assert config.detect_tone_shift is True + assert config.repetition_threshold == 5 + + +class TestStateMachineTypes: + """Tests for state machine Pydantic models.""" + + def test_runtime_state_enum(self): + """Test RuntimeStateModel enum values.""" + assert RuntimeStateModel.INIT == "init" + assert RuntimeStateModel.STREAMING == "streaming" + assert RuntimeStateModel.COMPLETE == "complete" + + def test_state_transition_model(self): + """Test StateTransitionModel.""" + transition = StateTransitionModel( + from_state=RuntimeStateModel.INIT, + to_state=RuntimeStateModel.STREAMING, + timestamp=1234567890.0, + ) + assert transition.from_state == RuntimeStateModel.INIT + assert transition.to_state == RuntimeStateModel.STREAMING + + +class TestEventSourcingTypes: + """Tests for event sourcing Pydantic models.""" + + def test_recorded_event_model(self): + """Test RecordedEventModel.""" + event = RecordedEventModel( + type=RecordedEventTypeModel.TOKEN, + ts=1234567890.0, + data={"value": "hello", "index": 0}, + ) + assert event.type == RecordedEventTypeModel.TOKEN + assert event.data["value"] == "hello" + + def test_event_envelope_model(self): + """Test EventEnvelopeModel.""" + envelope = EventEnvelopeModel( + stream_id="test-stream", + seq=0, + event=RecordedEventModel( + type=RecordedEventTypeModel.START, + ts=1234567890.0, + ), + ) + assert envelope.stream_id == "test-stream" + assert envelope.seq == 0 + + +class TestMetricsTypes: + """Tests for metrics Pydantic models.""" + + def test_metrics_snapshot_model(self): + """Test MetricsSnapshotModel.""" + snapshot = MetricsSnapshotModel( + requests=100, + tokens=5000, + retries=10, + network_retry_count=5, + errors=2, + violations=3, + drift_detections=1, + fallbacks=0, + completions=95, + timeouts=3, + ) + assert snapshot.requests == 100 + assert snapshot.tokens == 5000 + assert snapshot.completions == 95 + + +class TestValidation: + """Tests for Pydantic validation behavior.""" + + def test_extra_fields_forbidden(self): + """Test that extra fields are rejected for strict models.""" + with pytest.raises(ValidationError): + StateModel(content="test", unknown_field="should fail") + + def test_type_coercion(self): + """Test that types are properly coerced.""" + # String to int coercion + state = StateModel(token_count="5") # type: ignore + assert state.token_count == 5 + + def test_json_schema_generation(self): + """Test JSON schema generation.""" + schema = StateModel.model_json_schema() + assert "properties" in schema + assert "content" in schema["properties"] + assert "token_count" in schema["properties"] From 45960146ddcb56659429fc3f6d0fe20ef7060659 Mon Sep 17 00:00:00 2001 From: LZL0 <12474488+LZL0@users.noreply.github.com> Date: Wed, 10 Dec 2025 14:47:30 +0100 Subject: [PATCH 2/3] Update events.py --- src/l0/pydantic/events.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/src/l0/pydantic/events.py b/src/l0/pydantic/events.py index aa300b7..c7eae2f 100644 --- a/src/l0/pydantic/events.py +++ b/src/l0/pydantic/events.py @@ -127,10 +127,10 @@ class ObservabilityEventModel(BaseModel): class SessionStartEventModel(BaseModel): """Pydantic model for session start event.""" - model_config = ConfigDict(extra="forbid") + model_config = ConfigDict(extra="forbid", populate_by_name=True) type: ObservabilityEventTypeModel = ObservabilityEventTypeModel.SESSION_START - ts: float + ts: float = Field(alias="timestamp") stream_id: str context: dict[str, Any] = Field(default_factory=dict) attempt: int @@ -141,10 +141,10 @@ class SessionStartEventModel(BaseModel): class SessionEndEventModel(BaseModel): """Pydantic model for session end event.""" - model_config = ConfigDict(extra="forbid") + model_config = ConfigDict(extra="forbid", populate_by_name=True) type: ObservabilityEventTypeModel = ObservabilityEventTypeModel.SESSION_END - ts: float + ts: float = Field(alias="timestamp") stream_id: str context: dict[str, Any] = Field(default_factory=dict) duration_ms: float @@ -155,10 +155,10 @@ class SessionEndEventModel(BaseModel): class RetryAttemptEventModel(BaseModel): """Pydantic model for retry attempt event.""" - model_config = ConfigDict(extra="forbid") + model_config = ConfigDict(extra="forbid", populate_by_name=True) type: ObservabilityEventTypeModel = ObservabilityEventTypeModel.RETRY_ATTEMPT - ts: float + ts: float = Field(alias="timestamp") stream_id: str context: dict[str, Any] = Field(default_factory=dict) index: int | None = None @@ -174,10 +174,10 @@ class RetryAttemptEventModel(BaseModel): class CompleteEventModel(BaseModel): """Pydantic model for complete event.""" - model_config = ConfigDict(extra="forbid") + model_config = ConfigDict(extra="forbid", populate_by_name=True) type: ObservabilityEventTypeModel = ObservabilityEventTypeModel.COMPLETE - ts: float + ts: float = Field(alias="timestamp") stream_id: str context: dict[str, Any] = Field(default_factory=dict) token_count: int @@ -189,10 +189,10 @@ class CompleteEventModel(BaseModel): class ErrorEventModel(BaseModel): """Pydantic model for error event.""" - model_config = ConfigDict(extra="forbid") + model_config = ConfigDict(extra="forbid", populate_by_name=True) type: ObservabilityEventTypeModel = ObservabilityEventTypeModel.ERROR - ts: float + ts: float = Field(alias="timestamp") stream_id: str context: dict[str, Any] = Field(default_factory=dict) error: str From a12cd5675d4aa6ba8681b171e36a70ffd9a1b155 Mon Sep 17 00:00:00 2001 From: LZL0 <12474488+LZL0@users.noreply.github.com> Date: Wed, 10 Dec 2025 14:47:42 +0100 Subject: [PATCH 3/3] Update events.py --- src/l0/pydantic/events.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/l0/pydantic/events.py b/src/l0/pydantic/events.py index c7eae2f..fff0252 100644 --- a/src/l0/pydantic/events.py +++ b/src/l0/pydantic/events.py @@ -104,8 +104,9 @@ class ObservabilityEventModel(BaseModel): """Pydantic model for observability event.""" model_config = ConfigDict( - extra="allow" - ) # Allow extra fields for event-specific data + extra="allow", # Allow extra fields for event-specific data + populate_by_name=True, # Accept both "ts" and "timestamp" + ) type: ObservabilityEventTypeModel ts: float = Field(alias="timestamp")