Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,8 @@ def _make_field(
class _JSONSchemaToPydantic:
def __init__(self) -> None:
self._model_cache: Dict[str, Optional[Union[Type[BaseModel], ForwardRef]]] = {}
# Cache for non-object types in $defs (arrays, primitives, etc.)
self._type_cache: Dict[str, Any] = {}

def _resolve_ref(self, ref: str, schema: Dict[str, Any]) -> Dict[str, Any]:
ref_key = ref.split("/")[-1]
Expand All @@ -118,6 +120,10 @@ def _resolve_ref(self, ref: str, schema: Dict[str, Any]) -> Dict[str, Any]:
return definitions[ref_key]

def get_ref(self, ref_name: str) -> Any:
# Check type cache first for non-object types (arrays, primitives)
if ref_name in self._type_cache:
return self._type_cache[ref_name]

if ref_name not in self._model_cache:
raise ReferenceNotFoundError(
f"Reference `{ref_name}` not found in cache. Available: {list(self._model_cache.keys())}"
Expand All @@ -141,13 +147,52 @@ def _get_item_model_name(self, array_field_name: str, parent_model_name: str) ->

def _process_definitions(self, root_schema: Dict[str, Any]) -> None:
if "$defs" in root_schema:
for model_name in root_schema["$defs"]:
if model_name not in self._model_cache:
self._model_cache[model_name] = None
# First pass: register all definition names
for def_name in root_schema["$defs"]:
if def_name not in self._model_cache:
self._model_cache[def_name] = None

# Second pass: process each definition
for def_name, def_schema in root_schema["$defs"].items():
schema_type = def_schema.get("type")

# Handle non-object types (arrays, primitives) - don't create BaseModel
if schema_type is not None and schema_type != "object":
self._type_cache[def_name] = self._schema_to_python_type(def_schema, def_name, root_schema)
# Remove from model_cache since it's not a model
if def_name in self._model_cache:
del self._model_cache[def_name]
elif self._model_cache.get(def_name) is None:
# Object type - create a BaseModel
self._model_cache[def_name] = self.json_schema_to_pydantic(def_schema, def_name, root_schema)

def _schema_to_python_type(self, schema: Dict[str, Any], name: str, root_schema: Dict[str, Any]) -> Any:
"""Convert a JSON Schema to a Python type (for non-object $defs)."""
schema_type = schema.get("type")

if schema_type == "array":
item_schema = schema.get("items", {"type": "string"})
if "$ref" in item_schema:
item_type = self.get_ref(item_schema["$ref"].split("/")[-1])
elif item_schema.get("type") == "object" and "properties" in item_schema:
item_type = self._json_schema_to_model(item_schema, f"{name}_Item", root_schema)
else:
item_type_name = item_schema.get("type", "string")
item_type = TYPE_MAPPING.get(item_type_name, str)

constraints: Dict[str, Any] = {}
if "minItems" in schema:
constraints["min_length"] = schema["minItems"]
if "maxItems" in schema:
constraints["max_length"] = schema["maxItems"]

return conlist(item_type, **constraints) if constraints else List[item_type] # type: ignore[valid-type]

elif schema_type in TYPE_MAPPING:
return TYPE_MAPPING[schema_type]

for model_name, model_schema in root_schema["$defs"].items():
if self._model_cache[model_name] is None:
self._model_cache[model_name] = self.json_schema_to_pydantic(model_schema, model_name, root_schema)
# Fallback for unknown types
return Any

def json_schema_to_pydantic(
self, schema: Dict[str, Any], model_name: str = "GeneratedModel", root_schema: Optional[Dict[str, Any]] = None
Expand Down
112 changes: 112 additions & 0 deletions python/packages/autogen-core/tests/test_json_to_pydantic.py
Original file line number Diff line number Diff line change
Expand Up @@ -1042,3 +1042,115 @@ def test_nested_arrays_with_object_schemas() -> None:
assert alice.name == "Alice" # type: ignore[attr-defined]
assert alice.role == "Senior Developer" # type: ignore[attr-defined]
assert alice.skills == ["Python", "JavaScript", "Docker"] # type: ignore[attr-defined]


def test_defs_array_type_preserved() -> None:
    """Test that $defs with array types are preserved correctly.

    Regression test for issue #7203: schema_to_pydantic_model was converting
    array type definitions in $defs to empty object models.

    See: https://github.com/microsoft/autogen/issues/7203
    """
    from autogen_core.utils import schema_to_pydantic_model

    # This is the exact schema from the issue report
    type_alias_schema = {
        "$defs": {
            "TaskData": {
                "items": {"type": "string"},
                "type": "array",
            }
        },
        "properties": {
            "task_data": {
                "$ref": "#/$defs/TaskData",
                "description": "The task Data",
            }
        },
        "required": ["task_data"],
        "title": "ToolCallSchema",
        "type": "object",
    }

    Model = schema_to_pydantic_model(type_alias_schema, "ToolCallSchema")

    # Test that the model works with array data
    instance = Model(task_data=["item1", "item2", "item3"])
    assert instance.task_data == ["item1", "item2", "item3"]  # type: ignore[attr-defined]

    # Verify the schema preserves array type
    generated_schema = Model.model_json_schema()
    task_data_prop = generated_schema["properties"]["task_data"]

    # The generated schema may inline the type or use $ref, but either way it
    # must describe an array, not an empty object.
    if "$ref" in task_data_prop:
        ref_key = task_data_prop["$ref"].split("/")[-1]
        generated_defs = generated_schema.get("$defs", {})
        # A dangling reference must fail the test rather than pass vacuously.
        assert ref_key in generated_defs
        assert generated_defs[ref_key].get("type") == "array"
    else:
        # If inlined, should be array type
        assert task_data_prop.get("type") == "array" or "items" in task_data_prop


def test_defs_primitive_type_preserved() -> None:
    """Check that primitive ``$defs`` entries (string, integer) work as referenced field types."""
    from autogen_core.utils import schema_to_pydantic_model

    primitive_defs = {
        "UserId": {"type": "string"},
        "Count": {"type": "integer"},
    }
    referencing_properties = {
        "user_id": {"$ref": "#/$defs/UserId"},
        "count": {"$ref": "#/$defs/Count"},
    }
    schema = {
        "$defs": primitive_defs,
        "properties": referencing_properties,
        "required": ["user_id", "count"],
        "title": "SimpleModel",
        "type": "object",
    }

    Model = schema_to_pydantic_model(schema, "SimpleModel")

    # Build one instance and confirm every field round-trips unchanged.
    payload = {"user_id": "user123", "count": 42}
    instance = Model(**payload)
    for field_name, expected_value in payload.items():
        assert getattr(instance, field_name) == expected_value


def test_defs_array_with_constraints() -> None:
    """Check that ``minItems``/``maxItems`` on an array ``$defs`` entry are enforced."""
    from autogen_core.utils import schema_to_pydantic_model

    tag_list_def = {
        "type": "array",
        "items": {"type": "string"},
        "minItems": 1,
        "maxItems": 5,
    }
    schema = {
        "$defs": {"TagList": tag_list_def},
        "properties": {
            "tags": {"$ref": "#/$defs/TagList"},
        },
        "required": ["tags"],
        "title": "TaggedItem",
        "type": "object",
    }

    Model = schema_to_pydantic_model(schema, "TaggedItem")

    # A list inside the allowed size range is accepted unchanged.
    valid_tags = ["python", "autogen"]
    instance = Model(tags=valid_tags)
    assert instance.tags == ["python", "autogen"]  # type: ignore[attr-defined]

    # Lists outside the range are rejected.
    below_min_items: list = []  # minItems = 1
    above_max_items = ["a", "b", "c", "d", "e", "f"]  # maxItems = 5
    for invalid_tags in (below_min_items, above_max_items):
        with pytest.raises(ValidationError):
            Model(tags=invalid_tags)