From 1cce9f58743ad928138ee2aabd4a31f61c96ac1c Mon Sep 17 00:00:00 2001
From: Harikrishna KP
Date: Thu, 5 Feb 2026 22:13:42 +0530
Subject: [PATCH] fix(core): preserve $defs array/primitive types in
 schema_to_pydantic_model

When `$defs` contains non-object type definitions (arrays, primitives),
the schema_to_pydantic_model function was incorrectly creating empty
BaseModel classes instead of preserving the original type information.

This caused schemas with type aliases like:
`type TaskData = list[str]`
to be converted from:
`{"type": "array", "items": {"type": "string"}}`
to:
`{"type": "object", "properties": {}}`

The fix adds a separate `_type_cache` to store non-object type
definitions and a `_schema_to_python_type` method to properly convert
them to Python types. The `get_ref` method now checks both caches.

Fixes #7203

Signed-off-by: Harikrishna KP
---
 .../autogen_core/utils/_json_to_pydantic.py   |  57 ++++++++-
 .../tests/test_json_to_pydantic.py            | 112 ++++++++++++++++++
 2 files changed, 163 insertions(+), 6 deletions(-)

diff --git a/python/packages/autogen-core/src/autogen_core/utils/_json_to_pydantic.py b/python/packages/autogen-core/src/autogen_core/utils/_json_to_pydantic.py
index e881d151a9fd..3b116c0c93c8 100644
--- a/python/packages/autogen-core/src/autogen_core/utils/_json_to_pydantic.py
+++ b/python/packages/autogen-core/src/autogen_core/utils/_json_to_pydantic.py
@@ -105,6 +105,8 @@ def _make_field(
 class _JSONSchemaToPydantic:
     def __init__(self) -> None:
         self._model_cache: Dict[str, Optional[Union[Type[BaseModel], ForwardRef]]] = {}
+        # Cache for non-object types in $defs (arrays, primitives, etc.)
+        self._type_cache: Dict[str, Any] = {}
 
     def _resolve_ref(self, ref: str, schema: Dict[str, Any]) -> Dict[str, Any]:
         ref_key = ref.split("/")[-1]
@@ -118,6 +120,10 @@ def _resolve_ref(self, ref: str, schema: Dict[str, Any]) -> Dict[str, Any]:
         return definitions[ref_key]
 
     def get_ref(self, ref_name: str) -> Any:
+        # Check type cache first for non-object types (arrays, primitives)
+        if ref_name in self._type_cache:
+            return self._type_cache[ref_name]
+
         if ref_name not in self._model_cache:
             raise ReferenceNotFoundError(
                 f"Reference `{ref_name}` not found in cache. Available: {list(self._model_cache.keys())}"
@@ -141,13 +147,52 @@ def _get_item_model_name(self, array_field_name: str, parent_model_name: str) ->
 
     def _process_definitions(self, root_schema: Dict[str, Any]) -> None:
         if "$defs" in root_schema:
-            for model_name in root_schema["$defs"]:
-                if model_name not in self._model_cache:
-                    self._model_cache[model_name] = None
+            # First pass: register all definition names
+            for def_name in root_schema["$defs"]:
+                if def_name not in self._model_cache:
+                    self._model_cache[def_name] = None
+
+            # Second pass: process each definition
+            for def_name, def_schema in root_schema["$defs"].items():
+                schema_type = def_schema.get("type")
+
+                # Handle non-object types (arrays, primitives) - don't create BaseModel
+                if schema_type is not None and schema_type != "object":
+                    self._type_cache[def_name] = self._schema_to_python_type(def_schema, def_name, root_schema)
+                    # Remove from model_cache since it's not a model
+                    if def_name in self._model_cache:
+                        del self._model_cache[def_name]
+                elif self._model_cache.get(def_name) is None:
+                    # Object type - create a BaseModel
+                    self._model_cache[def_name] = self.json_schema_to_pydantic(def_schema, def_name, root_schema)
+
+    def _schema_to_python_type(self, schema: Dict[str, Any], name: str, root_schema: Dict[str, Any]) -> Any:
+        """Convert a JSON Schema to a Python type (for non-object $defs)."""
+        schema_type = schema.get("type")
+
+        if schema_type == "array":
+            item_schema = schema.get("items", {"type": "string"})
+            if "$ref" in item_schema:
+                item_type = self.get_ref(item_schema["$ref"].split("/")[-1])
+            elif item_schema.get("type") == "object" and "properties" in item_schema:
+                item_type = self._json_schema_to_model(item_schema, f"{name}_Item", root_schema)
+            else:
+                item_type_name = item_schema.get("type", "string")
+                item_type = TYPE_MAPPING.get(item_type_name, str)
+
+            constraints: Dict[str, Any] = {}
+            if "minItems" in schema:
+                constraints["min_length"] = schema["minItems"]
+            if "maxItems" in schema:
+                constraints["max_length"] = schema["maxItems"]
+
+            return conlist(item_type, **constraints) if constraints else List[item_type]  # type: ignore[valid-type]
+
+        elif schema_type in TYPE_MAPPING:
+            return TYPE_MAPPING[schema_type]
 
-            for model_name, model_schema in root_schema["$defs"].items():
-                if self._model_cache[model_name] is None:
-                    self._model_cache[model_name] = self.json_schema_to_pydantic(model_schema, model_name, root_schema)
+        # Fallback for unknown types
+        return Any
 
     def json_schema_to_pydantic(
         self, schema: Dict[str, Any], model_name: str = "GeneratedModel", root_schema: Optional[Dict[str, Any]] = None
diff --git a/python/packages/autogen-core/tests/test_json_to_pydantic.py b/python/packages/autogen-core/tests/test_json_to_pydantic.py
index 0efad58b4ebc..126dabff8274 100644
--- a/python/packages/autogen-core/tests/test_json_to_pydantic.py
+++ b/python/packages/autogen-core/tests/test_json_to_pydantic.py
@@ -1042,3 +1042,115 @@ def test_nested_arrays_with_object_schemas() -> None:
     assert alice.name == "Alice"  # type: ignore[attr-defined]
     assert alice.role == "Senior Developer"  # type: ignore[attr-defined]
     assert alice.skills == ["Python", "JavaScript", "Docker"]  # type: ignore[attr-defined]
+
+
+def test_defs_array_type_preserved() -> None:
+    """Test that $defs with array types are preserved correctly.
+
+    Regression test for issue #7203: schema_to_pydantic_model was converting
+    array type definitions in $defs to empty object models.
+
+    See: https://github.com/microsoft/autogen/issues/7203
+    """
+    from autogen_core.utils import schema_to_pydantic_model
+
+    # This is the exact schema from the issue report
+    type_alias_schema = {
+        "$defs": {
+            "TaskData": {
+                "items": {"type": "string"},
+                "type": "array",
+            }
+        },
+        "properties": {
+            "task_data": {
+                "$ref": "#/$defs/TaskData",
+                "description": "The task Data",
+            }
+        },
+        "required": ["task_data"],
+        "title": "ToolCallSchema",
+        "type": "object",
+    }
+
+    Model = schema_to_pydantic_model(type_alias_schema, "ToolCallSchema")
+
+    # Test that the model works with array data
+    instance = Model(task_data=["item1", "item2", "item3"])
+    assert instance.task_data == ["item1", "item2", "item3"]  # type: ignore[attr-defined]
+
+    # Verify the schema preserves array type
+    generated_schema = Model.model_json_schema()
+    task_data_prop = generated_schema["properties"]["task_data"]
+
+    # The generated schema should reference an array type, not an empty object
+    # It may inline the type or use $ref, but should be array-like
+    if "$ref" in task_data_prop:
+        ref_key = task_data_prop["$ref"].split("/")[-1]
+        ref_def = generated_schema.get("$defs", {}).get(ref_key, {})
+        # Should NOT be an empty object
+        assert ref_def.get("type") != "object" or ref_def.get("properties") != {}
+    else:
+        # If inlined, should be array type
+        assert task_data_prop.get("type") == "array" or "items" in task_data_prop
+
+
+def test_defs_primitive_type_preserved() -> None:
+    """Test that $defs with primitive types (string, integer, etc.) are handled."""
+    from autogen_core.utils import schema_to_pydantic_model
+
+    schema = {
+        "$defs": {
+            "UserId": {"type": "string"},
+            "Count": {"type": "integer"},
+        },
+        "properties": {
+            "user_id": {"$ref": "#/$defs/UserId"},
+            "count": {"$ref": "#/$defs/Count"},
+        },
+        "required": ["user_id", "count"],
+        "title": "SimpleModel",
+        "type": "object",
+    }
+
+    Model = schema_to_pydantic_model(schema, "SimpleModel")
+
+    # Test that the model works
+    instance = Model(user_id="user123", count=42)
+    assert instance.user_id == "user123"  # type: ignore[attr-defined]
+    assert instance.count == 42  # type: ignore[attr-defined]
+
+
+def test_defs_array_with_constraints() -> None:
+    """Test that $defs array types with constraints are preserved."""
+    from autogen_core.utils import schema_to_pydantic_model
+
+    schema = {
+        "$defs": {
+            "TagList": {
+                "type": "array",
+                "items": {"type": "string"},
+                "minItems": 1,
+                "maxItems": 5,
+            }
+        },
+        "properties": {
+            "tags": {"$ref": "#/$defs/TagList"},
+        },
+        "required": ["tags"],
+        "title": "TaggedItem",
+        "type": "object",
+    }
+
+    Model = schema_to_pydantic_model(schema, "TaggedItem")
+
+    # Test valid data
+    instance = Model(tags=["python", "autogen"])
+    assert instance.tags == ["python", "autogen"]  # type: ignore[attr-defined]
+
+    # Test constraints are enforced
+    with pytest.raises(ValidationError):
+        Model(tags=[])  # minItems = 1
+
+    with pytest.raises(ValidationError):
+        Model(tags=["a", "b", "c", "d", "e", "f"])  # maxItems = 5