From 1e531ca6f2b0197eb0b4b1e0bb2a0bbbdb4195ad Mon Sep 17 00:00:00 2001
From: Sean-Kenneth-Doherty <smaster7772@gmail.com>
Date: Fri, 30 Jan 2026 22:38:44 -0600
Subject: [PATCH] fix: Image Pydantic schema to properly handle Union[str,
 Image] deserialization (#7170)

## Problem
When UserMessage.content contains both a string and an Image in a list (e.g.,
`[image, "Please describe this image"]`), JSON deserialization fails with:
"Expected dict or Image instance, got <class 'str'>"

## Root Cause
The Image class's `__get_pydantic_core_schema__` used `core_schema.any_schema()`,
which accepts any input type. When Pydantic validates Union[str, Image], it
would try the Image validator on strings before trying the str type, causing
the validation to fail.

## Solution
Changed the Image schema to use `core_schema.union_schema()` with explicit types:
1. `core_schema.is_instance_schema(cls)` - for Image instances (pass through)
2. `core_schema.dict_schema()` with validator - for JSON dicts with 'data' key

This ensures the Image validator only processes dict/Image inputs, allowing
strings to be handled by the str type in Union[str, Image].

## Tests
Added comprehensive test file with 5 test cases covering:
- String-only content
- Image-only content
- Mixed content (Image + string) - the exact bug scenario from #7170
- String before Image
- Multiple strings and images interleaved

All existing serialization tests continue to pass.
---
 .../autogen-core/src/autogen_core/_image.py   |  34 +++--
 .../tests/test_image_mixed_content.py         | 124 ++++++++++++++++++
 2 files changed, 144 insertions(+), 14 deletions(-)
 create mode 100644 python/packages/autogen-core/tests/test_image_mixed_content.py
diff --git a/python/packages/autogen-core/src/autogen_core/_image.py b/python/packages/autogen-core/src/autogen_core/_image.py
index e24dfaa6bcd9..99b7d2a1b5ec 100644
--- a/python/packages/autogen-core/src/autogen_core/_image.py
+++ b/python/packages/autogen-core/src/autogen_core/_image.py
@@ -84,25 +84,31 @@ def to_openai_format(self, detail: Literal["auto", "low", "high"] = "auto") -> D
 
     @classmethod
     def __get_pydantic_core_schema__(cls, source_type: Any, handler: GetCoreSchemaHandler) -> core_schema.CoreSchema:
-        # Custom validation
-        def validate(value: Any, validation_info: ValidationInfo) -> Image:
-            if isinstance(value, dict):
-                base_64 = cast(str | None, value.get("data"))  # type: ignore
-                if base_64 is None:
-                    raise ValueError("Expected 'data' key in the dictionary")
-                return cls.from_base64(base_64)
-            elif isinstance(value, cls):
-                return value
-            else:
-                raise TypeError(f"Expected dict or {cls.__name__} instance, got {type(value)}")
+        # Custom validation for dict input (from JSON deserialization)
+        def validate_from_dict(value: dict[str, Any]) -> Image:
+            base_64 = cast(str | None, value.get("data"))
+            if base_64 is None:
+                raise ValueError("Expected 'data' key in the dictionary")
+            return cls.from_base64(base_64)
 
         # Custom serialization
         def serialize(value: Image) -> dict[str, Any]:
             return {"data": value.to_base64()}
 
-        return core_schema.with_info_after_validator_function(
-            validate,
-            core_schema.any_schema(),  # Accept any type; adjust if needed
+        # Use a union schema that explicitly handles:
+        # 1. Image instances (pass through)
+        # 2. Dict with 'data' key (deserialize from JSON)
+        # This prevents the validator from being called on strings in Union[str, Image]
+        return core_schema.union_schema(
+            [
+                # First, check if it's already an Image instance
+                core_schema.is_instance_schema(cls),
+                # Then, check if it's a dict and validate/convert it
+                core_schema.no_info_after_validator_function(
+                    validate_from_dict,
+                    core_schema.dict_schema(),
+                ),
+            ],
             serialization=core_schema.plain_serializer_function_ser_schema(serialize),
         )
 
diff --git a/python/packages/autogen-core/tests/test_image_mixed_content.py b/python/packages/autogen-core/tests/test_image_mixed_content.py
new file mode 100644
index 000000000000..d0607da95377
--- /dev/null
+++ b/python/packages/autogen-core/tests/test_image_mixed_content.py
@@ -0,0 +1,124 @@
+"""Test for issue #7170 - UserMessage with mixed string and Image content deserialization."""
+
+import pytest
+from autogen_core import Image
+from autogen_core.models import UserMessage
+
+
+class TestImageMixedContentDeserialization:
+    """Tests for UserMessage with mixed string and Image content."""
+
+    def test_user_message_with_string_only(self) -> None:
+        """Test UserMessage with string content serialization/deserialization."""
+        msg = UserMessage(content="Hello world", source="user")
+        json_str = msg.model_dump_json()
+        restored = UserMessage.model_validate_json(json_str)
+        assert restored.content == "Hello world"
+        assert restored.source == "user"
+
+    def test_user_message_with_image_only(self) -> None:
+        """Test UserMessage with Image only in list."""
+        # Create a small test image (1x1 red pixel PNG)
+        import base64
+        from io import BytesIO
+        from PIL import Image as PILImage
+
+        # Create a 1x1 red image
+        pil_img = PILImage.new("RGB", (1, 1), color="red")
+        buffered = BytesIO()
+        pil_img.save(buffered, format="PNG")
+        base64_str = base64.b64encode(buffered.getvalue()).decode("utf-8")
+
+        img = Image.from_base64(base64_str)
+        msg = UserMessage(content=[img], source="user")
+        json_str = msg.model_dump_json()
+        restored = UserMessage.model_validate_json(json_str)
+
+        assert isinstance(restored.content, list)
+        assert len(restored.content) == 1
+        assert isinstance(restored.content[0], Image)
+
+    def test_user_message_with_mixed_content(self) -> None:
+        """Test UserMessage with both string and Image content - issue #7170."""
+        import base64
+        from io import BytesIO
+        from PIL import Image as PILImage
+
+        # Create a 1x1 red image
+        pil_img = PILImage.new("RGB", (1, 1), color="red")
+        buffered = BytesIO()
+        pil_img.save(buffered, format="PNG")
+        base64_str = base64.b64encode(buffered.getvalue()).decode("utf-8")
+
+        img = Image.from_base64(base64_str)
+
+        # This is the exact case from issue #7170
+        msg = UserMessage(content=[img, "Please describe this image"], source="user")
+        json_str = msg.model_dump_json()
+
+        # This was failing before the fix with:
+        # "Expected dict or Image instance, got <class 'str'>"
+        restored = UserMessage.model_validate_json(json_str)
+
+        assert isinstance(restored.content, list)
+        assert len(restored.content) == 2
+        assert isinstance(restored.content[0], Image)
+        assert restored.content[1] == "Please describe this image"
+
+    def test_user_message_with_string_first_then_image(self) -> None:
+        """Test UserMessage with string before Image in list."""
+        import base64
+        from io import BytesIO
+        from PIL import Image as PILImage
+
+        # Create a 1x1 blue image
+        pil_img = PILImage.new("RGB", (1, 1), color="blue")
+        buffered = BytesIO()
+        pil_img.save(buffered, format="PNG")
+        base64_str = base64.b64encode(buffered.getvalue()).decode("utf-8")
+
+        img = Image.from_base64(base64_str)
+        msg = UserMessage(content=["What is in this image?", img], source="user")
+        json_str = msg.model_dump_json()
+        restored = UserMessage.model_validate_json(json_str)
+
+        assert isinstance(restored.content, list)
+        assert len(restored.content) == 2
+        assert restored.content[0] == "What is in this image?"
+        assert isinstance(restored.content[1], Image)
+
+    def test_user_message_with_multiple_strings_and_images(self) -> None:
+        """Test UserMessage with multiple strings and images."""
+        import base64
+        from io import BytesIO
+        from PIL import Image as PILImage
+
+        # Create two different images
+        pil_img1 = PILImage.new("RGB", (1, 1), color="red")
+        buffered1 = BytesIO()
+        pil_img1.save(buffered1, format="PNG")
+        img1 = Image.from_base64(base64.b64encode(buffered1.getvalue()).decode("utf-8"))
+
+        pil_img2 = PILImage.new("RGB", (1, 1), color="green")
+        buffered2 = BytesIO()
+        pil_img2.save(buffered2, format="PNG")
+        img2 = Image.from_base64(base64.b64encode(buffered2.getvalue()).decode("utf-8"))
+
+        msg = UserMessage(
+            content=["First text", img1, "Second text", img2, "Third text"],
+            source="user"
+        )
+        json_str = msg.model_dump_json()
+        restored = UserMessage.model_validate_json(json_str)
+
+        assert isinstance(restored.content, list)
+        assert len(restored.content) == 5
+        assert restored.content[0] == "First text"
+        assert isinstance(restored.content[1], Image)
+        assert restored.content[2] == "Second text"
+        assert isinstance(restored.content[3], Image)
+        assert restored.content[4] == "Third text"
+
+
+if __name__ == "__main__":
+    pytest.main([__file__, "-v"])