From a8aee9bb9f07322333c12fdb2b9e675c6307b4d3 Mon Sep 17 00:00:00 2001
From: LUCAS KIM
Date: Fri, 30 Jan 2026 18:57:55 -0800
Subject: [PATCH] fix: preserve non-ASCII text in MCP JSON serialization (#6995)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

## Summary

This PR fixes MCP JSON serialization so that non-ASCII text (for example, Japanese) is preserved instead of being escaped as `\uXXXX`.

## Changes

- Added an internal helper `_to_json_str` in `autogen_ext.tools.mcp._base` that calls `json.dumps(..., ensure_ascii=False, indent=indent)`.
- Updated `McpToolAdapter.return_value_as_string` to use `_to_json_str` for serializing tool return values.
- Updated `StreamElicitor.elicit` in `_host/_elicitation.py` to use `_to_json_str` when rendering the requested schema for the user.
- Added `tests/tools/test_mcp_serialization.py` to verify that Japanese text is not escaped.

## Tests

- `uv run pytest tests/tools/test_mcp_serialization.py` ✓
- `uv run pytest` for `autogen-ext` fails on optional integrations (`chromadb`, `mem0`, `redisvl`, `torch`, `ollama`, `semantic_kernel`, `graphrag`, `json_schema_to_pydantic`) that are not installed in my local environment, but the new MCP tests pass.
--- .../src/autogen_ext/tools/mcp/_base.py | 6 +++- .../tools/mcp/_host/_elicitation.py | 4 ++- .../tests/tools/test_mcp_serialization.py | 34 +++++++++++++++++++ 3 files changed, 42 insertions(+), 2 deletions(-) create mode 100644 python/packages/autogen-ext/tests/tools/test_mcp_serialization.py diff --git a/python/packages/autogen-ext/src/autogen_ext/tools/mcp/_base.py b/python/packages/autogen-ext/src/autogen_ext/tools/mcp/_base.py index 314dbe15e7ba..cad75abb6361 100644 --- a/python/packages/autogen-ext/src/autogen_ext/tools/mcp/_base.py +++ b/python/packages/autogen-ext/src/autogen_ext/tools/mcp/_base.py @@ -194,4 +194,8 @@ def serialize_item(item: Any) -> dict[str, Any]: else: return {} - return json.dumps([serialize_item(item) for item in value]) + return _to_json_str([serialize_item(item) for item in value]) + + +def _to_json_str(obj: Any, indent: int | None = None) -> str: + return json.dumps(obj, ensure_ascii=False, indent=indent) diff --git a/python/packages/autogen-ext/src/autogen_ext/tools/mcp/_host/_elicitation.py b/python/packages/autogen-ext/src/autogen_ext/tools/mcp/_host/_elicitation.py index 373f995c231d..e5eda7cfd9c1 100644 --- a/python/packages/autogen-ext/src/autogen_ext/tools/mcp/_host/_elicitation.py +++ b/python/packages/autogen-ext/src/autogen_ext/tools/mcp/_host/_elicitation.py @@ -12,6 +12,8 @@ from mcp import types as mcp_types +from .._base import _to_json_str + _ELICITATION_CHOICE_SHORTHANDS = {"a": "accept", "d": "decline", "c": "cancel"} @@ -82,7 +84,7 @@ async def elicit(self, params: mcp_types.ElicitRequestParams) -> mcp_types.Elici prompt = "\n".join( [ "Input Schema:", - json.dumps(params.requestedSchema, indent=2), + _to_json_str(params.requestedSchema, indent=2), "Please enter a JSON string following the above schema: ", ] ) diff --git a/python/packages/autogen-ext/tests/tools/test_mcp_serialization.py b/python/packages/autogen-ext/tests/tools/test_mcp_serialization.py new file mode 100644 index 000000000000..749bf691a39c --- 
/dev/null +++ b/python/packages/autogen-ext/tests/tools/test_mcp_serialization.py @@ -0,0 +1,34 @@ +import pytest +from autogen_ext.tools.mcp._base import _to_json_str +from autogen_ext.tools.mcp._stdio import StdioMcpToolAdapter +from mcp.types import TextContent, Tool + + +def test_to_json_str_no_escape_japanese() -> None: + """Verify that Japanese text is not escaped in _to_json_str.""" + data = {"text": "日本語"} + serialized = _to_json_str(data) + + # Check that the literal Japanese characters are in the string + assert "日本語" in serialized + # Check that the escaped unicode sequence is NOT in the string + assert "\\u65e5\\u672c\\u8a9e" not in serialized + + +def test_mcp_tool_adapter_return_value_no_escape_japanese() -> None: + """Verify that McpToolAdapter.return_value_as_string does not escape Japanese text.""" + # Mock parameters + from autogen_ext.tools.mcp import StdioServerParams + + server_params = StdioServerParams(command="echo", args=["test"]) + tool = Tool(name="test_tool", description="A test tool", inputSchema={"type": "object", "properties": {}}) + + adapter = StdioMcpToolAdapter(server_params=server_params, tool=tool) + + content = [TextContent(type="text", text="日本語")] + serialized = adapter.return_value_as_string(content) + + # Check that the literal Japanese characters are in the string + assert "日本語" in serialized + # Check that the escaped unicode sequence is NOT in the string + assert "\\u65e5\\u672c\\u8a9e" not in serialized