From 1f32952d0066a9dc1ff1482cef48c3cbe0acb663 Mon Sep 17 00:00:00 2001 From: Fabian Schindler Date: Wed, 17 Dec 2025 10:45:45 +0100 Subject: [PATCH 1/4] fix(ai): redact message parts content of type blob --- sentry_sdk/ai/utils.py | 51 +++++++++++++++++ tests/test_ai_monitoring.py | 106 +++++++++++++++++++++++++++++++++++- 2 files changed, 156 insertions(+), 1 deletion(-) diff --git a/sentry_sdk/ai/utils.py b/sentry_sdk/ai/utils.py index 1d2b4483c9..73155b0305 100644 --- a/sentry_sdk/ai/utils.py +++ b/sentry_sdk/ai/utils.py @@ -5,6 +5,8 @@ from sys import getsizeof from typing import TYPE_CHECKING +from sentry_sdk._types import SENSITIVE_DATA_SUBSTITUTE + if TYPE_CHECKING: from typing import Any, Callable, Dict, List, Optional, Tuple @@ -141,6 +143,53 @@ def _find_truncation_index(messages: "List[Dict[str, Any]]", max_bytes: int) -> return 0 +def redact_blob_message_parts(messages): + # type: (List[Dict[str, Any]]) -> Tuple[List[Dict[str, Any]], int] + """ + Redact blob message parts from the messages, by removing the "content" key. + e.g: + { + "role": "user", + "content": [ + { + "text": "How many ponies do you see in the image?", + "type": "text" + }, + { + "type": "blob", + "modality": "image", + "mime_type": "image/jpeg", + "content": "data:image/jpeg;base64,..." + } + ] + } + becomes: + { + "role": "user", + "content": [ + { + "text": "How many ponies do you see in the image?", + "type": "text" + }, + { + "type": "blob", + "modality": "image", + "mime_type": "image/jpeg", + "content": "[Filtered]" + } + ] + } + """ + + for message in messages: + content = message.get("content") + if isinstance(content, list): + for item in content: + if item.get("type") == "blob": + item["content"] = SENSITIVE_DATA_SUBSTITUTE + return messages + + def truncate_messages_by_size( messages: "List[Dict[str, Any]]", max_bytes: int = MAX_GEN_AI_MESSAGE_BYTES, @@ -186,6 +235,8 @@ def truncate_and_annotate_messages( if not messages: return None + messages = redact_blob_message_parts(messages) + truncated_messages, removed_count = truncate_messages_by_size(messages, max_bytes) if removed_count > 0: scope._gen_ai_original_message_count[span.span_id] = len(messages) diff --git a/tests/test_ai_monitoring.py b/tests/test_ai_monitoring.py index 8d3d4ba204..e9f3712cd3 100644 --- a/tests/test_ai_monitoring.py +++ b/tests/test_ai_monitoring.py @@ -4,7 +4,7 @@ import pytest import sentry_sdk -from sentry_sdk._types import AnnotatedValue +from sentry_sdk._types import AnnotatedValue, SENSITIVE_DATA_SUBSTITUTE from sentry_sdk.ai.monitoring import ai_track from sentry_sdk.ai.utils import ( MAX_GEN_AI_MESSAGE_BYTES, @@ -13,6 +13,7 @@ truncate_and_annotate_messages, truncate_messages_by_size, _find_truncation_index, + redact_blob_message_parts, ) from sentry_sdk.serializer import serialize from sentry_sdk.utils import safe_serialize @@ -542,3 +543,106 @@ def __init__(self): assert isinstance(messages_value, AnnotatedValue) assert messages_value.metadata["len"] == stored_original_length assert len(messages_value.value) == len(truncated_messages) + + +class TestRedactBlobMessageParts: + def test_redacts_single_blob_content(self): + """Test that blob content is redacted in a message with single blob part""" + messages = [ + { + "role": "user", + "content": [ + { + "text": "How many ponies do you see in the image?", + "type": "text", + }, + { + "type": "blob", + "modality": "image", + "mime_type": "image/jpeg", + "content": "data:image/jpeg;base64,/9j/4AAQSkZJRg==", + }, + ], + } + ] + + result = redact_blob_message_parts(messages) + + assert result == messages # Returns the same list + assert ( + messages[0]["content"][0]["text"] + == "How many ponies do you see in the image?" + ) + assert messages[0]["content"][0]["type"] == "text" + assert messages[0]["content"][1]["type"] == "blob" + assert messages[0]["content"][1]["modality"] == "image" + assert messages[0]["content"][1]["mime_type"] == "image/jpeg" + assert messages[0]["content"][1]["content"] == SENSITIVE_DATA_SUBSTITUTE + + def test_redacts_multiple_blob_parts(self): + """Test that multiple blob parts in a single message are all redacted""" + messages = [ + { + "role": "user", + "content": [ + {"text": "Compare these images", "type": "text"}, + { + "type": "blob", + "modality": "image", + "mime_type": "image/jpeg", + "content": "data:image/jpeg;base64,first_image", + }, + { + "type": "blob", + "modality": "image", + "mime_type": "image/png", + "content": "data:image/png;base64,second_image", + }, + ], + } + ] + + result = redact_blob_message_parts(messages) + + assert result == messages + assert messages[0]["content"][0]["text"] == "Compare these images" + assert messages[0]["content"][1]["content"] == SENSITIVE_DATA_SUBSTITUTE + assert messages[0]["content"][2]["content"] == SENSITIVE_DATA_SUBSTITUTE + + def test_redacts_blobs_in_multiple_messages(self): + """Test that blob parts are redacted across multiple messages""" + messages = [ + { + "role": "user", + "content": [ + {"text": "First message", "type": "text"}, + { + "type": "blob", + "modality": "image", + "content": "data:image/jpeg;base64,first", + }, + ], + }, + { + "role": "assistant", + "content": "I see the image.", + }, + { + "role": "user", + "content": [ + {"text": "Second message", "type": "text"}, + { + "type": "blob", + "modality": "image", + "content": "data:image/jpeg;base64,second", + }, + ], + }, + ] + + result = redact_blob_message_parts(messages) + + assert result == messages + assert messages[0]["content"][1]["content"] == SENSITIVE_DATA_SUBSTITUTE + assert messages[1]["content"] == "I see the image." # Unchanged + assert messages[2]["content"][1]["content"] == SENSITIVE_DATA_SUBSTITUTE From 795bcea241f7777e646a4da14c870a3049bdbe90 Mon Sep 17 00:00:00 2001 From: Fabian Schindler Date: Wed, 17 Dec 2025 11:05:04 +0100 Subject: [PATCH 2/4] fix(ai): skip non dict messages --- sentry_sdk/ai/utils.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/sentry_sdk/ai/utils.py b/sentry_sdk/ai/utils.py index 73155b0305..ae507e898b 100644 --- a/sentry_sdk/ai/utils.py +++ b/sentry_sdk/ai/utils.py @@ -182,6 +182,9 @@ def redact_blob_message_parts(messages): """ for message in messages: + if not isinstance(message, dict): + continue + content = message.get("content") if isinstance(content, list): for item in content: From a623e137d26e982c0d85258256c0ba013f9ecb24 Mon Sep 17 00:00:00 2001 From: Fabian Schindler Date: Wed, 17 Dec 2025 11:21:43 +0100 Subject: [PATCH 3/4] fix(ai): typing --- sentry_sdk/ai/utils.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/sentry_sdk/ai/utils.py b/sentry_sdk/ai/utils.py index ae507e898b..1b61c7a113 100644 --- a/sentry_sdk/ai/utils.py +++ b/sentry_sdk/ai/utils.py @@ -143,8 +143,9 @@ def _find_truncation_index(messages: "List[Dict[str, Any]]", max_bytes: int) -> return 0 -def redact_blob_message_parts(messages): - # type: (List[Dict[str, Any]]) -> Tuple[List[Dict[str, Any]], int] +def redact_blob_message_parts( + messages: "List[Dict[str, Any]]", +) -> "List[Dict[str, Any]]": """ Redact blob message parts from the messages, by removing the "content" key. e.g: From 3d3ce5bbdca43f14194edbbbee11d3b6dcd6d8a3 Mon Sep 17 00:00:00 2001 From: Fabian Schindler Date: Wed, 17 Dec 2025 11:37:12 +0100 Subject: [PATCH 4/4] fix(ai): content items may not be dicts --- sentry_sdk/ai/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sentry_sdk/ai/utils.py b/sentry_sdk/ai/utils.py index 1b61c7a113..78a64ab737 100644 --- a/sentry_sdk/ai/utils.py +++ b/sentry_sdk/ai/utils.py @@ -189,7 +189,7 @@ def redact_blob_message_parts( content = message.get("content") if isinstance(content, list): for item in content: - if item.get("type") == "blob": + if isinstance(item, dict) and item.get("type") == "blob": item["content"] = SENSITIVE_DATA_SUBSTITUTE return messages