tj-python · pull · Oct 15, 2025 · Oct 14, 2025 · Oct 14, 2025
diff --git a/CHANGES/10889.bugfix.rst b/CHANGES/10889.bugfix.rst
@@ -0,0 +1,4 @@
+Updated ``Content-Type`` header parsing to return ``application/octet-stream`` when the header contains invalid syntax.
+See :rfc:`9110#section-8.3-5`.
+
+-- by :user:`sgaist`.
diff --git a/CONTRIBUTORS.txt b/CONTRIBUTORS.txt
@@ -320,6 +320,7 @@ Roman Postnov
 Rong Zhang
 Samir Akarioh
 Samuel Colvin
+Samuel Gaist
 Sean Hunt
 Sebastian Acuna
 Sebastian Hanula

diff --git a/aiohttp/helpers.py b/aiohttp/helpers.py
@@ -20,7 +20,9 @@
 from collections import namedtuple
 from collections.abc import Callable, Iterable, Iterator, Mapping
 from contextlib import suppress
+from email.message import EmailMessage
 from email.parser import HeaderParser
+from email.policy import HTTP
 from email.utils import parsedate
 from http.cookies import SimpleCookie
 from math import ceil
@@ -356,14 +358,40 @@ def parse_mimetype(mimetype: str) -> MimeType:
     )
 
 
+class EnsureOctetStream(EmailMessage):
+    def __init__(self) -> None:
+        super().__init__()
+        # https://www.rfc-editor.org/rfc/rfc9110#section-8.3-5
+        self.set_default_type("application/octet-stream")
+
+    def get_content_type(self) -> Any:
+        """Re-implementation of ``Message.get_content_type()``.
+
+        Returns the default type (application/octet-stream) in place of
+        text/plain when the value does not contain exactly one "/".
+
+        The way this class is used guarantees that content-type will
+        be present, so the checks are simpler than in the base class.
+        """
+        value = self.get("content-type", "").lower()
+
+        # Based on the implementation of _splitparam in the standard library
+        ctype, _, _ = value.partition(";")
+        ctype = ctype.strip()
+        if ctype.count("/") != 1:
+            return self.get_default_type()
+        return ctype
+
+
 @functools.lru_cache(maxsize=56)
 def parse_content_type(raw: str) -> tuple[str, MappingProxyType[str, str]]:
     """Parse Content-Type header.
 
     Returns a tuple of the parsed content type and a
-    MappingProxyType of parameters.
+    MappingProxyType of parameters. The default returned value
+    is `application/octet-stream`.
     """
-    msg = HeaderParser().parsestr(f"Content-Type: {raw}")
+    msg = HeaderParser(EnsureOctetStream, policy=HTTP).parsestr(f"Content-Type: {raw}")
     content_type = msg.get_content_type()
     params = msg.get_params(())
     content_dict = dict(params[1:])  # First element is content type again

diff --git a/docs/client_reference.rst b/docs/client_reference.rst
@@ -1550,16 +1550,14 @@ Response object
 
       .. note::
 
-         Returns value is ``'application/octet-stream'`` if no
-         Content-Type header present in HTTP headers according to
-         :rfc:`9110`. If the *Content-Type* header is invalid (e.g., ``jpg``
-         instead of ``image/jpeg``), the value is ``text/plain`` by default
-         according to :rfc:`2045`. To see the original header check
-         ``resp.headers['CONTENT-TYPE']``.
+         Returns ``'application/octet-stream'`` if no Content-Type header
+         is present or the value contains invalid syntax according to
+         :rfc:`9110`. To see the original header check
+         ``resp.headers["Content-Type"]``.
 
          To make sure Content-Type header is not present in
          the server reply, use :attr:`headers` or :attr:`raw_headers`, e.g.
-         ``'CONTENT-TYPE' not in resp.headers``.
+         ``'Content-Type' not in resp.headers``.
 
    .. attribute:: charset
 

diff --git a/requirements/constraints.txt b/requirements/constraints.txt
@@ -109,7 +109,7 @@ jinja2==3.1.6
     # via
     #   sphinx
     #   towncrier
-markdown-it-py==3.0.0
+markdown-it-py==4.0.0
     # via rich
 markupsafe==3.0.3
     # via jinja2

diff --git a/requirements/dev.txt b/requirements/dev.txt
@@ -107,7 +107,7 @@ jinja2==3.1.6
     # via
     #   sphinx
     #   towncrier
-markdown-it-py==3.0.0
+markdown-it-py==4.0.0
     # via rich
 markupsafe==3.0.3
     # via jinja2

diff --git a/requirements/lint.txt b/requirements/lint.txt
@@ -43,7 +43,7 @@ iniconfig==2.1.0
     # via pytest
 isal==1.7.2
     # via -r requirements/lint.in
-markdown-it-py==3.0.0
+markdown-it-py==4.0.0
     # via rich
 mdurl==0.1.2
     # via markdown-it-py

diff --git a/requirements/test.txt b/requirements/test.txt
@@ -55,7 +55,7 @@ iniconfig==2.1.0
     # via pytest
 isal==1.7.2 ; python_version < "3.14"
     # via -r requirements/test-common.in
-markdown-it-py==3.0.0
+markdown-it-py==4.0.0
     # via rich
 mdurl==0.1.2
     # via markdown-it-py

diff --git a/tests/test_helpers.py b/tests/test_helpers.py
@@ -7,6 +7,7 @@
 from collections.abc import Iterator
 from math import ceil, modf
 from pathlib import Path
+from types import MappingProxyType
 from unittest import mock
 from urllib.request import getproxies_environment
 
@@ -81,6 +82,30 @@ def test_parse_mimetype(mimetype: str, expected: helpers.MimeType) -> None:
     assert result == expected
 
 
+# ------------------- parse_content_type ------------------------------
+
+
+@pytest.mark.parametrize(
+    "content_type, expected",
+    [
+        (
+            "text/plain",  # well-formed type, no parameters
+            ("text/plain", MappingProxyType({})),
+        ),
+        (
+            "wrong",  # no "/" in the value: invalid syntax
+            ("application/octet-stream", MappingProxyType({})),
+        ),
+    ],
+)
+def test_parse_content_type(
+    content_type: str, expected: tuple[str, MappingProxyType[str, str]]
+) -> None:
+    result = helpers.parse_content_type(content_type)
+
+    assert result == expected
+
+
 # ------------------- guess_filename ----------------------------------
 
 

diff --git a/tests/test_web_response.py b/tests/test_web_response.py
@@ -1023,10 +1023,10 @@ def test_ctor_content_type_with_extra() -> None:
     assert resp.headers["content-type"] == "text/plain; version=0.0.4; charset=utf-8"
 
 
-def test_invalid_content_type_parses_to_text_plain() -> None:
+def test_invalid_content_type_parses_to_application_octet_stream() -> None:
     resp = web.Response(text="test test", content_type="jpeg")
 
-    assert resp.content_type == "text/plain"
+    assert resp.content_type == "application/octet-stream"  # RFC 9110 default
     assert resp.headers["content-type"] == "jpeg; charset=utf-8"