Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions CHANGES/10889.bugfix.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
Updated ``Content-Type`` header parsing to return ``application/octet-stream`` when the header contains invalid syntax.
See :rfc:`9110#section-8.3-5`.

-- by :user:`sgaist`.
1 change: 1 addition & 0 deletions CONTRIBUTORS.txt
Original file line number Diff line number Diff line change
Expand Up @@ -320,6 +320,7 @@ Roman Postnov
Rong Zhang
Samir Akarioh
Samuel Colvin
Samuel Gaist
Sean Hunt
Sebastian Acuna
Sebastian Hanula
Expand Down
32 changes: 30 additions & 2 deletions aiohttp/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,9 @@
from collections import namedtuple
from collections.abc import Callable, Iterable, Iterator, Mapping
from contextlib import suppress
from email.message import EmailMessage
from email.parser import HeaderParser
from email.policy import HTTP
from email.utils import parsedate
from http.cookies import SimpleCookie
from math import ceil
Expand Down Expand Up @@ -356,14 +358,40 @@ def parse_mimetype(mimetype: str) -> MimeType:
)


class EnsureOctetStream(EmailMessage):
    """Message whose content type defaults to ``application/octet-stream``.

    Intended as the message factory handed to ``HeaderParser`` when a lone
    ``Content-Type`` header is parsed: a syntactically invalid value yields
    the default type instead of the ``text/plain`` fallback of the base class.
    """

    def __init__(self) -> None:
        super().__init__()
        # https://www.rfc-editor.org/rfc/rfc9110#section-8.3-5
        self.set_default_type("application/octet-stream")

    def get_content_type(self) -> Any:
        """Re-implementation from Message.

        Returns the lower-cased ``type/subtype`` of the Content-Type header,
        or the default type (application/octet-stream) in place of
        text/plain when the value is wrong, i.e. it does not consist of
        exactly one ``/`` separating a non-empty type and a non-empty subtype.

        A missing header is treated like an empty value and therefore also
        yields the default type.
        """
        value = self.get("content-type", "").lower()

        # Based on the implementation of _splitparam in the standard library:
        # everything after the first ";" is parameters, not the media type.
        ctype, _, _ = value.partition(";")
        ctype = ctype.strip()
        maintype, _, subtype = ctype.partition("/")
        # RFC 9110 defines media-type as ``type "/" subtype`` where both
        # parts are non-empty tokens, so "text/", "/plain" and "a/b/c" are
        # all invalid syntax.
        if ctype.count("/") != 1 or not maintype or not subtype:
            return self.get_default_type()
        return ctype


@functools.lru_cache(maxsize=56)
def parse_content_type(raw: str) -> tuple[str, MappingProxyType[str, str]]:
"""Parse Content-Type header.

Returns a tuple of the parsed content type and a
MappingProxyType of parameters.
MappingProxyType of parameters. The default returned value
is `application/octet-stream`
"""
msg = HeaderParser().parsestr(f"Content-Type: {raw}")
msg = HeaderParser(EnsureOctetStream, policy=HTTP).parsestr(f"Content-Type: {raw}")
content_type = msg.get_content_type()
params = msg.get_params(())
content_dict = dict(params[1:]) # First element is content type again
Expand Down
12 changes: 5 additions & 7 deletions docs/client_reference.rst
Original file line number Diff line number Diff line change
Expand Up @@ -1550,16 +1550,14 @@ Response object

.. note::

Returns value is ``'application/octet-stream'`` if no
Content-Type header present in HTTP headers according to
:rfc:`9110`. If the *Content-Type* header is invalid (e.g., ``jpg``
instead of ``image/jpeg``), the value is ``text/plain`` by default
according to :rfc:`2045`. To see the original header check
``resp.headers['CONTENT-TYPE']``.
Returns ``'application/octet-stream'`` if no Content-Type header
is present or the value contains invalid syntax according to
:rfc:`9110`. To see the original header check
``resp.headers["Content-Type"]``.

To make sure the Content-Type header is not present in
the server reply, use :attr:`headers` or :attr:`raw_headers`, e.g.
``'CONTENT-TYPE' not in resp.headers``.
``'Content-Type' not in resp.headers``.

.. attribute:: charset

Expand Down
2 changes: 1 addition & 1 deletion requirements/constraints.txt
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,7 @@ jinja2==3.1.6
# via
# sphinx
# towncrier
markdown-it-py==3.0.0
markdown-it-py==4.0.0
# via rich
markupsafe==3.0.3
# via jinja2
Expand Down
2 changes: 1 addition & 1 deletion requirements/dev.txt
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,7 @@ jinja2==3.1.6
# via
# sphinx
# towncrier
markdown-it-py==3.0.0
markdown-it-py==4.0.0
# via rich
markupsafe==3.0.3
# via jinja2
Expand Down
2 changes: 1 addition & 1 deletion requirements/lint.txt
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ iniconfig==2.1.0
# via pytest
isal==1.7.2
# via -r requirements/lint.in
markdown-it-py==3.0.0
markdown-it-py==4.0.0
# via rich
mdurl==0.1.2
# via markdown-it-py
Expand Down
2 changes: 1 addition & 1 deletion requirements/test.txt
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ iniconfig==2.1.0
# via pytest
isal==1.7.2 ; python_version < "3.14"
# via -r requirements/test-common.in
markdown-it-py==3.0.0
markdown-it-py==4.0.0
# via rich
mdurl==0.1.2
# via markdown-it-py
Expand Down
25 changes: 25 additions & 0 deletions tests/test_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from collections.abc import Iterator
from math import ceil, modf
from pathlib import Path
from types import MappingProxyType
from unittest import mock
from urllib.request import getproxies_environment

Expand Down Expand Up @@ -81,6 +82,30 @@ def test_parse_mimetype(mimetype: str, expected: helpers.MimeType) -> None:
assert result == expected


# ------------------- parse_content_type ------------------------------


@pytest.mark.parametrize(
    "content_type, expected",
    [
        # Valid media type without parameters.
        (
            "text/plain",
            ("text/plain", MappingProxyType({})),
        ),
        # Valid media type with a parameter.
        (
            "text/html; charset=utf-8",
            ("text/html", MappingProxyType({"charset": "utf-8"})),
        ),
        # Invalid syntax falls back to application/octet-stream.
        (
            "wrong",
            ("application/octet-stream", MappingProxyType({})),
        ),
    ],
)
def test_parse_content_type(
    content_type: str, expected: tuple[str, MappingProxyType[str, str]]
) -> None:
    """Check the (content type, parameters) tuple from parse_content_type."""
    result = helpers.parse_content_type(content_type)

    assert result == expected


# ------------------- guess_filename ----------------------------------


Expand Down
4 changes: 2 additions & 2 deletions tests/test_web_response.py
Original file line number Diff line number Diff line change
Expand Up @@ -1023,10 +1023,10 @@ def test_ctor_content_type_with_extra() -> None:
assert resp.headers["content-type"] == "text/plain; version=0.0.4; charset=utf-8"


def test_invalid_content_type_parses_to_application_octet_stream() -> None:
    """An invalid content type is reported as application/octet-stream.

    The raw header keeps the value that was passed in.
    """
    resp = web.Response(text="test test", content_type="jpeg")

    assert resp.content_type == "application/octet-stream"
    assert resp.headers["content-type"] == "jpeg; charset=utf-8"


Expand Down
Loading