From 1b9a3c638c219d42a6c0a4f903bf6fdd5ea8c9c1 Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" Date: Mon, 2 Jun 2025 22:42:22 +0100 Subject: [PATCH 01/11] Increment version to 3.12.8.dev0 (#11122) --- aiohttp/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/aiohttp/__init__.py b/aiohttp/__init__.py index d4ba8ccb488..28981917a0e 100644 --- a/aiohttp/__init__.py +++ b/aiohttp/__init__.py @@ -1,4 +1,4 @@ -__version__ = "3.12.7" +__version__ = "3.12.8.dev0" from typing import TYPE_CHECKING, Tuple From 650592322cc4c7eaea69c76d6bc5546c194b3a2b Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 3 Jun 2025 10:53:42 +0000 Subject: [PATCH 02/11] Bump uritemplate from 4.1.1 to 4.2.0 (#11127) Bumps [uritemplate](https://github.com/python-hyper/uritemplate) from 4.1.1 to 4.2.0.
Changelog

Sourced from uritemplate's changelog.

4.2.0 - 2025-06-01

  • Drop support for Python 3.8
  • Fix a bug where already URL-encoded values were further escaped and encoded (see python-hyper/uritemplate#99 and the sketch after this list)
  • Refactor uritemplate/variable.py to enable fixing the aforementioned bug.
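A minimal sketch of the expansion behavior this fix concerns, using uritemplate's public `expand` API; the pre-4.2.0 double-encoding shown in the comments is an assumption based on the changelog entry, not verified output:

```python
import uritemplate

# RFC 6570 expansion percent-encodes reserved characters in the value.
url = uritemplate.expand("https://example.com/search{?q}", q="a b&c")
print(url)  # https://example.com/search?q=a%20b%26c

# The 4.2.0 fix (python-hyper/uritemplate#99) concerns values that are
# already percent-encoded: before the fix, such a value could be escaped
# again, e.g. "a%20b" becoming "a%2520b" (assumption from the changelog).
already_encoded = uritemplate.expand("https://example.com/files/{name}", name="a%20b")
print(already_encoded)
```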
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- requirements/constraints.txt | 2 +- requirements/dev.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/requirements/constraints.txt b/requirements/constraints.txt index 314f88f8462..0bc42abfba0 100644 --- a/requirements/constraints.txt +++ b/requirements/constraints.txt @@ -278,7 +278,7 @@ typing-extensions==4.13.2 # typing-inspection typing-inspection==0.4.1 # via pydantic -uritemplate==4.1.1 +uritemplate==4.2.0 # via gidgethub urllib3==2.4.0 # via requests diff --git a/requirements/dev.txt b/requirements/dev.txt index 55f1b831543..cdf512edb69 100644 --- a/requirements/dev.txt +++ b/requirements/dev.txt @@ -269,7 +269,7 @@ typing-extensions==4.13.2 # typing-inspection typing-inspection==0.4.1 # via pydantic -uritemplate==4.1.1 +uritemplate==4.2.0 # via gidgethub urllib3==2.4.0 # via requests From 278fc1ea083e856722c424806653abf567ed6c73 Mon Sep 17 00:00:00 2001 From: "patchback[bot]" <45432694+patchback[bot]@users.noreply.github.com> Date: Wed, 4 Jun 2025 09:51:53 +0100 Subject: [PATCH 03/11] [PR #11129/c0449bb5 backport][3.12] Add preemptive authentication support to DigestAuthMiddleware (#11131) Co-authored-by: J. Nick Koston Fixes #11128 --- CHANGES/11128.feature.rst | 9 + CHANGES/11129.feature.rst | 1 + aiohttp/client_middleware_digest_auth.py | 62 ++- docs/client_reference.rst | 34 +- tests/test_client_middleware_digest_auth.py | 421 ++++++++++++++++++++ 5 files changed, 523 insertions(+), 4 deletions(-) create mode 100644 CHANGES/11128.feature.rst create mode 120000 CHANGES/11129.feature.rst diff --git a/CHANGES/11128.feature.rst b/CHANGES/11128.feature.rst new file mode 100644 index 00000000000..0f99d2b8a11 --- /dev/null +++ b/CHANGES/11128.feature.rst @@ -0,0 +1,9 @@ +Added preemptive digest authentication to :class:`~aiohttp.DigestAuthMiddleware` -- by :user:`bdraco`. + +The middleware now reuses authentication credentials for subsequent requests to the same +protection space, improving efficiency by avoiding extra authentication round trips. +This behavior matches how web browsers handle digest authentication and follows +:rfc:`7616#section-3.6`. + +Preemptive authentication is enabled by default but can be disabled by passing +``preemptive=False`` to the middleware constructor. diff --git a/CHANGES/11129.feature.rst b/CHANGES/11129.feature.rst new file mode 120000 index 00000000000..692d28ba9ce --- /dev/null +++ b/CHANGES/11129.feature.rst @@ -0,0 +1 @@ +11128.feature.rst \ No newline at end of file diff --git a/aiohttp/client_middleware_digest_auth.py b/aiohttp/client_middleware_digest_auth.py index b2daf76e6bb..35f462f180b 100644 --- a/aiohttp/client_middleware_digest_auth.py +++ b/aiohttp/client_middleware_digest_auth.py @@ -38,6 +38,8 @@ class DigestAuthChallenge(TypedDict, total=False): qop: str algorithm: str opaque: str + domain: str + stale: str DigestFunctions: Dict[str, Callable[[bytes], "hashlib._Hash"]] = { @@ -81,13 +83,17 @@ class DigestAuthChallenge(TypedDict, total=False): # RFC 7616: Challenge parameters to extract CHALLENGE_FIELDS: Final[ - Tuple[Literal["realm", "nonce", "qop", "algorithm", "opaque"], ...] + Tuple[ + Literal["realm", "nonce", "qop", "algorithm", "opaque", "domain", "stale"], ... 
+ ] ] = ( "realm", "nonce", "qop", "algorithm", "opaque", + "domain", + "stale", ) # Supported digest authentication algorithms @@ -159,6 +165,7 @@ class DigestAuthMiddleware: - Supports 'auth' and 'auth-int' quality of protection modes - Properly handles quoted strings and parameter parsing - Includes replay attack protection with client nonce count tracking + - Supports preemptive authentication per RFC 7616 Section 3.6 Standards compliance: - RFC 7616: HTTP Digest Access Authentication (primary reference) @@ -175,6 +182,7 @@ def __init__( self, login: str, password: str, + preemptive: bool = True, ) -> None: if login is None: raise ValueError("None is not allowed as login value") @@ -192,6 +200,9 @@ def __init__( self._last_nonce_bytes = b"" self._nonce_count = 0 self._challenge: DigestAuthChallenge = {} + self._preemptive: bool = preemptive + # Set of URLs defining the protection space + self._protection_space: List[str] = [] async def _encode( self, method: str, url: URL, body: Union[Payload, Literal[b""]] @@ -354,6 +365,26 @@ def KD(s: bytes, d: bytes) -> bytes: return f"Digest {', '.join(pairs)}" + def _in_protection_space(self, url: URL) -> bool: + """ + Check if the given URL is within the current protection space. + + According to RFC 7616, a URI is in the protection space if any URI + in the protection space is a prefix of it (after both have been made absolute). + """ + request_str = str(url) + for space_str in self._protection_space: + # Check if request starts with space URL + if not request_str.startswith(space_str): + continue + # Exact match or space ends with / (proper directory prefix) + if len(request_str) == len(space_str) or space_str[-1] == "/": + return True + # Check next char is / to ensure proper path boundary + if request_str[len(space_str)] == "/": + return True + return False + def _authenticate(self, response: ClientResponse) -> bool: """ Takes the given response and tries digest-auth, if needed. @@ -391,6 +422,25 @@ def _authenticate(self, response: ClientResponse) -> bool: if value := header_pairs.get(field): self._challenge[field] = value + # Update protection space based on domain parameter or default to origin + origin = response.url.origin() + + if domain := self._challenge.get("domain"): + # Parse space-separated list of URIs + self._protection_space = [] + for uri in domain.split(): + # Remove quotes if present + uri = uri.strip('"') + if uri.startswith("/"): + # Path-absolute, relative to origin + self._protection_space.append(str(origin.join(URL(uri)))) + else: + # Absolute URI + self._protection_space.append(str(URL(uri))) + else: + # No domain specified, protection space is entire origin + self._protection_space = [str(origin)] + # Return True only if we found at least one challenge parameter return bool(self._challenge) @@ -400,8 +450,14 @@ async def __call__( """Run the digest auth middleware.""" response = None for retry_count in range(2): - # Apply authorization header if we have a challenge (on second attempt) - if retry_count > 0: + # Apply authorization header if: + # 1. This is a retry after 401 (retry_count > 0), OR + # 2. 
Preemptive auth is enabled AND we have a challenge AND the URL is in protection space + if retry_count > 0 or ( + self._preemptive + and self._challenge + and self._in_protection_space(request.url) + ): request.headers[hdrs.AUTHORIZATION] = await self._encode( request.method, request.url, request.body ) diff --git a/docs/client_reference.rst b/docs/client_reference.rst index 8a721f514cd..1644c57054b 100644 --- a/docs/client_reference.rst +++ b/docs/client_reference.rst @@ -2300,12 +2300,13 @@ Utilities :return: encoded authentication data, :class:`str`. -.. class:: DigestAuthMiddleware(login, password) +.. class:: DigestAuthMiddleware(login, password, *, preemptive=True) HTTP digest authentication client middleware. :param str login: login :param str password: password + :param bool preemptive: Enable preemptive authentication (default: ``True``) This middleware supports HTTP digest authentication with both `auth` and `auth-int` quality of protection (qop) modes, and a variety of hashing algorithms. @@ -2315,6 +2316,31 @@ Utilities - Parsing 401 Unauthorized responses with `WWW-Authenticate: Digest` headers - Generating appropriate `Authorization: Digest` headers on retry - Maintaining nonce counts and challenge data per request + - When ``preemptive=True``, reusing authentication credentials for subsequent + requests to the same protection space (following RFC 7616 Section 3.6) + + **Preemptive Authentication** + + By default (``preemptive=True``), the middleware remembers successful authentication + challenges and automatically includes the Authorization header in subsequent requests + to the same protection space. This behavior: + + - Improves server efficiency by avoiding extra round trips + - Matches how modern web browsers handle digest authentication + - Follows the recommendation in RFC 7616 Section 3.6 + + The server may still respond with a 401 status and ``stale=true`` if the nonce + has expired, in which case the middleware will automatically retry with the new nonce. + + To disable preemptive authentication and require a 401 challenge for every request, + set ``preemptive=False``:: + + # Default behavior - preemptive auth enabled + digest_auth_middleware = DigestAuthMiddleware(login="user", password="pass") + + # Disable preemptive auth - always wait for 401 challenge + digest_auth_middleware = DigestAuthMiddleware(login="user", password="pass", + preemptive=False) Usage:: @@ -2324,7 +2350,13 @@ Utilities # The middleware automatically handles the digest auth handshake assert resp.status == 200 + # Subsequent requests include auth header preemptively + async with session.get("http://protected.example.com/other") as resp: + assert resp.status == 200 # No 401 round trip needed + .. versionadded:: 3.12 + .. versionchanged:: 3.12.8 + Added ``preemptive`` parameter to enable/disable preemptive authentication. .. 
class:: CookieJar(*, unsafe=False, quote_cookie=True, treat_as_secure_origin = []) diff --git a/tests/test_client_middleware_digest_auth.py b/tests/test_client_middleware_digest_auth.py index b649e0b601f..16959aecdf4 100644 --- a/tests/test_client_middleware_digest_auth.py +++ b/tests/test_client_middleware_digest_auth.py @@ -778,6 +778,332 @@ async def handler(request: Request) -> Response: assert request_count == 2 +async def test_preemptive_auth_disabled( + aiohttp_server: AiohttpServer, +) -> None: + """Test that preemptive authentication can be disabled.""" + digest_auth_mw = DigestAuthMiddleware("user", "pass", preemptive=False) + request_count = 0 + auth_headers = [] + + async def handler(request: Request) -> Response: + nonlocal request_count + request_count += 1 + auth_headers.append(request.headers.get(hdrs.AUTHORIZATION)) + + if not request.headers.get(hdrs.AUTHORIZATION): + # Return 401 with digest challenge + challenge = 'Digest realm="test", nonce="abc123", qop="auth", algorithm=MD5' + return Response( + status=401, + headers={"WWW-Authenticate": challenge}, + text="Unauthorized", + ) + + return Response(text="OK") + + app = Application() + app.router.add_get("/", handler) + server = await aiohttp_server(app) + + async with ClientSession(middlewares=(digest_auth_mw,)) as session: + # First request will get 401 and store challenge + async with session.get(server.make_url("/")) as resp: + assert resp.status == 200 + text = await resp.text() + assert text == "OK" + + # Second request should NOT send auth preemptively (preemptive=False) + async with session.get(server.make_url("/")) as resp: + assert resp.status == 200 + text = await resp.text() + assert text == "OK" + + # With preemptive disabled, each request needs 401 challenge first + assert request_count == 4 # 2 requests * 2 (401 + retry) + assert auth_headers[0] is None # First request has no auth + assert auth_headers[1] is not None # Second request has auth after 401 + assert auth_headers[2] is None # Third request has no auth (preemptive disabled) + assert auth_headers[3] is not None # Fourth request has auth after 401 + + +async def test_preemptive_auth_with_stale_nonce( + aiohttp_server: AiohttpServer, +) -> None: + """Test preemptive auth handles stale nonce responses correctly.""" + digest_auth_mw = DigestAuthMiddleware("user", "pass", preemptive=True) + request_count = 0 + current_nonce = 0 + + async def handler(request: Request) -> Response: + nonlocal request_count, current_nonce + request_count += 1 + + auth_header = request.headers.get(hdrs.AUTHORIZATION) + + if not auth_header: + # First request without auth + current_nonce = 1 + challenge = f'Digest realm="test", nonce="nonce{current_nonce}", qop="auth", algorithm=MD5' + return Response( + status=401, + headers={"WWW-Authenticate": challenge}, + text="Unauthorized", + ) + + # For the second set of requests, always consider the first nonce stale + if request_count == 3 and current_nonce == 1: + # Stale nonce - request new auth with stale=true + current_nonce = 2 + challenge = f'Digest realm="test", nonce="nonce{current_nonce}", qop="auth", algorithm=MD5, stale=true' + return Response( + status=401, + headers={"WWW-Authenticate": challenge}, + text="Unauthorized - Stale nonce", + ) + + return Response(text="OK") + + app = Application() + app.router.add_get("/", handler) + server = await aiohttp_server(app) + + async with ClientSession(middlewares=(digest_auth_mw,)) as session: + # First request - will get 401, then retry with auth + async with 
session.get(server.make_url("/")) as resp: + assert resp.status == 200 + text = await resp.text() + assert text == "OK" + + # Second request - will use preemptive auth with nonce1, get 401 stale, retry with nonce2 + async with session.get(server.make_url("/")) as resp: + assert resp.status == 200 + text = await resp.text() + assert text == "OK" + + # Verify the expected flow: + # Request 1: no auth -> 401 + # Request 2: retry with auth -> 200 + # Request 3: preemptive auth with old nonce -> 401 stale + # Request 4: retry with new nonce -> 200 + assert request_count == 4 + + +async def test_preemptive_auth_updates_nonce_count( + aiohttp_server: AiohttpServer, +) -> None: + """Test that preemptive auth properly increments nonce count.""" + digest_auth_mw = DigestAuthMiddleware("user", "pass", preemptive=True) + request_count = 0 + nonce_counts = [] + + async def handler(request: Request) -> Response: + nonlocal request_count + request_count += 1 + + auth_header = request.headers.get(hdrs.AUTHORIZATION) + + if not auth_header: + # First request without auth + challenge = 'Digest realm="test", nonce="abc123", qop="auth", algorithm=MD5' + return Response( + status=401, + headers={"WWW-Authenticate": challenge}, + text="Unauthorized", + ) + + # Extract nc (nonce count) from auth header + nc_match = auth_header.split("nc=")[1].split(",")[0].strip() + nonce_counts.append(nc_match) + + return Response(text="OK") + + app = Application() + app.router.add_get("/", handler) + server = await aiohttp_server(app) + + async with ClientSession(middlewares=(digest_auth_mw,)) as session: + # Make multiple requests to see nonce count increment + for _ in range(3): + async with session.get(server.make_url("/")) as resp: + assert resp.status == 200 + await resp.text() + + # First request has no auth, then gets 401 and retries with nc=00000001 + # Second and third requests use preemptive auth with nc=00000002 and nc=00000003 + assert len(nonce_counts) == 3 + assert nonce_counts[0] == "00000001" + assert nonce_counts[1] == "00000002" + assert nonce_counts[2] == "00000003" + + +async def test_preemptive_auth_respects_protection_space( + aiohttp_server: AiohttpServer, +) -> None: + """Test that preemptive auth only applies to URLs within the protection space.""" + digest_auth_mw = DigestAuthMiddleware("user", "pass", preemptive=True) + request_count = 0 + auth_headers = [] + requested_paths = [] + + async def handler(request: Request) -> Response: + nonlocal request_count + request_count += 1 + auth_headers.append(request.headers.get(hdrs.AUTHORIZATION)) + requested_paths.append(request.path) + + if not request.headers.get(hdrs.AUTHORIZATION): + # Return 401 with digest challenge including domain parameter + challenge = 'Digest realm="test", nonce="abc123", qop="auth", algorithm=MD5, domain="/api /admin"' + return Response( + status=401, + headers={"WWW-Authenticate": challenge}, + text="Unauthorized", + ) + + return Response(text="OK") + + app = Application() + app.router.add_get("/api/endpoint", handler) + app.router.add_get("/admin/panel", handler) + app.router.add_get("/public/page", handler) + server = await aiohttp_server(app) + + async with ClientSession(middlewares=(digest_auth_mw,)) as session: + # First request to /api/endpoint - should get 401 and retry with auth + async with session.get(server.make_url("/api/endpoint")) as resp: + assert resp.status == 200 + + # Second request to /api/endpoint - should use preemptive auth (in protection space) + async with session.get(server.make_url("/api/endpoint")) as 
resp: + assert resp.status == 200 + + # Third request to /admin/panel - should use preemptive auth (in protection space) + async with session.get(server.make_url("/admin/panel")) as resp: + assert resp.status == 200 + + # Fourth request to /public/page - should NOT use preemptive auth (outside protection space) + async with session.get(server.make_url("/public/page")) as resp: + assert resp.status == 200 + + # Verify auth headers + assert auth_headers[0] is None # First request to /api/endpoint - no auth + assert auth_headers[1] is not None # Retry with auth + assert ( + auth_headers[2] is not None + ) # Second request to /api/endpoint - preemptive auth + assert auth_headers[3] is not None # Request to /admin/panel - preemptive auth + assert auth_headers[4] is None # First request to /public/page - no preemptive auth + assert auth_headers[5] is not None # Retry with auth + + # Verify paths + assert requested_paths == [ + "/api/endpoint", # Initial request + "/api/endpoint", # Retry with auth + "/api/endpoint", # Second request with preemptive auth + "/admin/panel", # Request with preemptive auth + "/public/page", # Initial request (no preemptive auth) + "/public/page", # Retry with auth + ] + + +async def test_preemptive_auth_with_absolute_domain_uris( + aiohttp_server: AiohttpServer, +) -> None: + """Test preemptive auth with absolute URIs in domain parameter.""" + digest_auth_mw = DigestAuthMiddleware("user", "pass", preemptive=True) + request_count = 0 + auth_headers = [] + + async def handler(request: Request) -> Response: + nonlocal request_count + request_count += 1 + auth_headers.append(request.headers.get(hdrs.AUTHORIZATION)) + + if not request.headers.get(hdrs.AUTHORIZATION): + # Return 401 with digest challenge including absolute URI in domain + server_url = str(request.url.with_path("/protected")) + challenge = f'Digest realm="test", nonce="abc123", qop="auth", algorithm=MD5, domain="{server_url}"' + return Response( + status=401, + headers={"WWW-Authenticate": challenge}, + text="Unauthorized", + ) + + return Response(text="OK") + + app = Application() + app.router.add_get("/protected/resource", handler) + app.router.add_get("/unprotected/resource", handler) + server = await aiohttp_server(app) + + async with ClientSession(middlewares=(digest_auth_mw,)) as session: + # First request to protected resource + async with session.get(server.make_url("/protected/resource")) as resp: + assert resp.status == 200 + + # Second request to protected resource - should use preemptive auth + async with session.get(server.make_url("/protected/resource")) as resp: + assert resp.status == 200 + + # Request to unprotected resource - should NOT use preemptive auth + async with session.get(server.make_url("/unprotected/resource")) as resp: + assert resp.status == 200 + + # Verify auth pattern + assert auth_headers[0] is None # First request - no auth + assert auth_headers[1] is not None # Retry with auth + assert auth_headers[2] is not None # Second request - preemptive auth + assert auth_headers[3] is None # Unprotected resource - no preemptive auth + assert auth_headers[4] is not None # Retry with auth + + +async def test_preemptive_auth_without_domain_uses_origin( + aiohttp_server: AiohttpServer, +) -> None: + """Test that preemptive auth without domain parameter applies to entire origin.""" + digest_auth_mw = DigestAuthMiddleware("user", "pass", preemptive=True) + request_count = 0 + auth_headers = [] + + async def handler(request: Request) -> Response: + nonlocal request_count + request_count 
+= 1 + auth_headers.append(request.headers.get(hdrs.AUTHORIZATION)) + + if not request.headers.get(hdrs.AUTHORIZATION): + # Return 401 with digest challenge without domain parameter + challenge = 'Digest realm="test", nonce="abc123", qop="auth", algorithm=MD5' + return Response( + status=401, + headers={"WWW-Authenticate": challenge}, + text="Unauthorized", + ) + + return Response(text="OK") + + app = Application() + app.router.add_get("/path1", handler) + app.router.add_get("/path2", handler) + server = await aiohttp_server(app) + + async with ClientSession(middlewares=(digest_auth_mw,)) as session: + # First request + async with session.get(server.make_url("/path1")) as resp: + assert resp.status == 200 + + # Second request to different path - should still use preemptive auth + async with session.get(server.make_url("/path2")) as resp: + assert resp.status == 200 + + # Verify auth pattern + assert auth_headers[0] is None # First request - no auth + assert auth_headers[1] is not None # Retry with auth + assert ( + auth_headers[2] is not None + ) # Second request - preemptive auth (entire origin) + + @pytest.mark.parametrize( ("status", "headers", "expected"), [ @@ -810,3 +1136,98 @@ def test_authenticate_with_malformed_headers( result = digest_auth_mw._authenticate(response) assert result == expected + + +@pytest.mark.parametrize( + ("protection_space_url", "request_url", "expected"), + [ + # Exact match + ("http://example.com/app1", "http://example.com/app1", True), + # Path with trailing slash should match + ("http://example.com/app1", "http://example.com/app1/", True), + # Subpaths should match + ("http://example.com/app1", "http://example.com/app1/resource", True), + ("http://example.com/app1", "http://example.com/app1/sub/path", True), + # Should NOT match different paths that start with same prefix + ("http://example.com/app1", "http://example.com/app1xx", False), + ("http://example.com/app1", "http://example.com/app123", False), + # Protection space with trailing slash + ("http://example.com/app1/", "http://example.com/app1/", True), + ("http://example.com/app1/", "http://example.com/app1/resource", True), + ( + "http://example.com/app1/", + "http://example.com/app1", + False, + ), # No trailing slash + # Root protection space + ("http://example.com/", "http://example.com/", True), + ("http://example.com/", "http://example.com/anything", True), + ("http://example.com/", "http://example.com", False), # No trailing slash + # Different origins should not match + ("http://example.com/app1", "https://example.com/app1", False), + ("http://example.com/app1", "http://other.com/app1", False), + ("http://example.com:8080/app1", "http://example.com/app1", False), + ], + ids=[ + "exact_match", + "path_with_trailing_slash", + "subpath_match", + "deep_subpath_match", + "no_match_app1xx", + "no_match_app123", + "protection_with_slash_exact", + "protection_with_slash_subpath", + "protection_with_slash_no_match_without", + "root_protection_exact", + "root_protection_subpath", + "root_protection_no_match_without_slash", + "different_scheme", + "different_host", + "different_port", + ], +) +def test_in_protection_space( + digest_auth_mw: DigestAuthMiddleware, + protection_space_url: str, + request_url: str, + expected: bool, +) -> None: + """Test _in_protection_space method with various URL patterns.""" + digest_auth_mw._protection_space = [protection_space_url] + result = digest_auth_mw._in_protection_space(URL(request_url)) + assert result == expected + + +def 
test_in_protection_space_multiple_spaces( + digest_auth_mw: DigestAuthMiddleware, +) -> None: + """Test _in_protection_space with multiple protection spaces.""" + digest_auth_mw._protection_space = [ + "http://example.com/api", + "http://example.com/admin/", + "http://example.com/secure/area", + ] + + # Test various URLs + assert digest_auth_mw._in_protection_space(URL("http://example.com/api")) is True + assert digest_auth_mw._in_protection_space(URL("http://example.com/api/v1")) is True + assert ( + digest_auth_mw._in_protection_space(URL("http://example.com/admin/panel")) + is True + ) + assert ( + digest_auth_mw._in_protection_space( + URL("http://example.com/secure/area/resource") + ) + is True + ) + + # These should not match + assert digest_auth_mw._in_protection_space(URL("http://example.com/apiv2")) is False + assert ( + digest_auth_mw._in_protection_space(URL("http://example.com/admin")) is False + ) # No trailing slash + assert ( + digest_auth_mw._in_protection_space(URL("http://example.com/secure")) is False + ) + assert digest_auth_mw._in_protection_space(URL("http://example.com/other")) is False From 72fa2d02fe84ec61305bc06ab75a8c5e5616cf68 Mon Sep 17 00:00:00 2001 From: "patchback[bot]" <45432694+patchback[bot]@users.noreply.github.com> Date: Wed, 4 Jun 2025 09:52:01 +0100 Subject: [PATCH 04/11] [PR #11129/c0449bb5 backport][3.13] Add preemptive authentication support to DigestAuthMiddleware (#11132) Co-authored-by: J. Nick Koston Fixes #11128 --- CHANGES/11128.feature.rst | 9 + CHANGES/11129.feature.rst | 1 + aiohttp/client_middleware_digest_auth.py | 62 ++- docs/client_reference.rst | 34 +- tests/test_client_middleware_digest_auth.py | 421 ++++++++++++++++++++ 5 files changed, 523 insertions(+), 4 deletions(-) create mode 100644 CHANGES/11128.feature.rst create mode 120000 CHANGES/11129.feature.rst diff --git a/CHANGES/11128.feature.rst b/CHANGES/11128.feature.rst new file mode 100644 index 00000000000..0f99d2b8a11 --- /dev/null +++ b/CHANGES/11128.feature.rst @@ -0,0 +1,9 @@ +Added preemptive digest authentication to :class:`~aiohttp.DigestAuthMiddleware` -- by :user:`bdraco`. + +The middleware now reuses authentication credentials for subsequent requests to the same +protection space, improving efficiency by avoiding extra authentication round trips. +This behavior matches how web browsers handle digest authentication and follows +:rfc:`7616#section-3.6`. + +Preemptive authentication is enabled by default but can be disabled by passing +``preemptive=False`` to the middleware constructor. diff --git a/CHANGES/11129.feature.rst b/CHANGES/11129.feature.rst new file mode 120000 index 00000000000..692d28ba9ce --- /dev/null +++ b/CHANGES/11129.feature.rst @@ -0,0 +1 @@ +11128.feature.rst \ No newline at end of file diff --git a/aiohttp/client_middleware_digest_auth.py b/aiohttp/client_middleware_digest_auth.py index b2daf76e6bb..35f462f180b 100644 --- a/aiohttp/client_middleware_digest_auth.py +++ b/aiohttp/client_middleware_digest_auth.py @@ -38,6 +38,8 @@ class DigestAuthChallenge(TypedDict, total=False): qop: str algorithm: str opaque: str + domain: str + stale: str DigestFunctions: Dict[str, Callable[[bytes], "hashlib._Hash"]] = { @@ -81,13 +83,17 @@ class DigestAuthChallenge(TypedDict, total=False): # RFC 7616: Challenge parameters to extract CHALLENGE_FIELDS: Final[ - Tuple[Literal["realm", "nonce", "qop", "algorithm", "opaque"], ...] + Tuple[ + Literal["realm", "nonce", "qop", "algorithm", "opaque", "domain", "stale"], ... 
+ ] ] = ( "realm", "nonce", "qop", "algorithm", "opaque", + "domain", + "stale", ) # Supported digest authentication algorithms @@ -159,6 +165,7 @@ class DigestAuthMiddleware: - Supports 'auth' and 'auth-int' quality of protection modes - Properly handles quoted strings and parameter parsing - Includes replay attack protection with client nonce count tracking + - Supports preemptive authentication per RFC 7616 Section 3.6 Standards compliance: - RFC 7616: HTTP Digest Access Authentication (primary reference) @@ -175,6 +182,7 @@ def __init__( self, login: str, password: str, + preemptive: bool = True, ) -> None: if login is None: raise ValueError("None is not allowed as login value") @@ -192,6 +200,9 @@ def __init__( self._last_nonce_bytes = b"" self._nonce_count = 0 self._challenge: DigestAuthChallenge = {} + self._preemptive: bool = preemptive + # Set of URLs defining the protection space + self._protection_space: List[str] = [] async def _encode( self, method: str, url: URL, body: Union[Payload, Literal[b""]] @@ -354,6 +365,26 @@ def KD(s: bytes, d: bytes) -> bytes: return f"Digest {', '.join(pairs)}" + def _in_protection_space(self, url: URL) -> bool: + """ + Check if the given URL is within the current protection space. + + According to RFC 7616, a URI is in the protection space if any URI + in the protection space is a prefix of it (after both have been made absolute). + """ + request_str = str(url) + for space_str in self._protection_space: + # Check if request starts with space URL + if not request_str.startswith(space_str): + continue + # Exact match or space ends with / (proper directory prefix) + if len(request_str) == len(space_str) or space_str[-1] == "/": + return True + # Check next char is / to ensure proper path boundary + if request_str[len(space_str)] == "/": + return True + return False + def _authenticate(self, response: ClientResponse) -> bool: """ Takes the given response and tries digest-auth, if needed. @@ -391,6 +422,25 @@ def _authenticate(self, response: ClientResponse) -> bool: if value := header_pairs.get(field): self._challenge[field] = value + # Update protection space based on domain parameter or default to origin + origin = response.url.origin() + + if domain := self._challenge.get("domain"): + # Parse space-separated list of URIs + self._protection_space = [] + for uri in domain.split(): + # Remove quotes if present + uri = uri.strip('"') + if uri.startswith("/"): + # Path-absolute, relative to origin + self._protection_space.append(str(origin.join(URL(uri)))) + else: + # Absolute URI + self._protection_space.append(str(URL(uri))) + else: + # No domain specified, protection space is entire origin + self._protection_space = [str(origin)] + # Return True only if we found at least one challenge parameter return bool(self._challenge) @@ -400,8 +450,14 @@ async def __call__( """Run the digest auth middleware.""" response = None for retry_count in range(2): - # Apply authorization header if we have a challenge (on second attempt) - if retry_count > 0: + # Apply authorization header if: + # 1. This is a retry after 401 (retry_count > 0), OR + # 2. 
Preemptive auth is enabled AND we have a challenge AND the URL is in protection space + if retry_count > 0 or ( + self._preemptive + and self._challenge + and self._in_protection_space(request.url) + ): request.headers[hdrs.AUTHORIZATION] = await self._encode( request.method, request.url, request.body ) diff --git a/docs/client_reference.rst b/docs/client_reference.rst index 8a721f514cd..1644c57054b 100644 --- a/docs/client_reference.rst +++ b/docs/client_reference.rst @@ -2300,12 +2300,13 @@ Utilities :return: encoded authentication data, :class:`str`. -.. class:: DigestAuthMiddleware(login, password) +.. class:: DigestAuthMiddleware(login, password, *, preemptive=True) HTTP digest authentication client middleware. :param str login: login :param str password: password + :param bool preemptive: Enable preemptive authentication (default: ``True``) This middleware supports HTTP digest authentication with both `auth` and `auth-int` quality of protection (qop) modes, and a variety of hashing algorithms. @@ -2315,6 +2316,31 @@ Utilities - Parsing 401 Unauthorized responses with `WWW-Authenticate: Digest` headers - Generating appropriate `Authorization: Digest` headers on retry - Maintaining nonce counts and challenge data per request + - When ``preemptive=True``, reusing authentication credentials for subsequent + requests to the same protection space (following RFC 7616 Section 3.6) + + **Preemptive Authentication** + + By default (``preemptive=True``), the middleware remembers successful authentication + challenges and automatically includes the Authorization header in subsequent requests + to the same protection space. This behavior: + + - Improves server efficiency by avoiding extra round trips + - Matches how modern web browsers handle digest authentication + - Follows the recommendation in RFC 7616 Section 3.6 + + The server may still respond with a 401 status and ``stale=true`` if the nonce + has expired, in which case the middleware will automatically retry with the new nonce. + + To disable preemptive authentication and require a 401 challenge for every request, + set ``preemptive=False``:: + + # Default behavior - preemptive auth enabled + digest_auth_middleware = DigestAuthMiddleware(login="user", password="pass") + + # Disable preemptive auth - always wait for 401 challenge + digest_auth_middleware = DigestAuthMiddleware(login="user", password="pass", + preemptive=False) Usage:: @@ -2324,7 +2350,13 @@ Utilities # The middleware automatically handles the digest auth handshake assert resp.status == 200 + # Subsequent requests include auth header preemptively + async with session.get("http://protected.example.com/other") as resp: + assert resp.status == 200 # No 401 round trip needed + .. versionadded:: 3.12 + .. versionchanged:: 3.12.8 + Added ``preemptive`` parameter to enable/disable preemptive authentication. .. 
class:: CookieJar(*, unsafe=False, quote_cookie=True, treat_as_secure_origin = []) diff --git a/tests/test_client_middleware_digest_auth.py b/tests/test_client_middleware_digest_auth.py index b649e0b601f..16959aecdf4 100644 --- a/tests/test_client_middleware_digest_auth.py +++ b/tests/test_client_middleware_digest_auth.py @@ -778,6 +778,332 @@ async def handler(request: Request) -> Response: assert request_count == 2 +async def test_preemptive_auth_disabled( + aiohttp_server: AiohttpServer, +) -> None: + """Test that preemptive authentication can be disabled.""" + digest_auth_mw = DigestAuthMiddleware("user", "pass", preemptive=False) + request_count = 0 + auth_headers = [] + + async def handler(request: Request) -> Response: + nonlocal request_count + request_count += 1 + auth_headers.append(request.headers.get(hdrs.AUTHORIZATION)) + + if not request.headers.get(hdrs.AUTHORIZATION): + # Return 401 with digest challenge + challenge = 'Digest realm="test", nonce="abc123", qop="auth", algorithm=MD5' + return Response( + status=401, + headers={"WWW-Authenticate": challenge}, + text="Unauthorized", + ) + + return Response(text="OK") + + app = Application() + app.router.add_get("/", handler) + server = await aiohttp_server(app) + + async with ClientSession(middlewares=(digest_auth_mw,)) as session: + # First request will get 401 and store challenge + async with session.get(server.make_url("/")) as resp: + assert resp.status == 200 + text = await resp.text() + assert text == "OK" + + # Second request should NOT send auth preemptively (preemptive=False) + async with session.get(server.make_url("/")) as resp: + assert resp.status == 200 + text = await resp.text() + assert text == "OK" + + # With preemptive disabled, each request needs 401 challenge first + assert request_count == 4 # 2 requests * 2 (401 + retry) + assert auth_headers[0] is None # First request has no auth + assert auth_headers[1] is not None # Second request has auth after 401 + assert auth_headers[2] is None # Third request has no auth (preemptive disabled) + assert auth_headers[3] is not None # Fourth request has auth after 401 + + +async def test_preemptive_auth_with_stale_nonce( + aiohttp_server: AiohttpServer, +) -> None: + """Test preemptive auth handles stale nonce responses correctly.""" + digest_auth_mw = DigestAuthMiddleware("user", "pass", preemptive=True) + request_count = 0 + current_nonce = 0 + + async def handler(request: Request) -> Response: + nonlocal request_count, current_nonce + request_count += 1 + + auth_header = request.headers.get(hdrs.AUTHORIZATION) + + if not auth_header: + # First request without auth + current_nonce = 1 + challenge = f'Digest realm="test", nonce="nonce{current_nonce}", qop="auth", algorithm=MD5' + return Response( + status=401, + headers={"WWW-Authenticate": challenge}, + text="Unauthorized", + ) + + # For the second set of requests, always consider the first nonce stale + if request_count == 3 and current_nonce == 1: + # Stale nonce - request new auth with stale=true + current_nonce = 2 + challenge = f'Digest realm="test", nonce="nonce{current_nonce}", qop="auth", algorithm=MD5, stale=true' + return Response( + status=401, + headers={"WWW-Authenticate": challenge}, + text="Unauthorized - Stale nonce", + ) + + return Response(text="OK") + + app = Application() + app.router.add_get("/", handler) + server = await aiohttp_server(app) + + async with ClientSession(middlewares=(digest_auth_mw,)) as session: + # First request - will get 401, then retry with auth + async with 
session.get(server.make_url("/")) as resp: + assert resp.status == 200 + text = await resp.text() + assert text == "OK" + + # Second request - will use preemptive auth with nonce1, get 401 stale, retry with nonce2 + async with session.get(server.make_url("/")) as resp: + assert resp.status == 200 + text = await resp.text() + assert text == "OK" + + # Verify the expected flow: + # Request 1: no auth -> 401 + # Request 2: retry with auth -> 200 + # Request 3: preemptive auth with old nonce -> 401 stale + # Request 4: retry with new nonce -> 200 + assert request_count == 4 + + +async def test_preemptive_auth_updates_nonce_count( + aiohttp_server: AiohttpServer, +) -> None: + """Test that preemptive auth properly increments nonce count.""" + digest_auth_mw = DigestAuthMiddleware("user", "pass", preemptive=True) + request_count = 0 + nonce_counts = [] + + async def handler(request: Request) -> Response: + nonlocal request_count + request_count += 1 + + auth_header = request.headers.get(hdrs.AUTHORIZATION) + + if not auth_header: + # First request without auth + challenge = 'Digest realm="test", nonce="abc123", qop="auth", algorithm=MD5' + return Response( + status=401, + headers={"WWW-Authenticate": challenge}, + text="Unauthorized", + ) + + # Extract nc (nonce count) from auth header + nc_match = auth_header.split("nc=")[1].split(",")[0].strip() + nonce_counts.append(nc_match) + + return Response(text="OK") + + app = Application() + app.router.add_get("/", handler) + server = await aiohttp_server(app) + + async with ClientSession(middlewares=(digest_auth_mw,)) as session: + # Make multiple requests to see nonce count increment + for _ in range(3): + async with session.get(server.make_url("/")) as resp: + assert resp.status == 200 + await resp.text() + + # First request has no auth, then gets 401 and retries with nc=00000001 + # Second and third requests use preemptive auth with nc=00000002 and nc=00000003 + assert len(nonce_counts) == 3 + assert nonce_counts[0] == "00000001" + assert nonce_counts[1] == "00000002" + assert nonce_counts[2] == "00000003" + + +async def test_preemptive_auth_respects_protection_space( + aiohttp_server: AiohttpServer, +) -> None: + """Test that preemptive auth only applies to URLs within the protection space.""" + digest_auth_mw = DigestAuthMiddleware("user", "pass", preemptive=True) + request_count = 0 + auth_headers = [] + requested_paths = [] + + async def handler(request: Request) -> Response: + nonlocal request_count + request_count += 1 + auth_headers.append(request.headers.get(hdrs.AUTHORIZATION)) + requested_paths.append(request.path) + + if not request.headers.get(hdrs.AUTHORIZATION): + # Return 401 with digest challenge including domain parameter + challenge = 'Digest realm="test", nonce="abc123", qop="auth", algorithm=MD5, domain="/api /admin"' + return Response( + status=401, + headers={"WWW-Authenticate": challenge}, + text="Unauthorized", + ) + + return Response(text="OK") + + app = Application() + app.router.add_get("/api/endpoint", handler) + app.router.add_get("/admin/panel", handler) + app.router.add_get("/public/page", handler) + server = await aiohttp_server(app) + + async with ClientSession(middlewares=(digest_auth_mw,)) as session: + # First request to /api/endpoint - should get 401 and retry with auth + async with session.get(server.make_url("/api/endpoint")) as resp: + assert resp.status == 200 + + # Second request to /api/endpoint - should use preemptive auth (in protection space) + async with session.get(server.make_url("/api/endpoint")) as 
resp: + assert resp.status == 200 + + # Third request to /admin/panel - should use preemptive auth (in protection space) + async with session.get(server.make_url("/admin/panel")) as resp: + assert resp.status == 200 + + # Fourth request to /public/page - should NOT use preemptive auth (outside protection space) + async with session.get(server.make_url("/public/page")) as resp: + assert resp.status == 200 + + # Verify auth headers + assert auth_headers[0] is None # First request to /api/endpoint - no auth + assert auth_headers[1] is not None # Retry with auth + assert ( + auth_headers[2] is not None + ) # Second request to /api/endpoint - preemptive auth + assert auth_headers[3] is not None # Request to /admin/panel - preemptive auth + assert auth_headers[4] is None # First request to /public/page - no preemptive auth + assert auth_headers[5] is not None # Retry with auth + + # Verify paths + assert requested_paths == [ + "/api/endpoint", # Initial request + "/api/endpoint", # Retry with auth + "/api/endpoint", # Second request with preemptive auth + "/admin/panel", # Request with preemptive auth + "/public/page", # Initial request (no preemptive auth) + "/public/page", # Retry with auth + ] + + +async def test_preemptive_auth_with_absolute_domain_uris( + aiohttp_server: AiohttpServer, +) -> None: + """Test preemptive auth with absolute URIs in domain parameter.""" + digest_auth_mw = DigestAuthMiddleware("user", "pass", preemptive=True) + request_count = 0 + auth_headers = [] + + async def handler(request: Request) -> Response: + nonlocal request_count + request_count += 1 + auth_headers.append(request.headers.get(hdrs.AUTHORIZATION)) + + if not request.headers.get(hdrs.AUTHORIZATION): + # Return 401 with digest challenge including absolute URI in domain + server_url = str(request.url.with_path("/protected")) + challenge = f'Digest realm="test", nonce="abc123", qop="auth", algorithm=MD5, domain="{server_url}"' + return Response( + status=401, + headers={"WWW-Authenticate": challenge}, + text="Unauthorized", + ) + + return Response(text="OK") + + app = Application() + app.router.add_get("/protected/resource", handler) + app.router.add_get("/unprotected/resource", handler) + server = await aiohttp_server(app) + + async with ClientSession(middlewares=(digest_auth_mw,)) as session: + # First request to protected resource + async with session.get(server.make_url("/protected/resource")) as resp: + assert resp.status == 200 + + # Second request to protected resource - should use preemptive auth + async with session.get(server.make_url("/protected/resource")) as resp: + assert resp.status == 200 + + # Request to unprotected resource - should NOT use preemptive auth + async with session.get(server.make_url("/unprotected/resource")) as resp: + assert resp.status == 200 + + # Verify auth pattern + assert auth_headers[0] is None # First request - no auth + assert auth_headers[1] is not None # Retry with auth + assert auth_headers[2] is not None # Second request - preemptive auth + assert auth_headers[3] is None # Unprotected resource - no preemptive auth + assert auth_headers[4] is not None # Retry with auth + + +async def test_preemptive_auth_without_domain_uses_origin( + aiohttp_server: AiohttpServer, +) -> None: + """Test that preemptive auth without domain parameter applies to entire origin.""" + digest_auth_mw = DigestAuthMiddleware("user", "pass", preemptive=True) + request_count = 0 + auth_headers = [] + + async def handler(request: Request) -> Response: + nonlocal request_count + request_count 
+= 1 + auth_headers.append(request.headers.get(hdrs.AUTHORIZATION)) + + if not request.headers.get(hdrs.AUTHORIZATION): + # Return 401 with digest challenge without domain parameter + challenge = 'Digest realm="test", nonce="abc123", qop="auth", algorithm=MD5' + return Response( + status=401, + headers={"WWW-Authenticate": challenge}, + text="Unauthorized", + ) + + return Response(text="OK") + + app = Application() + app.router.add_get("/path1", handler) + app.router.add_get("/path2", handler) + server = await aiohttp_server(app) + + async with ClientSession(middlewares=(digest_auth_mw,)) as session: + # First request + async with session.get(server.make_url("/path1")) as resp: + assert resp.status == 200 + + # Second request to different path - should still use preemptive auth + async with session.get(server.make_url("/path2")) as resp: + assert resp.status == 200 + + # Verify auth pattern + assert auth_headers[0] is None # First request - no auth + assert auth_headers[1] is not None # Retry with auth + assert ( + auth_headers[2] is not None + ) # Second request - preemptive auth (entire origin) + + @pytest.mark.parametrize( ("status", "headers", "expected"), [ @@ -810,3 +1136,98 @@ def test_authenticate_with_malformed_headers( result = digest_auth_mw._authenticate(response) assert result == expected + + +@pytest.mark.parametrize( + ("protection_space_url", "request_url", "expected"), + [ + # Exact match + ("http://example.com/app1", "http://example.com/app1", True), + # Path with trailing slash should match + ("http://example.com/app1", "http://example.com/app1/", True), + # Subpaths should match + ("http://example.com/app1", "http://example.com/app1/resource", True), + ("http://example.com/app1", "http://example.com/app1/sub/path", True), + # Should NOT match different paths that start with same prefix + ("http://example.com/app1", "http://example.com/app1xx", False), + ("http://example.com/app1", "http://example.com/app123", False), + # Protection space with trailing slash + ("http://example.com/app1/", "http://example.com/app1/", True), + ("http://example.com/app1/", "http://example.com/app1/resource", True), + ( + "http://example.com/app1/", + "http://example.com/app1", + False, + ), # No trailing slash + # Root protection space + ("http://example.com/", "http://example.com/", True), + ("http://example.com/", "http://example.com/anything", True), + ("http://example.com/", "http://example.com", False), # No trailing slash + # Different origins should not match + ("http://example.com/app1", "https://example.com/app1", False), + ("http://example.com/app1", "http://other.com/app1", False), + ("http://example.com:8080/app1", "http://example.com/app1", False), + ], + ids=[ + "exact_match", + "path_with_trailing_slash", + "subpath_match", + "deep_subpath_match", + "no_match_app1xx", + "no_match_app123", + "protection_with_slash_exact", + "protection_with_slash_subpath", + "protection_with_slash_no_match_without", + "root_protection_exact", + "root_protection_subpath", + "root_protection_no_match_without_slash", + "different_scheme", + "different_host", + "different_port", + ], +) +def test_in_protection_space( + digest_auth_mw: DigestAuthMiddleware, + protection_space_url: str, + request_url: str, + expected: bool, +) -> None: + """Test _in_protection_space method with various URL patterns.""" + digest_auth_mw._protection_space = [protection_space_url] + result = digest_auth_mw._in_protection_space(URL(request_url)) + assert result == expected + + +def 
test_in_protection_space_multiple_spaces( + digest_auth_mw: DigestAuthMiddleware, +) -> None: + """Test _in_protection_space with multiple protection spaces.""" + digest_auth_mw._protection_space = [ + "http://example.com/api", + "http://example.com/admin/", + "http://example.com/secure/area", + ] + + # Test various URLs + assert digest_auth_mw._in_protection_space(URL("http://example.com/api")) is True + assert digest_auth_mw._in_protection_space(URL("http://example.com/api/v1")) is True + assert ( + digest_auth_mw._in_protection_space(URL("http://example.com/admin/panel")) + is True + ) + assert ( + digest_auth_mw._in_protection_space( + URL("http://example.com/secure/area/resource") + ) + is True + ) + + # These should not match + assert digest_auth_mw._in_protection_space(URL("http://example.com/apiv2")) is False + assert ( + digest_auth_mw._in_protection_space(URL("http://example.com/admin")) is False + ) # No trailing slash + assert ( + digest_auth_mw._in_protection_space(URL("http://example.com/secure")) is False + ) + assert digest_auth_mw._in_protection_space(URL("http://example.com/other")) is False From 47bc2a4ba49b583b8e0ff5822d9fd0fd665c6399 Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" Date: Wed, 4 Jun 2025 10:12:00 +0100 Subject: [PATCH 05/11] Release 3.12.8 (#11133) --- CHANGES.rst | 26 ++++++++++++++++++++++++++ CHANGES/11128.feature.rst | 9 --------- CHANGES/11129.feature.rst | 1 - aiohttp/__init__.py | 2 +- 4 files changed, 27 insertions(+), 11 deletions(-) delete mode 100644 CHANGES/11128.feature.rst delete mode 120000 CHANGES/11129.feature.rst diff --git a/CHANGES.rst b/CHANGES.rst index 867cd041a55..43e2173f8b8 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -10,6 +10,32 @@ .. towncrier release notes start +3.12.8 (2025-06-04) +=================== + +Features +-------- + +- Added preemptive digest authentication to :class:`~aiohttp.DigestAuthMiddleware` -- by :user:`bdraco`. + + The middleware now reuses authentication credentials for subsequent requests to the same + protection space, improving efficiency by avoiding extra authentication round trips. + This behavior matches how web browsers handle digest authentication and follows + :rfc:`7616#section-3.6`. + + Preemptive authentication is enabled by default but can be disabled by passing + ``preemptive=False`` to the middleware constructor. + + + *Related issues and pull requests on GitHub:* + :issue:`11128`, :issue:`11129`. + + + + +---- + + 3.12.7 (2025-06-02) =================== diff --git a/CHANGES/11128.feature.rst b/CHANGES/11128.feature.rst deleted file mode 100644 index 0f99d2b8a11..00000000000 --- a/CHANGES/11128.feature.rst +++ /dev/null @@ -1,9 +0,0 @@ -Added preemptive digest authentication to :class:`~aiohttp.DigestAuthMiddleware` -- by :user:`bdraco`. - -The middleware now reuses authentication credentials for subsequent requests to the same -protection space, improving efficiency by avoiding extra authentication round trips. -This behavior matches how web browsers handle digest authentication and follows -:rfc:`7616#section-3.6`. - -Preemptive authentication is enabled by default but can be disabled by passing -``preemptive=False`` to the middleware constructor. 
diff --git a/CHANGES/11129.feature.rst b/CHANGES/11129.feature.rst deleted file mode 120000 index 692d28ba9ce..00000000000 --- a/CHANGES/11129.feature.rst +++ /dev/null @@ -1 +0,0 @@ -11128.feature.rst \ No newline at end of file diff --git a/aiohttp/__init__.py b/aiohttp/__init__.py index 28981917a0e..92b7a7b076a 100644 --- a/aiohttp/__init__.py +++ b/aiohttp/__init__.py @@ -1,4 +1,4 @@ -__version__ = "3.12.8.dev0" +__version__ = "3.12.8" from typing import TYPE_CHECKING, Tuple From 1981d59687f13cca281c6df954d39d7cc7f8a78c Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 4 Jun 2025 10:40:14 +0000 Subject: [PATCH 06/11] Bump gidgethub from 5.3.0 to 5.4.0 (#11136) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Bumps [gidgethub](https://github.com/brettcannon/gidgethub) from 5.3.0 to 5.4.0.
Release notes

Sourced from gidgethub's releases.

5.4.0

Full Changelog: https://github.com/gidgethub/gidgethub/compare/v5.3.0...v5.4.0

Changelog

Sourced from gidgethub's changelog.

5.4.0

  • Make :meth:`gidgethub.abc.GitHubAPI.getiter` iterative instead of recursive (PR #219: https://github.com/gidgethub/gidgethub/pull/219)

  • :meth:`gidgethub.apps.get_jwt` now accepts an ``expiration`` parameter to configure JWT token expiration time (PR #215: https://github.com/gidgethub/gidgethub/pull/215; see the sketch after this list)

  • Add support for Python 3.12-3.13 and drop EOL Python 3.7 (PR #209: https://github.com/gidgethub/gidgethub/pull/209)
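A hedged sketch of the new ``expiration`` parameter on `gidgethub.apps.get_jwt`; the parameter name comes from the changelog entry above, the app ID and key path are placeholders, and PyJWT must be installed for `gidgethub.apps` to work:

```python
from gidgethub import apps

# Load the GitHub App's private key (placeholder path).
with open("app-private-key.pem") as key_file:
    private_key = key_file.read()

# get_jwt() signs a JWT identifying the GitHub App. Per the 5.4.0
# changelog, `expiration` now configures the token lifetime in seconds
# (GitHub caps App JWTs at 10 minutes; 300 here is an arbitrary choice).
jwt_token = apps.get_jwt(app_id="12345", private_key=private_key, expiration=300)
print(jwt_token[:20], "...")
```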

Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- requirements/constraints.txt | 2 +- requirements/dev.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/requirements/constraints.txt b/requirements/constraints.txt index 0bc42abfba0..cdd5139765c 100644 --- a/requirements/constraints.txt +++ b/requirements/constraints.txt @@ -84,7 +84,7 @@ frozenlist==1.6.0 # via # -r requirements/runtime-deps.in # aiosignal -gidgethub==5.3.0 +gidgethub==5.4.0 # via cherry-picker gunicorn==23.0.0 # via -r requirements/base.in diff --git a/requirements/dev.txt b/requirements/dev.txt index cdf512edb69..0afae4094d7 100644 --- a/requirements/dev.txt +++ b/requirements/dev.txt @@ -82,7 +82,7 @@ frozenlist==1.6.0 # via # -r requirements/runtime-deps.in # aiosignal -gidgethub==5.3.0 +gidgethub==5.4.0 # via cherry-picker gunicorn==23.0.0 # via -r requirements/base.in From 6e7376d273fcb51c6d7bac9611d78f71bc8f7795 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 4 Jun 2025 10:52:59 +0000 Subject: [PATCH 07/11] Bump frozenlist from 1.6.0 to 1.6.2 (#11137) Bumps [frozenlist](https://github.com/aio-libs/frozenlist) from 1.6.0 to 1.6.2.
Release notes

Sourced from frozenlist's releases.

1.6.2

No changes from 1.6.1. This is a re-publish, since the 1.6.1 upload failed to publish all files to PyPI.

No significant changes.


1.6.1

This release was yanked because the upload job failed to upload all files to PyPI.

Bug fixes

  • Correctly use ``cimport`` for including ``PyBool_FromLong`` -- by :user:`lysnikolaou`.

    Related issues and pull requests on GitHub: #653.

Packaging updates and notes for downstreams

  • Exclude ``_frozenlist.cpp`` from bdists/wheels -- by :user:`musicinmybrain`.

    Related issues and pull requests on GitHub: #649.

  • Updated to use Cython 3.1 universally across the build path -- by :user:`lysnikolaou`.

    Related issues and pull requests on GitHub: #654.


Changelog

Sourced from frozenlist's changelog.

v1.6.2 (2025-06-03)

No significant changes.


v1.6.1 (2025-06-02)

Bug fixes

  • Correctly use ``cimport`` for including ``PyBool_FromLong`` -- by :user:`lysnikolaou`.

    Related issues and pull requests on GitHub: :issue:`653`.

Packaging updates and notes for downstreams

  • Exclude ``_frozenlist.cpp`` from bdists/wheels -- by :user:`musicinmybrain`.

    Related issues and pull requests on GitHub: :issue:`649`.

  • Updated to use Cython 3.1 universally across the build path -- by :user:`lysnikolaou`.

    Related issues and pull requests on GitHub: :issue:`654`.


Commits

[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=frozenlist&package-manager=pip&previous-version=1.6.0&new-version=1.6.2)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores) Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting `@dependabot rebase`. [//]: # (dependabot-automerge-start) [//]: # (dependabot-automerge-end) ---
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- requirements/base.txt | 3 +-- requirements/constraints.txt | 2 +- requirements/dev.txt | 2 +- requirements/runtime-deps.txt | 2 +- requirements/test.txt | 2 +- 5 files changed, 5 insertions(+), 6 deletions(-) diff --git a/requirements/base.txt b/requirements/base.txt index 2cd73f52418..8b3e0cff601 100644 --- a/requirements/base.txt +++ b/requirements/base.txt @@ -18,7 +18,7 @@ brotli==1.1.0 ; platform_python_implementation == "CPython" # via -r requirements/runtime-deps.in cffi==1.17.1 # via pycares -frozenlist==1.6.0 +frozenlist==1.6.2 # via # -r requirements/runtime-deps.in # aiosignal @@ -43,7 +43,6 @@ pycparser==2.22 typing-extensions==4.13.2 # via multidict uvloop==0.21.0 ; platform_system != "Windows" and implementation_name == "cpython" -winloop==0.1.8; platform_system == "Windows" and implementation_name == "cpython" # via -r requirements/base.in yarl==1.20.0 # via -r requirements/runtime-deps.in diff --git a/requirements/constraints.txt b/requirements/constraints.txt index cdd5139765c..77c3a12d41d 100644 --- a/requirements/constraints.txt +++ b/requirements/constraints.txt @@ -80,7 +80,7 @@ freezegun==1.5.2 # via # -r requirements/lint.in # -r requirements/test.in -frozenlist==1.6.0 +frozenlist==1.6.2 # via # -r requirements/runtime-deps.in # aiosignal diff --git a/requirements/dev.txt b/requirements/dev.txt index 0afae4094d7..08ca9e99b0b 100644 --- a/requirements/dev.txt +++ b/requirements/dev.txt @@ -78,7 +78,7 @@ freezegun==1.5.2 # via # -r requirements/lint.in # -r requirements/test.in -frozenlist==1.6.0 +frozenlist==1.6.2 # via # -r requirements/runtime-deps.in # aiosignal diff --git a/requirements/runtime-deps.txt b/requirements/runtime-deps.txt index 58263ab61ed..4b9aca5264d 100644 --- a/requirements/runtime-deps.txt +++ b/requirements/runtime-deps.txt @@ -18,7 +18,7 @@ brotli==1.1.0 ; platform_python_implementation == "CPython" # via -r requirements/runtime-deps.in cffi==1.17.1 # via pycares -frozenlist==1.6.0 +frozenlist==1.6.2 # via # -r requirements/runtime-deps.in # aiosignal diff --git a/requirements/test.txt b/requirements/test.txt index 9376171d7c0..3b127bb328d 100644 --- a/requirements/test.txt +++ b/requirements/test.txt @@ -41,7 +41,7 @@ forbiddenfruit==0.1.4 # via blockbuster freezegun==1.5.2 # via -r requirements/test.in -frozenlist==1.6.0 +frozenlist==1.6.2 # via # -r requirements/runtime-deps.in # aiosignal From 3dafd4c7882b0e82a9f0d5aa82e2130966765d55 Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" Date: Wed, 4 Jun 2025 15:55:50 +0100 Subject: [PATCH 08/11] Fix IOBasePayload reading entire files into memory instead of chunking (#11139) --- CHANGES/11138.bugfix.rst | 3 + aiohttp/payload.py | 14 +++- tests/test_payload.py | 150 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 164 insertions(+), 3 deletions(-) create mode 100644 CHANGES/11138.bugfix.rst diff --git a/CHANGES/11138.bugfix.rst b/CHANGES/11138.bugfix.rst new file mode 100644 index 00000000000..6d8c634e51f --- /dev/null +++ b/CHANGES/11138.bugfix.rst @@ -0,0 +1,3 @@ +Fixed ``IOBasePayload`` and ``TextIOPayload`` reading entire files into memory when streaming large files -- by :user:`bdraco`. + +When using file-like objects with the aiohttp client, the entire file would be read into memory if the file size was provided in the ``Content-Length`` header. This could cause out-of-memory errors when uploading large files. 
The payload classes now correctly read data in chunks of ``READ_SIZE`` (64KB) regardless of the total content length. diff --git a/aiohttp/payload.py b/aiohttp/payload.py index 1f83b611567..ec5f35e286b 100644 --- a/aiohttp/payload.py +++ b/aiohttp/payload.py @@ -512,7 +512,7 @@ def _read_and_available_len( self._set_or_restore_start_position() size = self.size # Call size only once since it does I/O return size, self._value.read( - min(size or READ_SIZE, remaining_content_len or READ_SIZE) + min(READ_SIZE, size or READ_SIZE, remaining_content_len or READ_SIZE) ) def _read(self, remaining_content_len: Optional[int]) -> bytes: @@ -615,7 +615,15 @@ async def write_with_length( return # Read next chunk - chunk = await loop.run_in_executor(None, self._read, remaining_content_len) + chunk = await loop.run_in_executor( + None, + self._read, + ( + min(READ_SIZE, remaining_content_len) + if remaining_content_len is not None + else READ_SIZE + ), + ) def _should_stop_writing( self, @@ -757,7 +765,7 @@ def _read_and_available_len( self._set_or_restore_start_position() size = self.size chunk = self._value.read( - min(size or READ_SIZE, remaining_content_len or READ_SIZE) + min(READ_SIZE, size or READ_SIZE, remaining_content_len or READ_SIZE) ) return size, chunk.encode(self._encoding) if self._encoding else chunk.encode() diff --git a/tests/test_payload.py b/tests/test_payload.py index 2d80dc0c65d..be17dbe31f8 100644 --- a/tests/test_payload.py +++ b/tests/test_payload.py @@ -12,6 +12,7 @@ from aiohttp import payload from aiohttp.abc import AbstractStreamWriter +from aiohttp.payload import READ_SIZE class BufferWriter(AbstractStreamWriter): @@ -363,6 +364,155 @@ async def test_iobase_payload_exact_chunk_size_limit() -> None: assert written == data[:chunk_size] +async def test_iobase_payload_reads_in_chunks() -> None: + """Test IOBasePayload reads data in chunks of READ_SIZE, not all at once.""" + # Create a large file that's multiple times larger than READ_SIZE + large_data = b"x" * (READ_SIZE * 3 + 1000) # ~192KB + 1000 bytes + + # Mock the file-like object to track read calls + mock_file = unittest.mock.Mock(spec=io.BytesIO) + mock_file.tell.return_value = 0 + mock_file.fileno.side_effect = AttributeError # Make size return None + + # Track the sizes of read() calls + read_sizes = [] + + def mock_read(size: int) -> bytes: + read_sizes.append(size) + # Return data based on how many times read was called + call_count = len(read_sizes) + if call_count == 1: + return large_data[:size] + elif call_count == 2: + return large_data[READ_SIZE : READ_SIZE + size] + elif call_count == 3: + return large_data[READ_SIZE * 2 : READ_SIZE * 2 + size] + else: + return large_data[READ_SIZE * 3 :] + + mock_file.read.side_effect = mock_read + + payload_obj = payload.IOBasePayload(mock_file) + writer = MockStreamWriter() + + # Write with a large content_length + await payload_obj.write_with_length(writer, len(large_data)) + + # Verify that reads were limited to READ_SIZE + assert len(read_sizes) > 1 # Should have multiple reads + for read_size in read_sizes: + assert ( + read_size <= READ_SIZE + ), f"Read size {read_size} exceeds READ_SIZE {READ_SIZE}" + + +async def test_iobase_payload_large_content_length() -> None: + """Test IOBasePayload with very large content_length doesn't read all at once.""" + data = b"x" * (READ_SIZE + 1000) + + # Create a custom file-like object that tracks read sizes + class TrackingBytesIO(io.BytesIO): + def __init__(self, data: bytes) -> None: + super().__init__(data) + self.read_sizes: 
List[int] = [] + + def read(self, size: Optional[int] = -1) -> bytes: + self.read_sizes.append(size if size is not None else -1) + return super().read(size) + + tracking_file = TrackingBytesIO(data) + payload_obj = payload.IOBasePayload(tracking_file) + writer = MockStreamWriter() + + # Write with a very large content_length (simulating the bug scenario) + large_content_length = 10 * 1024 * 1024 # 10MB + await payload_obj.write_with_length(writer, large_content_length) + + # Verify no single read exceeded READ_SIZE + for read_size in tracking_file.read_sizes: + assert ( + read_size <= READ_SIZE + ), f"Read size {read_size} exceeds READ_SIZE {READ_SIZE}" + + # Verify the correct amount of data was written + assert writer.get_written_bytes() == data + + +async def test_textio_payload_reads_in_chunks() -> None: + """Test TextIOPayload reads data in chunks of READ_SIZE, not all at once.""" + # Create a large text file that's multiple times larger than READ_SIZE + large_text = "x" * (READ_SIZE * 3 + 1000) # ~192KB + 1000 chars + + # Mock the file-like object to track read calls + mock_file = unittest.mock.Mock(spec=io.StringIO) + mock_file.tell.return_value = 0 + mock_file.fileno.side_effect = AttributeError # Make size return None + mock_file.encoding = "utf-8" + + # Track the sizes of read() calls + read_sizes = [] + + def mock_read(size: int) -> str: + read_sizes.append(size) + # Return data based on how many times read was called + call_count = len(read_sizes) + if call_count == 1: + return large_text[:size] + elif call_count == 2: + return large_text[READ_SIZE : READ_SIZE + size] + elif call_count == 3: + return large_text[READ_SIZE * 2 : READ_SIZE * 2 + size] + else: + return large_text[READ_SIZE * 3 :] + + mock_file.read.side_effect = mock_read + + payload_obj = payload.TextIOPayload(mock_file) + writer = MockStreamWriter() + + # Write with a large content_length + await payload_obj.write_with_length(writer, len(large_text.encode("utf-8"))) + + # Verify that reads were limited to READ_SIZE + assert len(read_sizes) > 1 # Should have multiple reads + for read_size in read_sizes: + assert ( + read_size <= READ_SIZE + ), f"Read size {read_size} exceeds READ_SIZE {READ_SIZE}" + + +async def test_textio_payload_large_content_length() -> None: + """Test TextIOPayload with very large content_length doesn't read all at once.""" + text_data = "x" * (READ_SIZE + 1000) + + # Create a custom file-like object that tracks read sizes + class TrackingStringIO(io.StringIO): + def __init__(self, data: str) -> None: + super().__init__(data) + self.read_sizes: List[int] = [] + + def read(self, size: Optional[int] = -1) -> str: + self.read_sizes.append(size if size is not None else -1) + return super().read(size) + + tracking_file = TrackingStringIO(text_data) + payload_obj = payload.TextIOPayload(tracking_file) + writer = MockStreamWriter() + + # Write with a very large content_length (simulating the bug scenario) + large_content_length = 10 * 1024 * 1024 # 10MB + await payload_obj.write_with_length(writer, large_content_length) + + # Verify no single read exceeded READ_SIZE + for read_size in tracking_file.read_sizes: + assert ( + read_size <= READ_SIZE + ), f"Read size {read_size} exceeds READ_SIZE {READ_SIZE}" + + # Verify the correct amount of data was written + assert writer.get_written_bytes() == text_data.encode("utf-8") + + async def test_async_iterable_payload_write_with_length_no_limit() -> None: """Test AsyncIterablePayload writing with no content length limit.""" From 
7ccc94df3dfb570539fb9deb50fe28f336f12f9b Mon Sep 17 00:00:00 2001 From: "patchback[bot]" <45432694+patchback[bot]@users.noreply.github.com> Date: Wed, 4 Jun 2025 15:18:16 +0000 Subject: [PATCH 09/11] [PR #11139/3dafd4c7 backport][3.12] Fix IOBasePayload reading entire files into memory instead of chunking (#11141) Co-authored-by: J. Nick Koston Fixes #11138 --- CHANGES/11138.bugfix.rst | 3 + aiohttp/payload.py | 14 +++- tests/test_payload.py | 152 ++++++++++++++++++++++++++++++++++++++- 3 files changed, 165 insertions(+), 4 deletions(-) create mode 100644 CHANGES/11138.bugfix.rst diff --git a/CHANGES/11138.bugfix.rst b/CHANGES/11138.bugfix.rst new file mode 100644 index 00000000000..6d8c634e51f --- /dev/null +++ b/CHANGES/11138.bugfix.rst @@ -0,0 +1,3 @@ +Fixed ``IOBasePayload`` and ``TextIOPayload`` reading entire files into memory when streaming large files -- by :user:`bdraco`. + +When using file-like objects with the aiohttp client, the entire file would be read into memory if the file size was provided in the ``Content-Length`` header. This could cause out-of-memory errors when uploading large files. The payload classes now correctly read data in chunks of ``READ_SIZE`` (64KB) regardless of the total content length. diff --git a/aiohttp/payload.py b/aiohttp/payload.py index 7180fd2b430..d119d9beefc 100644 --- a/aiohttp/payload.py +++ b/aiohttp/payload.py @@ -514,7 +514,7 @@ def _read_and_available_len( self._set_or_restore_start_position() size = self.size # Call size only once since it does I/O return size, self._value.read( - min(size or READ_SIZE, remaining_content_len or READ_SIZE) + min(READ_SIZE, size or READ_SIZE, remaining_content_len or READ_SIZE) ) def _read(self, remaining_content_len: Optional[int]) -> bytes: @@ -617,7 +617,15 @@ async def write_with_length( return # Read next chunk - chunk = await loop.run_in_executor(None, self._read, remaining_content_len) + chunk = await loop.run_in_executor( + None, + self._read, + ( + min(READ_SIZE, remaining_content_len) + if remaining_content_len is not None + else READ_SIZE + ), + ) def _should_stop_writing( self, @@ -760,7 +768,7 @@ def _read_and_available_len( self._set_or_restore_start_position() size = self.size chunk = self._value.read( - min(size or READ_SIZE, remaining_content_len or READ_SIZE) + min(READ_SIZE, size or READ_SIZE, remaining_content_len or READ_SIZE) ) return size, chunk.encode(self._encoding) if self._encoding else chunk.encode() diff --git a/tests/test_payload.py b/tests/test_payload.py index b810a68f8b7..2fd0a0f60d9 100644 --- a/tests/test_payload.py +++ b/tests/test_payload.py @@ -6,13 +6,14 @@ from collections.abc import AsyncIterator from io import StringIO from pathlib import Path -from typing import Optional, TextIO, Union +from typing import List, Optional, TextIO, Union import pytest from multidict import CIMultiDict from aiohttp import payload from aiohttp.abc import AbstractStreamWriter +from aiohttp.payload import READ_SIZE class BufferWriter(AbstractStreamWriter): @@ -365,6 +366,155 @@ async def test_iobase_payload_exact_chunk_size_limit() -> None: assert written == data[:chunk_size] +async def test_iobase_payload_reads_in_chunks() -> None: + """Test IOBasePayload reads data in chunks of READ_SIZE, not all at once.""" + # Create a large file that's multiple times larger than READ_SIZE + large_data = b"x" * (READ_SIZE * 3 + 1000) # ~192KB + 1000 bytes + + # Mock the file-like object to track read calls + mock_file = unittest.mock.Mock(spec=io.BytesIO) + mock_file.tell.return_value = 0 + 
mock_file.fileno.side_effect = AttributeError # Make size return None + + # Track the sizes of read() calls + read_sizes = [] + + def mock_read(size: int) -> bytes: + read_sizes.append(size) + # Return data based on how many times read was called + call_count = len(read_sizes) + if call_count == 1: + return large_data[:size] + elif call_count == 2: + return large_data[READ_SIZE : READ_SIZE + size] + elif call_count == 3: + return large_data[READ_SIZE * 2 : READ_SIZE * 2 + size] + else: + return large_data[READ_SIZE * 3 :] + + mock_file.read.side_effect = mock_read + + payload_obj = payload.IOBasePayload(mock_file) + writer = MockStreamWriter() + + # Write with a large content_length + await payload_obj.write_with_length(writer, len(large_data)) + + # Verify that reads were limited to READ_SIZE + assert len(read_sizes) > 1 # Should have multiple reads + for read_size in read_sizes: + assert ( + read_size <= READ_SIZE + ), f"Read size {read_size} exceeds READ_SIZE {READ_SIZE}" + + +async def test_iobase_payload_large_content_length() -> None: + """Test IOBasePayload with very large content_length doesn't read all at once.""" + data = b"x" * (READ_SIZE + 1000) + + # Create a custom file-like object that tracks read sizes + class TrackingBytesIO(io.BytesIO): + def __init__(self, data: bytes) -> None: + super().__init__(data) + self.read_sizes: List[int] = [] + + def read(self, size: Optional[int] = -1) -> bytes: + self.read_sizes.append(size if size is not None else -1) + return super().read(size) + + tracking_file = TrackingBytesIO(data) + payload_obj = payload.IOBasePayload(tracking_file) + writer = MockStreamWriter() + + # Write with a very large content_length (simulating the bug scenario) + large_content_length = 10 * 1024 * 1024 # 10MB + await payload_obj.write_with_length(writer, large_content_length) + + # Verify no single read exceeded READ_SIZE + for read_size in tracking_file.read_sizes: + assert ( + read_size <= READ_SIZE + ), f"Read size {read_size} exceeds READ_SIZE {READ_SIZE}" + + # Verify the correct amount of data was written + assert writer.get_written_bytes() == data + + +async def test_textio_payload_reads_in_chunks() -> None: + """Test TextIOPayload reads data in chunks of READ_SIZE, not all at once.""" + # Create a large text file that's multiple times larger than READ_SIZE + large_text = "x" * (READ_SIZE * 3 + 1000) # ~192KB + 1000 chars + + # Mock the file-like object to track read calls + mock_file = unittest.mock.Mock(spec=io.StringIO) + mock_file.tell.return_value = 0 + mock_file.fileno.side_effect = AttributeError # Make size return None + mock_file.encoding = "utf-8" + + # Track the sizes of read() calls + read_sizes = [] + + def mock_read(size: int) -> str: + read_sizes.append(size) + # Return data based on how many times read was called + call_count = len(read_sizes) + if call_count == 1: + return large_text[:size] + elif call_count == 2: + return large_text[READ_SIZE : READ_SIZE + size] + elif call_count == 3: + return large_text[READ_SIZE * 2 : READ_SIZE * 2 + size] + else: + return large_text[READ_SIZE * 3 :] + + mock_file.read.side_effect = mock_read + + payload_obj = payload.TextIOPayload(mock_file) + writer = MockStreamWriter() + + # Write with a large content_length + await payload_obj.write_with_length(writer, len(large_text.encode("utf-8"))) + + # Verify that reads were limited to READ_SIZE + assert len(read_sizes) > 1 # Should have multiple reads + for read_size in read_sizes: + assert ( + read_size <= READ_SIZE + ), f"Read size {read_size} exceeds 
READ_SIZE {READ_SIZE}" + + +async def test_textio_payload_large_content_length() -> None: + """Test TextIOPayload with very large content_length doesn't read all at once.""" + text_data = "x" * (READ_SIZE + 1000) + + # Create a custom file-like object that tracks read sizes + class TrackingStringIO(io.StringIO): + def __init__(self, data: str) -> None: + super().__init__(data) + self.read_sizes: List[int] = [] + + def read(self, size: Optional[int] = -1) -> str: + self.read_sizes.append(size if size is not None else -1) + return super().read(size) + + tracking_file = TrackingStringIO(text_data) + payload_obj = payload.TextIOPayload(tracking_file) + writer = MockStreamWriter() + + # Write with a very large content_length (simulating the bug scenario) + large_content_length = 10 * 1024 * 1024 # 10MB + await payload_obj.write_with_length(writer, large_content_length) + + # Verify no single read exceeded READ_SIZE + for read_size in tracking_file.read_sizes: + assert ( + read_size <= READ_SIZE + ), f"Read size {read_size} exceeds READ_SIZE {READ_SIZE}" + + # Verify the correct amount of data was written + assert writer.get_written_bytes() == text_data.encode("utf-8") + + async def test_async_iterable_payload_write_with_length_no_limit() -> None: """Test AsyncIterablePayload writing with no content length limit.""" From 067ecbdfc63b0af9625dd91639544a1d7b432263 Mon Sep 17 00:00:00 2001 From: "patchback[bot]" <45432694+patchback[bot]@users.noreply.github.com> Date: Wed, 4 Jun 2025 15:20:52 +0000 Subject: [PATCH 10/11] [PR #11139/3dafd4c7 backport][3.13] Fix IOBasePayload reading entire files into memory instead of chunking (#11142) Co-authored-by: J. Nick Koston Fixes #11138 --- CHANGES/11138.bugfix.rst | 3 + aiohttp/payload.py | 14 +++- tests/test_payload.py | 152 ++++++++++++++++++++++++++++++++++++++- 3 files changed, 165 insertions(+), 4 deletions(-) create mode 100644 CHANGES/11138.bugfix.rst diff --git a/CHANGES/11138.bugfix.rst b/CHANGES/11138.bugfix.rst new file mode 100644 index 00000000000..6d8c634e51f --- /dev/null +++ b/CHANGES/11138.bugfix.rst @@ -0,0 +1,3 @@ +Fixed ``IOBasePayload`` and ``TextIOPayload`` reading entire files into memory when streaming large files -- by :user:`bdraco`. + +When using file-like objects with the aiohttp client, the entire file would be read into memory if the file size was provided in the ``Content-Length`` header. This could cause out-of-memory errors when uploading large files. The payload classes now correctly read data in chunks of ``READ_SIZE`` (64KB) regardless of the total content length. 
diff --git a/aiohttp/payload.py b/aiohttp/payload.py index 7180fd2b430..d119d9beefc 100644 --- a/aiohttp/payload.py +++ b/aiohttp/payload.py @@ -514,7 +514,7 @@ def _read_and_available_len( self._set_or_restore_start_position() size = self.size # Call size only once since it does I/O return size, self._value.read( - min(size or READ_SIZE, remaining_content_len or READ_SIZE) + min(READ_SIZE, size or READ_SIZE, remaining_content_len or READ_SIZE) ) def _read(self, remaining_content_len: Optional[int]) -> bytes: @@ -617,7 +617,15 @@ async def write_with_length( return # Read next chunk - chunk = await loop.run_in_executor(None, self._read, remaining_content_len) + chunk = await loop.run_in_executor( + None, + self._read, + ( + min(READ_SIZE, remaining_content_len) + if remaining_content_len is not None + else READ_SIZE + ), + ) def _should_stop_writing( self, @@ -760,7 +768,7 @@ def _read_and_available_len( self._set_or_restore_start_position() size = self.size chunk = self._value.read( - min(size or READ_SIZE, remaining_content_len or READ_SIZE) + min(READ_SIZE, size or READ_SIZE, remaining_content_len or READ_SIZE) ) return size, chunk.encode(self._encoding) if self._encoding else chunk.encode() diff --git a/tests/test_payload.py b/tests/test_payload.py index b810a68f8b7..2fd0a0f60d9 100644 --- a/tests/test_payload.py +++ b/tests/test_payload.py @@ -6,13 +6,14 @@ from collections.abc import AsyncIterator from io import StringIO from pathlib import Path -from typing import Optional, TextIO, Union +from typing import List, Optional, TextIO, Union import pytest from multidict import CIMultiDict from aiohttp import payload from aiohttp.abc import AbstractStreamWriter +from aiohttp.payload import READ_SIZE class BufferWriter(AbstractStreamWriter): @@ -365,6 +366,155 @@ async def test_iobase_payload_exact_chunk_size_limit() -> None: assert written == data[:chunk_size] +async def test_iobase_payload_reads_in_chunks() -> None: + """Test IOBasePayload reads data in chunks of READ_SIZE, not all at once.""" + # Create a large file that's multiple times larger than READ_SIZE + large_data = b"x" * (READ_SIZE * 3 + 1000) # ~192KB + 1000 bytes + + # Mock the file-like object to track read calls + mock_file = unittest.mock.Mock(spec=io.BytesIO) + mock_file.tell.return_value = 0 + mock_file.fileno.side_effect = AttributeError # Make size return None + + # Track the sizes of read() calls + read_sizes = [] + + def mock_read(size: int) -> bytes: + read_sizes.append(size) + # Return data based on how many times read was called + call_count = len(read_sizes) + if call_count == 1: + return large_data[:size] + elif call_count == 2: + return large_data[READ_SIZE : READ_SIZE + size] + elif call_count == 3: + return large_data[READ_SIZE * 2 : READ_SIZE * 2 + size] + else: + return large_data[READ_SIZE * 3 :] + + mock_file.read.side_effect = mock_read + + payload_obj = payload.IOBasePayload(mock_file) + writer = MockStreamWriter() + + # Write with a large content_length + await payload_obj.write_with_length(writer, len(large_data)) + + # Verify that reads were limited to READ_SIZE + assert len(read_sizes) > 1 # Should have multiple reads + for read_size in read_sizes: + assert ( + read_size <= READ_SIZE + ), f"Read size {read_size} exceeds READ_SIZE {READ_SIZE}" + + +async def test_iobase_payload_large_content_length() -> None: + """Test IOBasePayload with very large content_length doesn't read all at once.""" + data = b"x" * (READ_SIZE + 1000) + + # Create a custom file-like object that tracks read sizes + class 
TrackingBytesIO(io.BytesIO): + def __init__(self, data: bytes) -> None: + super().__init__(data) + self.read_sizes: List[int] = [] + + def read(self, size: Optional[int] = -1) -> bytes: + self.read_sizes.append(size if size is not None else -1) + return super().read(size) + + tracking_file = TrackingBytesIO(data) + payload_obj = payload.IOBasePayload(tracking_file) + writer = MockStreamWriter() + + # Write with a very large content_length (simulating the bug scenario) + large_content_length = 10 * 1024 * 1024 # 10MB + await payload_obj.write_with_length(writer, large_content_length) + + # Verify no single read exceeded READ_SIZE + for read_size in tracking_file.read_sizes: + assert ( + read_size <= READ_SIZE + ), f"Read size {read_size} exceeds READ_SIZE {READ_SIZE}" + + # Verify the correct amount of data was written + assert writer.get_written_bytes() == data + + +async def test_textio_payload_reads_in_chunks() -> None: + """Test TextIOPayload reads data in chunks of READ_SIZE, not all at once.""" + # Create a large text file that's multiple times larger than READ_SIZE + large_text = "x" * (READ_SIZE * 3 + 1000) # ~192KB + 1000 chars + + # Mock the file-like object to track read calls + mock_file = unittest.mock.Mock(spec=io.StringIO) + mock_file.tell.return_value = 0 + mock_file.fileno.side_effect = AttributeError # Make size return None + mock_file.encoding = "utf-8" + + # Track the sizes of read() calls + read_sizes = [] + + def mock_read(size: int) -> str: + read_sizes.append(size) + # Return data based on how many times read was called + call_count = len(read_sizes) + if call_count == 1: + return large_text[:size] + elif call_count == 2: + return large_text[READ_SIZE : READ_SIZE + size] + elif call_count == 3: + return large_text[READ_SIZE * 2 : READ_SIZE * 2 + size] + else: + return large_text[READ_SIZE * 3 :] + + mock_file.read.side_effect = mock_read + + payload_obj = payload.TextIOPayload(mock_file) + writer = MockStreamWriter() + + # Write with a large content_length + await payload_obj.write_with_length(writer, len(large_text.encode("utf-8"))) + + # Verify that reads were limited to READ_SIZE + assert len(read_sizes) > 1 # Should have multiple reads + for read_size in read_sizes: + assert ( + read_size <= READ_SIZE + ), f"Read size {read_size} exceeds READ_SIZE {READ_SIZE}" + + +async def test_textio_payload_large_content_length() -> None: + """Test TextIOPayload with very large content_length doesn't read all at once.""" + text_data = "x" * (READ_SIZE + 1000) + + # Create a custom file-like object that tracks read sizes + class TrackingStringIO(io.StringIO): + def __init__(self, data: str) -> None: + super().__init__(data) + self.read_sizes: List[int] = [] + + def read(self, size: Optional[int] = -1) -> str: + self.read_sizes.append(size if size is not None else -1) + return super().read(size) + + tracking_file = TrackingStringIO(text_data) + payload_obj = payload.TextIOPayload(tracking_file) + writer = MockStreamWriter() + + # Write with a very large content_length (simulating the bug scenario) + large_content_length = 10 * 1024 * 1024 # 10MB + await payload_obj.write_with_length(writer, large_content_length) + + # Verify no single read exceeded READ_SIZE + for read_size in tracking_file.read_sizes: + assert ( + read_size <= READ_SIZE + ), f"Read size {read_size} exceeds READ_SIZE {READ_SIZE}" + + # Verify the correct amount of data was written + assert writer.get_written_bytes() == text_data.encode("utf-8") + + async def 
test_async_iterable_payload_write_with_length_no_limit() -> None: """Test AsyncIterablePayload writing with no content length limit.""" From d40e7bb150ddf49958d9a63285716414f00b628e Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" Date: Wed, 4 Jun 2025 16:45:46 +0100 Subject: [PATCH 11/11] Release 3.12.9 (#11143) --- CHANGES.rst | 20 ++++++++++++++++++++ CHANGES/11138.bugfix.rst | 3 --- aiohttp/__init__.py | 2 +- 3 files changed, 21 insertions(+), 4 deletions(-) delete mode 100644 CHANGES/11138.bugfix.rst diff --git a/CHANGES.rst b/CHANGES.rst index 43e2173f8b8..fd27e959e23 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -10,6 +10,26 @@ .. towncrier release notes start +3.12.9 (2025-06-04) +=================== + +Bug fixes +--------- + +- Fixed ``IOBasePayload`` and ``TextIOPayload`` reading entire files into memory when streaming large files -- by :user:`bdraco`. + + When using file-like objects with the aiohttp client, the entire file would be read into memory if the file size was provided in the ``Content-Length`` header. This could cause out-of-memory errors when uploading large files. The payload classes now correctly read data in chunks of ``READ_SIZE`` (64KB) regardless of the total content length. + + + *Related issues and pull requests on GitHub:* + :issue:`11138`. + + + + +---- + + 3.12.8 (2025-06-04) =================== diff --git a/CHANGES/11138.bugfix.rst b/CHANGES/11138.bugfix.rst deleted file mode 100644 index 6d8c634e51f..00000000000 --- a/CHANGES/11138.bugfix.rst +++ /dev/null @@ -1,3 +0,0 @@ -Fixed ``IOBasePayload`` and ``TextIOPayload`` reading entire files into memory when streaming large files -- by :user:`bdraco`. - -When using file-like objects with the aiohttp client, the entire file would be read into memory if the file size was provided in the ``Content-Length`` header. This could cause out-of-memory errors when uploading large files. The payload classes now correctly read data in chunks of ``READ_SIZE`` (64KB) regardless of the total content length. diff --git a/aiohttp/__init__.py b/aiohttp/__init__.py index 92b7a7b076a..4df59028912 100644 --- a/aiohttp/__init__.py +++ b/aiohttp/__init__.py @@ -1,4 +1,4 @@ -__version__ = "3.12.8" +__version__ = "3.12.9" from typing import TYPE_CHECKING, Tuple
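To make the 3.12.9 fix concrete outside the diffs, here is the capping expression from the ``payload.py`` hunks above as standalone Python; ``READ_SIZE`` is 64 KiB per the changelog, and the helper name is illustrative::

    from typing import Optional

    READ_SIZE = 2 ** 16  # 64 KiB, the chunk size named in the changelog

    def capped_read_len(size: Optional[int], remaining_content_len: Optional[int]) -> int:
        # Mirrors the patched expression: never request more than READ_SIZE
        # from a single read(), even when the file size or the remaining
        # Content-Length is far larger; None falls back to READ_SIZE.
        return min(
            READ_SIZE,
            size or READ_SIZE,
            remaining_content_len or READ_SIZE,
        )

    # Before the fix, min(size, remaining) let a 10 MB Content-Length pull
    # the whole file into memory in a single read; now every read is capped.
    assert capped_read_len(10 * 1024 * 1024, 10 * 1024 * 1024) == READ_SIZE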