diff --git a/CHANGES/11270.bugfix.rst b/CHANGES/11270.bugfix.rst new file mode 100644 index 00000000000..d1e0992b949 --- /dev/null +++ b/CHANGES/11270.bugfix.rst @@ -0,0 +1 @@ +Fixed file uploads failing with HTTP 422 errors when encountering 307/308 redirects, and 301/302 redirects for non-POST methods, by preserving the request body when appropriate per :rfc:`9110#section-15.4.3-3.1` -- by :user:`bdraco`. diff --git a/aiohttp/client.py b/aiohttp/client.py index 29c63a6b031..6a8c667491f 100644 --- a/aiohttp/client.py +++ b/aiohttp/client.py @@ -761,6 +761,12 @@ async def _connect_and_send_request( data = None if headers.get(hdrs.CONTENT_LENGTH): headers.pop(hdrs.CONTENT_LENGTH) + else: + # For 307/308, always preserve the request body + # For 301/302 with non-POST methods, preserve the request body + # https://www.rfc-editor.org/rfc/rfc9110#section-15.4.3-3.1 + # Use the existing payload to avoid recreating it from a potentially consumed file + data = req._body r_url = resp.headers.get(hdrs.LOCATION) or resp.headers.get( hdrs.URI diff --git a/aiohttp/payload.py b/aiohttp/payload.py index ec5f35e286b..eab56692399 100644 --- a/aiohttp/payload.py +++ b/aiohttp/payload.py @@ -484,10 +484,14 @@ def _set_or_restore_start_position(self) -> None: if self._start_position is None: try: self._start_position = self._value.tell() - except OSError: + except (OSError, AttributeError): self._consumed = True # Cannot seek, mark as consumed return - self._value.seek(self._start_position) + try: + self._value.seek(self._start_position) + except (OSError, AttributeError): + # Failed to seek back - mark as consumed since we've already read + self._consumed = True def _read_and_available_len( self, remaining_content_len: Optional[int] @@ -538,11 +542,30 @@ def size(self) -> Optional[int]: """ Size of the payload in bytes. - Returns the number of bytes remaining to be read from the file. + Returns the total size of the payload content from the initial position. + This ensures consistent Content-Length for requests, including 307/308 redirects + where the same payload instance is reused. + Returns None if the size cannot be determined (e.g., for unseekable streams). """ try: - return os.fstat(self._value.fileno()).st_size - self._value.tell() + # Store the start position on first access. + # This is critical when the same payload instance is reused (e.g., 307/308 + # redirects). Without storing the initial position, after the payload is + # read once, the file position would be at EOF, which would cause the + # size calculation to return 0 (file_size - EOF position). + # By storing the start position, we ensure the size calculation always + # returns the correct total size for any subsequent use. + if self._start_position is None: + try: + self._start_position = self._value.tell() + except (OSError, AttributeError): + # Can't get position, can't determine size + return None + + # Return the total size from the start position + # This ensures Content-Length is correct even after reading + return os.fstat(self._value.fileno()).st_size - self._start_position except (AttributeError, OSError): return None diff --git a/requirements/constraints.txt b/requirements/constraints.txt index 2a147a57082..839f5efc4af 100644 --- a/requirements/constraints.txt +++ b/requirements/constraints.txt @@ -186,7 +186,7 @@ pytest==8.4.1 # pytest-cov # pytest-mock # pytest-xdist -pytest-codspeed==3.2.0 +pytest-codspeed==4.0.0 # via # -r requirements/lint.in # -r requirements/test.in diff --git a/requirements/dev.txt b/requirements/dev.txt index bf44edac144..0ea9609b133 100644 --- a/requirements/dev.txt +++ b/requirements/dev.txt @@ -181,7 +181,7 @@ pytest==8.4.1 # pytest-cov # pytest-mock # pytest-xdist -pytest-codspeed==3.2.0 +pytest-codspeed==4.0.0 # via # -r requirements/lint.in # -r requirements/test.in diff --git a/requirements/lint.txt b/requirements/lint.txt index fc2664c7dbc..fad4b1d9db2 100644 --- a/requirements/lint.txt +++ b/requirements/lint.txt @@ -80,7 +80,7 @@ pytest==8.4.1 # -r requirements/lint.in # pytest-codspeed # pytest-mock -pytest-codspeed==3.2.0 +pytest-codspeed==4.0.0 # via -r requirements/lint.in pytest-mock==3.14.1 # via -r requirements/lint.in diff --git a/requirements/test.txt b/requirements/test.txt index 1b96b5ada61..1b0f3c3cfd2 100644 --- a/requirements/test.txt +++ b/requirements/test.txt @@ -102,7 +102,7 @@ pytest==8.4.1 # pytest-cov # pytest-mock # pytest-xdist -pytest-codspeed==3.2.0 +pytest-codspeed==4.0.0 # via -r requirements/test.in pytest-cov==6.2.1 # via -r requirements/test.in diff --git a/tests/test_client_functional.py b/tests/test_client_functional.py index 19eeb2d41d3..23ace2f6d4e 100644 --- a/tests/test_client_functional.py +++ b/tests/test_client_functional.py @@ -5357,3 +5357,228 @@ async def handler(request: web.Request) -> web.Response: assert ( len(resp._raw_cookie_headers) == 12 ), "All raw headers should be preserved" + + +@pytest.mark.parametrize("status", (307, 308)) +async def test_file_upload_307_308_redirect( + aiohttp_client: AiohttpClient, tmp_path: pathlib.Path, status: int +) -> None: + """Test that file uploads work correctly with 307/308 redirects. + + This demonstrates the bug where file payloads get incorrect Content-Length + on redirect because the file position isn't reset. + """ + received_bodies: List[bytes] = [] + + async def handler(request: web.Request) -> web.Response: + # Store the body content + body = await request.read() + received_bodies.append(body) + + if str(request.url.path).endswith("/"): + # Redirect URLs ending with / to remove the trailing slash + return web.Response( + status=status, + headers={ + "Location": str(request.url.with_path(request.url.path.rstrip("/"))) + }, + ) + + # Return success with the body size + return web.json_response( + { + "received_size": len(body), + "content_length": request.headers.get("Content-Length"), + } + ) + + app = web.Application() + app.router.add_post("/upload/", handler) + app.router.add_post("/upload", handler) + + client = await aiohttp_client(app) + + # Create a test file + test_file = tmp_path / f"test_upload_{status}.txt" + content = b"This is test file content for upload." + await asyncio.to_thread(test_file.write_bytes, content) + expected_size = len(content) + + # Upload file to URL with trailing slash (will trigger redirect) + f = await asyncio.to_thread(open, test_file, "rb") + try: + async with client.post("/upload/", data=f) as resp: + assert resp.status == 200 + result = await resp.json() + + # The server should receive the full file content + assert result["received_size"] == expected_size + assert result["content_length"] == str(expected_size) + + # Both requests should have received the same content + assert len(received_bodies) == 2 + assert received_bodies[0] == content # First request + assert received_bodies[1] == content # After redirect + finally: + await asyncio.to_thread(f.close) + + +@pytest.mark.parametrize("status", [301, 302]) +@pytest.mark.parametrize("method", ["PUT", "PATCH", "DELETE"]) +async def test_file_upload_301_302_redirect_non_post( + aiohttp_client: AiohttpClient, tmp_path: pathlib.Path, status: int, method: str +) -> None: + """Test that file uploads work correctly with 301/302 redirects for non-POST methods. + + Per RFC 9110, 301/302 redirects should preserve the method and body for non-POST requests. + """ + received_bodies: List[bytes] = [] + + async def handler(request: web.Request) -> web.Response: + # Store the body content + body = await request.read() + received_bodies.append(body) + + if str(request.url.path).endswith("/"): + # Redirect URLs ending with / to remove the trailing slash + return web.Response( + status=status, + headers={ + "Location": str(request.url.with_path(request.url.path.rstrip("/"))) + }, + ) + + # Return success with the body size + return web.json_response( + { + "method": request.method, + "received_size": len(body), + "content_length": request.headers.get("Content-Length"), + } + ) + + app = web.Application() + app.router.add_route(method, "/upload/", handler) + app.router.add_route(method, "/upload", handler) + + client = await aiohttp_client(app) + + # Create a test file + test_file = tmp_path / f"test_upload_{status}_{method.lower()}.txt" + content = f"Test {method} file content for {status} redirect.".encode() + await asyncio.to_thread(test_file.write_bytes, content) + expected_size = len(content) + + # Upload file to URL with trailing slash (will trigger redirect) + f = await asyncio.to_thread(open, test_file, "rb") + try: + async with client.request(method, "/upload/", data=f) as resp: + assert resp.status == 200 + result = await resp.json() + + # The server should receive the full file content after redirect + assert result["method"] == method # Method should be preserved + assert result["received_size"] == expected_size + assert result["content_length"] == str(expected_size) + + # Both requests should have received the same content + assert len(received_bodies) == 2 + assert received_bodies[0] == content # First request + assert received_bodies[1] == content # After redirect + finally: + await asyncio.to_thread(f.close) + + +async def test_file_upload_307_302_redirect_chain( + aiohttp_client: AiohttpClient, tmp_path: pathlib.Path +) -> None: + """Test that file uploads work correctly with 307->302->200 redirect chain. + + This verifies that: + 1. 307 preserves POST method and file body + 2. 302 changes POST to GET and drops the body + 3. No body leaks to the final GET request + """ + received_requests: List[Dict[str, Any]] = [] + + async def handler(request: web.Request) -> web.Response: + # Store request details + body = await request.read() + received_requests.append( + { + "path": str(request.url.path), + "method": request.method, + "body_size": len(body), + "content_length": request.headers.get("Content-Length"), + } + ) + + if request.url.path == "/upload307": + # First redirect: 307 should preserve method and body + return web.Response(status=307, headers={"Location": "/upload302"}) + elif request.url.path == "/upload302": + # Second redirect: 302 should change POST to GET + return web.Response(status=302, headers={"Location": "/final"}) + else: + # Final destination + return web.json_response( + { + "final_method": request.method, + "final_body_size": len(body), + "requests_received": len(received_requests), + } + ) + + app = web.Application() + app.router.add_route("*", "/upload307", handler) + app.router.add_route("*", "/upload302", handler) + app.router.add_route("*", "/final", handler) + + client = await aiohttp_client(app) + + # Create a test file + test_file = tmp_path / "test_redirect_chain.txt" + content = b"Test file content that should not leak to GET request" + await asyncio.to_thread(test_file.write_bytes, content) + expected_size = len(content) + + # Upload file to URL that triggers 307->302->final redirect chain + f = await asyncio.to_thread(open, test_file, "rb") + try: + async with client.post("/upload307", data=f) as resp: + assert resp.status == 200 + result = await resp.json() + + # Verify the redirect chain + assert len(resp.history) == 2 + assert resp.history[0].status == 307 + assert resp.history[1].status == 302 + + # Verify final request is GET with no body + assert result["final_method"] == "GET" + assert result["final_body_size"] == 0 + assert result["requests_received"] == 3 + + # Verify the request sequence + assert len(received_requests) == 3 + + # First request (307): POST with full body + assert received_requests[0]["path"] == "/upload307" + assert received_requests[0]["method"] == "POST" + assert received_requests[0]["body_size"] == expected_size + assert received_requests[0]["content_length"] == str(expected_size) + + # Second request (302): POST with preserved body from 307 + assert received_requests[1]["path"] == "/upload302" + assert received_requests[1]["method"] == "POST" + assert received_requests[1]["body_size"] == expected_size + assert received_requests[1]["content_length"] == str(expected_size) + + # Third request (final): GET with no body (302 changed method and dropped body) + assert received_requests[2]["path"] == "/final" + assert received_requests[2]["method"] == "GET" + assert received_requests[2]["body_size"] == 0 + assert received_requests[2]["content_length"] is None + + finally: + await asyncio.to_thread(f.close) diff --git a/tests/test_payload.py b/tests/test_payload.py index be17dbe31f8..fb8267f21cf 100644 --- a/tests/test_payload.py +++ b/tests/test_payload.py @@ -1276,3 +1276,79 @@ def open_file() -> TextIO: assert len(writer.buffer) == utf16_file_size finally: await loop.run_in_executor(None, f.close) + + +async def test_iobase_payload_size_after_reading(tmp_path: Path) -> None: + """Test that IOBasePayload.size returns correct size after file has been read. + + This demonstrates the bug where size calculation doesn't account for + the current file position, causing issues with 307/308 redirects. + """ + # Create a test file with known content + test_file = tmp_path / "test.txt" + content = b"Hello, World! This is test content." + await asyncio.to_thread(test_file.write_bytes, content) + expected_size = len(content) + + # Open the file and create payload + f = await asyncio.to_thread(open, test_file, "rb") + try: + p = payload.BufferedReaderPayload(f) + + # First size check - should return full file size + assert p.size == expected_size + + # Read the file (simulating first request) + writer = BufferWriter() + await p.write(writer) + assert len(writer.buffer) == expected_size + + # Second size check - should still return full file size + # but currently returns 0 because file position is at EOF + assert p.size == expected_size # This assertion fails! + + # Attempting to write again should write the full content + # but currently writes nothing because file is at EOF + writer2 = BufferWriter() + await p.write(writer2) + assert len(writer2.buffer) == expected_size # This also fails! + finally: + await asyncio.to_thread(f.close) + + +async def test_iobase_payload_size_unseekable() -> None: + """Test that IOBasePayload.size returns None for unseekable files.""" + + class UnseekableFile: + """Mock file object that doesn't support seeking.""" + + def __init__(self, content: bytes) -> None: + self.content = content + self.pos = 0 + + def read(self, size: int) -> bytes: + result = self.content[self.pos : self.pos + size] + self.pos += len(result) + return result + + def tell(self) -> int: + raise OSError("Unseekable file") + + content = b"Unseekable content" + f = UnseekableFile(content) + p = payload.IOBasePayload(f) # type: ignore[arg-type] + + # Size should return None for unseekable files + assert p.size is None + + # Payload should not be consumed before writing + assert p.consumed is False + + # Writing should still work + writer = BufferWriter() + await p.write(writer) + assert writer.buffer == content + + # For unseekable files that can't tell() or seek(), + # they are marked as consumed after the first write + assert p.consumed is True