diff --git a/CHANGES.rst b/CHANGES.rst
index e22fe28c135..a6fe7f9e5f5 100644
--- a/CHANGES.rst
+++ b/CHANGES.rst
@@ -10,6 +10,52 @@
 
 .. towncrier release notes start
 
+3.12.9 (2025-06-04)
+===================
+
+Bug fixes
+---------
+
+- Fixed ``IOBasePayload`` and ``TextIOPayload`` reading entire files into memory when streaming large files -- by :user:`bdraco`.
+
+  When using file-like objects with the aiohttp client, the entire file would be read into memory if the file size was provided in the ``Content-Length`` header. This could cause out-of-memory errors when uploading large files. The payload classes now correctly read data in chunks of ``READ_SIZE`` (64KB) regardless of the total content length.
+
+
+  *Related issues and pull requests on GitHub:*
+  :issue:`11138`.
+
+
+
+
+----
+
+
+3.12.8 (2025-06-04)
+===================
+
+Features
+--------
+
+- Added preemptive digest authentication to :class:`~aiohttp.DigestAuthMiddleware` -- by :user:`bdraco`.
+
+  The middleware now reuses authentication credentials for subsequent requests to the same
+  protection space, improving efficiency by avoiding extra authentication round trips.
+  This behavior matches how web browsers handle digest authentication and follows
+  :rfc:`7616#section-3.6`.
+
+  Preemptive authentication is enabled by default but can be disabled by passing
+  ``preemptive=False`` to the middleware constructor.
+
+
+  *Related issues and pull requests on GitHub:*
+  :issue:`11128`, :issue:`11129`.
+
+
+
+
+----
+
+
 3.12.7 (2025-06-02)
 ===================
 
diff --git a/CHANGES/11128.feature.rst b/CHANGES/11128.feature.rst
deleted file mode 100644
index 0f99d2b8a11..00000000000
--- a/CHANGES/11128.feature.rst
+++ /dev/null
@@ -1,9 +0,0 @@
-Added preemptive digest authentication to :class:`~aiohttp.DigestAuthMiddleware` -- by :user:`bdraco`.
-
-The middleware now reuses authentication credentials for subsequent requests to the same
-protection space, improving efficiency by avoiding extra authentication round trips.
-This behavior matches how web browsers handle digest authentication and follows
-:rfc:`7616#section-3.6`.
-
-Preemptive authentication is enabled by default but can be disabled by passing
-``preemptive=False`` to the middleware constructor.
diff --git a/CHANGES/11129.feature.rst b/CHANGES/11129.feature.rst
deleted file mode 120000
index 692d28ba9ce..00000000000
--- a/CHANGES/11129.feature.rst
+++ /dev/null
@@ -1 +0,0 @@
-11128.feature.rst
\ No newline at end of file
diff --git a/aiohttp/payload.py b/aiohttp/payload.py
index 1f83b611567..ec5f35e286b 100644
--- a/aiohttp/payload.py
+++ b/aiohttp/payload.py
@@ -512,7 +512,7 @@ def _read_and_available_len(
         self._set_or_restore_start_position()
         size = self.size  # Call size only once since it does I/O
         return size, self._value.read(
-            min(size or READ_SIZE, remaining_content_len or READ_SIZE)
+            min(READ_SIZE, size or READ_SIZE, remaining_content_len or READ_SIZE)
         )
 
     def _read(self, remaining_content_len: Optional[int]) -> bytes:
@@ -615,7 +615,15 @@ async def write_with_length(
                 return
 
             # Read next chunk
-            chunk = await loop.run_in_executor(None, self._read, remaining_content_len)
+            chunk = await loop.run_in_executor(
+                None,
+                self._read,
+                (
+                    min(READ_SIZE, remaining_content_len)
+                    if remaining_content_len is not None
+                    else READ_SIZE
+                ),
+            )
 
     def _should_stop_writing(
         self,
@@ -757,7 +765,7 @@ def _read_and_available_len(
         self._set_or_restore_start_position()
         size = self.size
         chunk = self._value.read(
-            min(size or READ_SIZE, remaining_content_len or READ_SIZE)
+            min(READ_SIZE, size or READ_SIZE, remaining_content_len or READ_SIZE)
         )
         return size, chunk.encode(self._encoding) if self._encoding else chunk.encode()
 
diff --git a/tests/test_payload.py b/tests/test_payload.py
index 2d80dc0c65d..be17dbe31f8 100644
--- a/tests/test_payload.py
+++ b/tests/test_payload.py
@@ -12,6 +12,7 @@
 
 from aiohttp import payload
 from aiohttp.abc import AbstractStreamWriter
+from aiohttp.payload import READ_SIZE
 
 
 class BufferWriter(AbstractStreamWriter):
@@ -363,6 +364,155 @@ async def test_iobase_payload_exact_chunk_size_limit() -> None:
     assert written == data[:chunk_size]
 
 
+async def test_iobase_payload_reads_in_chunks() -> None:
+    """Test IOBasePayload reads data in chunks of READ_SIZE, not all at once."""
+    # Create a large file that's multiple times larger than READ_SIZE
+    large_data = b"x" * (READ_SIZE * 3 + 1000)  # ~192KB + 1000 bytes
+
+    # Mock the file-like object to track read calls
+    mock_file = unittest.mock.Mock(spec=io.BytesIO)
+    mock_file.tell.return_value = 0
+    mock_file.fileno.side_effect = AttributeError  # Make size return None
+
+    # Track the sizes of read() calls
+    read_sizes = []
+
+    def mock_read(size: int) -> bytes:
+        read_sizes.append(size)
+        # Return data based on how many times read was called
+        call_count = len(read_sizes)
+        if call_count == 1:
+            return large_data[:size]
+        elif call_count == 2:
+            return large_data[READ_SIZE : READ_SIZE + size]
+        elif call_count == 3:
+            return large_data[READ_SIZE * 2 : READ_SIZE * 2 + size]
+        else:
+            return large_data[READ_SIZE * 3 :]
+
+    mock_file.read.side_effect = mock_read
+
+    payload_obj = payload.IOBasePayload(mock_file)
+    writer = MockStreamWriter()
+
+    # Write with a large content_length
+    await payload_obj.write_with_length(writer, len(large_data))
+
+    # Verify that reads were limited to READ_SIZE
+    assert len(read_sizes) > 1  # Should have multiple reads
+    for read_size in read_sizes:
+        assert (
+            read_size <= READ_SIZE
+        ), f"Read size {read_size} exceeds READ_SIZE {READ_SIZE}"
+
+
+async def test_iobase_payload_large_content_length() -> None:
+    """Test IOBasePayload with very large content_length doesn't read all at once."""
+    data = b"x" * (READ_SIZE + 1000)
+
+    # Create a custom file-like object that tracks read sizes
+    class TrackingBytesIO(io.BytesIO):
+        def __init__(self, data: bytes) -> None:
+            super().__init__(data)
+            self.read_sizes: List[int] = []
+
+        def read(self, size: Optional[int] = -1) -> bytes:
+            self.read_sizes.append(size if size is not None else -1)
+            return super().read(size)
+
+    tracking_file = TrackingBytesIO(data)
+    payload_obj = payload.IOBasePayload(tracking_file)
+    writer = MockStreamWriter()
+
+    # Write with a very large content_length (simulating the bug scenario)
+    large_content_length = 10 * 1024 * 1024  # 10MB
+    await payload_obj.write_with_length(writer, large_content_length)
+
+    # Verify no single read exceeded READ_SIZE
+    for read_size in tracking_file.read_sizes:
+        assert (
+            read_size <= READ_SIZE
+        ), f"Read size {read_size} exceeds READ_SIZE {READ_SIZE}"
+
+    # Verify the correct amount of data was written
+    assert writer.get_written_bytes() == data
+
+
+async def test_textio_payload_reads_in_chunks() -> None:
+    """Test TextIOPayload reads data in chunks of READ_SIZE, not all at once."""
+    # Create a large text file that's multiple times larger than READ_SIZE
+    large_text = "x" * (READ_SIZE * 3 + 1000)  # ~192KB + 1000 chars
+
+    # Mock the file-like object to track read calls
+    mock_file = unittest.mock.Mock(spec=io.StringIO)
+    mock_file.tell.return_value = 0
+    mock_file.fileno.side_effect = AttributeError  # Make size return None
+    mock_file.encoding = "utf-8"
+
+    # Track the sizes of read() calls
+    read_sizes = []
+
+    def mock_read(size: int) -> str:
+        read_sizes.append(size)
+        # Return data based on how many times read was called
+        call_count = len(read_sizes)
+        if call_count == 1:
+            return large_text[:size]
+        elif call_count == 2:
+            return large_text[READ_SIZE : READ_SIZE + size]
+        elif call_count == 3:
+            return large_text[READ_SIZE * 2 : READ_SIZE * 2 + size]
+        else:
+            return large_text[READ_SIZE * 3 :]
+
+    mock_file.read.side_effect = mock_read
+
+    payload_obj = payload.TextIOPayload(mock_file)
+    writer = MockStreamWriter()
+
+    # Write with a large content_length
+    await payload_obj.write_with_length(writer, len(large_text.encode("utf-8")))
+
+    # Verify that reads were limited to READ_SIZE
+    assert len(read_sizes) > 1  # Should have multiple reads
+    for read_size in read_sizes:
+        assert (
+            read_size <= READ_SIZE
+        ), f"Read size {read_size} exceeds READ_SIZE {READ_SIZE}"
+
+
+async def test_textio_payload_large_content_length() -> None:
+    """Test TextIOPayload with very large content_length doesn't read all at once."""
+    text_data = "x" * (READ_SIZE + 1000)
+
+    # Create a custom file-like object that tracks read sizes
+    class TrackingStringIO(io.StringIO):
+        def __init__(self, data: str) -> None:
+            super().__init__(data)
+            self.read_sizes: List[int] = []
+
+        def read(self, size: Optional[int] = -1) -> str:
+            self.read_sizes.append(size if size is not None else -1)
+            return super().read(size)
+
+    tracking_file = TrackingStringIO(text_data)
+    payload_obj = payload.TextIOPayload(tracking_file)
+    writer = MockStreamWriter()
+
+    # Write with a very large content_length (simulating the bug scenario)
+    large_content_length = 10 * 1024 * 1024  # 10MB
+    await payload_obj.write_with_length(writer, large_content_length)
+
+    # Verify no single read exceeded READ_SIZE
+    for read_size in tracking_file.read_sizes:
+        assert (
+            read_size <= READ_SIZE
+        ), f"Read size {read_size} exceeds READ_SIZE {READ_SIZE}"
+
+    # Verify the correct amount of data was written
+    assert writer.get_written_bytes() == text_data.encode("utf-8")
+
+
 async def test_async_iterable_payload_write_with_length_no_limit() -> None:
     """Test AsyncIterablePayload writing with no content length limit."""
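For context on the 3.12.9 entry above, a minimal client-side sketch of the scenario the fix addresses: streaming a large file upload by passing an open file object as the request body. The URL, path, and helper name are illustrative only; the chunked-read behavior described in the comment is the one stated in the changelog.

```python
import asyncio

import aiohttp


async def upload_large_file(url: str, path: str) -> int:
    """Stream a file to ``url`` without loading it fully into memory."""
    async with aiohttp.ClientSession() as session:
        with open(path, "rb") as f:
            # Passing the file object as ``data`` lets aiohttp wrap it in an
            # IOBasePayload-based payload; with the 3.12.9 fix the body is read
            # in READ_SIZE (64KB) chunks even when Content-Length is known.
            async with session.post(url, data=f) as resp:
                return resp.status


if __name__ == "__main__":
    # Placeholder URL and path -- adjust for a real endpoint.
    print(asyncio.run(upload_large_file("http://localhost:8080/upload", "big.bin")))
```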
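Similarly, for the 3.12.8 entry, a hedged sketch of toggling preemptive digest authentication. Only ``aiohttp.DigestAuthMiddleware`` and the ``preemptive`` constructor flag are named in the changelog; the positional credentials and the ``middlewares=`` keyword on ``ClientSession`` are assumed from the aiohttp 3.12 client-middleware API, and the URL and credentials are placeholders.

```python
import asyncio

from aiohttp import ClientSession, DigestAuthMiddleware


async def fetch_protected(url: str) -> int:
    # Preemptive reuse of the digest credentials is on by default in 3.12.8+;
    # pass preemptive=False to force a challenge round trip for every request.
    digest_auth = DigestAuthMiddleware("user", "pass", preemptive=False)
    async with ClientSession(middlewares=(digest_auth,)) as session:
        async with session.get(url) as resp:
            return resp.status


if __name__ == "__main__":
    # Placeholder URL -- point at a server that issues digest challenges.
    print(asyncio.run(fetch_protected("http://localhost:8080/protected")))
```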