From 92bd7351bb3a9838592fb4e7d976894307758b22 Mon Sep 17 00:00:00 2001
From: NandaScott
Date: Sat, 24 Jan 2026 14:26:49 -0500
Subject: [PATCH] Set encoding headers to gzip for bulk_data download

---
 scrython/bulk_data/bulk_data_mixins.py | 12 ++++++++---
 tests/test_bulk_data.py                | 29 ++++++++++++++++++++++++++
 2 files changed, 38 insertions(+), 3 deletions(-)

diff --git a/scrython/bulk_data/bulk_data_mixins.py b/scrython/bulk_data/bulk_data_mixins.py
index 42a59e6..bf49317 100644
--- a/scrython/bulk_data/bulk_data_mixins.py
+++ b/scrython/bulk_data/bulk_data_mixins.py
@@ -1,7 +1,9 @@
 import gzip
 import json
 from typing import Any
-from urllib.request import urlopen
+from urllib.request import Request, urlopen
+
+from ..base import ScrythonRequestHandler
 
 
 class BulkDataObjectMixin:
@@ -161,6 +163,10 @@ def download(
         """
         download_url = self.download_uri
 
+        request = Request(download_url)
+        request.add_header("User-Agent", ScrythonRequestHandler._user_agent)
+        request.add_header("Accept-Encoding", "gzip, identity")
+
         # Optional progress bar
         if progress:
             try:
@@ -172,7 +178,7 @@ def download(
                 ) from exc
 
             # Download with progress bar
-            with urlopen(download_url) as response:
+            with urlopen(request) as response:
                 # Check actual HTTP Content-Encoding header
                 content_encoding = response.info().get("Content-Encoding", "").lower()
 
@@ -202,7 +208,7 @@ def download(
                     data = downloaded_data
         else:
             # Download without progress bar
-            with urlopen(download_url) as response:
+            with urlopen(request) as response:
                 # Check actual HTTP Content-Encoding header
                 content_encoding = response.info().get("Content-Encoding", "").lower()
 
diff --git a/tests/test_bulk_data.py b/tests/test_bulk_data.py
index e50fabf..d05cd14 100644
--- a/tests/test_bulk_data.py
+++ b/tests/test_bulk_data.py
@@ -312,3 +312,32 @@ def test_download_uncompressed_with_progress(self, mock_urlopen):
             assert result == test_data
             assert len(result) == 1
             assert result[0]["name"] == "Test Card"
+
+    def test_download_sets_headers(self, mock_urlopen):
+        """Test download sets proper User-Agent and Accept-Encoding headers."""
+        from urllib.request import Request
+
+        from scrython.base import ScrythonRequestHandler
+
+        mock_urlopen.set_response("bulk_data/by_id.json")
+        bulk = ByType(type="oracle_cards")
+
+        with patch("scrython.bulk_data.bulk_data_mixins.urlopen") as mock_download:
+            # Set up mock to allow inspection of the Request object
+            mock_response = MagicMock()
+            mock_response.read.return_value = b"[]"
+            mock_response.info.return_value.get.return_value = ""
+            mock_response.__enter__.return_value = mock_response
+            mock_response.__exit__.return_value = None
+            mock_download.return_value = mock_response
+
+            bulk.download()
+
+            # Verify urlopen was called with a Request object
+            mock_download.assert_called_once()
+            request = mock_download.call_args[0][0]
+            assert isinstance(request, Request)
+
+            # Verify headers are set correctly
+            assert request.get_header("User-agent") == ScrythonRequestHandler._user_agent
+            assert request.get_header("Accept-encoding") == "gzip, identity"