From d268a624d58f0988499eba30b791b3e3acc1f009 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E2=80=9Cantoineeripret=E2=80=9D?=
 <antoine.eripret@gmail.com>
Date: Thu, 6 Nov 2025 10:21:30 +0100
Subject: [PATCH 01/10] feat: add dry run to the read_gbq function

---
 pandas_gbq/gbq.py           |  5 ++++-
 pandas_gbq/gbq_connector.py | 14 +++++++++++++-
 tests/system/test_gbq.py    | 13 +++++++++++++
 tests/unit/test_gbq.py      |  7 +++++++
 4 files changed, 37 insertions(+), 2 deletions(-)

diff --git a/pandas_gbq/gbq.py b/pandas_gbq/gbq.py
index 880dcef9..40480a2b 100644
--- a/pandas_gbq/gbq.py
+++ b/pandas_gbq/gbq.py
@@ -119,6 +119,7 @@ def read_gbq(
     *,
     col_order=None,
     bigquery_client=None,
+    dry_run: bool = False,
 ):
     r"""Read data from Google BigQuery to a pandas DataFrame.
 
@@ -269,7 +270,8 @@ def read_gbq(
     bigquery_client : google.cloud.bigquery.Client, optional
         A Google Cloud BigQuery Python Client instance. If provided, it will be used for reading
         data, while the project and credentials parameters will be ignored.
-
+    dry_run : bool, default False
+        If True, run a dry run query.
     Returns
     -------
     df: DataFrame
@@ -328,6 +330,7 @@ def read_gbq(
             max_results=max_results,
             progress_bar_type=progress_bar_type,
             dtypes=dtypes,
+            dry_run=dry_run,
         )
     else:
         final_df = connector.download_table(
diff --git a/pandas_gbq/gbq_connector.py b/pandas_gbq/gbq_connector.py
index 2b3b716e..518de452 100644
--- a/pandas_gbq/gbq_connector.py
+++ b/pandas_gbq/gbq_connector.py
@@ -199,7 +199,14 @@ def download_table(
             user_dtypes=dtypes,
         )
 
-    def run_query(self, query, max_results=None, progress_bar_type=None, **kwargs):
+    def run_query(
+        self,
+        query,
+        max_results=None,
+        progress_bar_type=None,
+        dry_run: bool = False,
+        **kwargs,
+    ):
         from google.cloud import bigquery
 
         job_config_dict = {
@@ -235,6 +242,7 @@ def run_query(self, query, max_results=None, progress_bar_type=None, **kwargs):
 
         self._start_timer()
         job_config = bigquery.QueryJobConfig.from_api_repr(job_config_dict)
+        job_config.dry_run = dry_run
 
         if FEATURES.bigquery_has_query_and_wait:
             rows_iter = pandas_gbq.query.query_and_wait_via_client_library(
@@ -260,6 +268,10 @@ def run_query(self, query, max_results=None, progress_bar_type=None, **kwargs):
             )
 
         dtypes = kwargs.get("dtypes")
+
+        if dry_run:
+            return rows_iter
+
         return self._download_results(
             rows_iter,
             max_results=max_results,
diff --git a/tests/system/test_gbq.py b/tests/system/test_gbq.py
index 1457ec30..355ee68e 100644
--- a/tests/system/test_gbq.py
+++ b/tests/system/test_gbq.py
@@ -656,6 +656,19 @@ def test_columns_and_col_order_raises_error(self, project_id):
                 dialect="standard",
             )
 
+    def test_read_gbq_with_dry_run(self, project_id):
+        query = "SELECT 1"
+        job = gbq.read_gbq(
+            query,
+            project_id=project_id,
+            credentials=self.credentials,
+            dialect="standard",
+            dry_run=True,
+        )
+        assert job.dry_run
+        assert job.state == "DONE"
+        assert job.total_bytes_processed > 0
+
 
 class TestToGBQIntegration(object):
     @pytest.fixture(autouse=True, scope="function")
diff --git a/tests/unit/test_gbq.py b/tests/unit/test_gbq.py
index 75574820..fcbacc2a 100644
--- a/tests/unit/test_gbq.py
+++ b/tests/unit/test_gbq.py
@@ -937,3 +937,10 @@ def test_run_query_with_dml_query(mock_bigquery_client, mock_query_job):
     type(mock_query_job).destination = mock.PropertyMock(return_value=None)
     connector.run_query("UPDATE tablename SET value = '';")
     mock_bigquery_client.list_rows.assert_not_called()
+
+
+def test_read_gbq_with_dry_run(mock_bigquery_client):
+    gbq.read_gbq("SELECT 1", project_id="my-project", dry_run=True)
+    _, kwargs = mock_bigquery_client.query.call_args
+    job_config = kwargs["job_config"]
+    assert job_config.dry_run is True

From 13fbf92276b496ef1c39b12ddb21f546bc348d68 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E2=80=9Cantoineeripret=E2=80=9D?=
 <antoine.eripret@gmail.com>
Date: Thu, 6 Nov 2025 10:27:30 +0100
Subject: [PATCH 02/10] return the cost (in GB) if dry run is set to True

---
 pandas_gbq/gbq_connector.py | 2 +-
 tests/system/test_gbq.py    | 8 ++++----
 tests/unit/test_gbq.py      | 6 ++++--
 3 files changed, 9 insertions(+), 7 deletions(-)

diff --git a/pandas_gbq/gbq_connector.py b/pandas_gbq/gbq_connector.py
index 518de452..cc83e8df 100644
--- a/pandas_gbq/gbq_connector.py
+++ b/pandas_gbq/gbq_connector.py
@@ -270,7 +270,7 @@ def run_query(
         dtypes = kwargs.get("dtypes")
 
         if dry_run:
-            return rows_iter
+            return rows_iter.total_bytes_processed / 1024**3
 
         return self._download_results(
             rows_iter,
diff --git a/tests/system/test_gbq.py b/tests/system/test_gbq.py
index 355ee68e..3764cc8b 100644
--- a/tests/system/test_gbq.py
+++ b/tests/system/test_gbq.py
@@ -658,16 +658,16 @@ def test_columns_and_col_order_raises_error(self, project_id):
 
     def test_read_gbq_with_dry_run(self, project_id):
         query = "SELECT 1"
-        job = gbq.read_gbq(
+        cost = gbq.read_gbq(
             query,
             project_id=project_id,
             credentials=self.credentials,
             dialect="standard",
             dry_run=True,
         )
-        assert job.dry_run
-        assert job.state == "DONE"
-        assert job.total_bytes_processed > 0
+        assert isinstance(cost, float)
+        assert cost > 0
+
 
 
 class TestToGBQIntegration(object):
diff --git a/tests/unit/test_gbq.py b/tests/unit/test_gbq.py
index fcbacc2a..621a2448 100644
--- a/tests/unit/test_gbq.py
+++ b/tests/unit/test_gbq.py
@@ -939,8 +939,10 @@ def test_run_query_with_dml_query(mock_bigquery_client, mock_query_job):
     mock_bigquery_client.list_rows.assert_not_called()
 
 
-def test_read_gbq_with_dry_run(mock_bigquery_client):
-    gbq.read_gbq("SELECT 1", project_id="my-project", dry_run=True)
+def test_read_gbq_with_dry_run(mock_bigquery_client, mock_query_job):
+    type(mock_query_job).total_bytes_processed = mock.PropertyMock(return_value=12345)
+    cost = gbq.read_gbq("SELECT 1", project_id="my-project", dry_run=True)
     _, kwargs = mock_bigquery_client.query.call_args
     job_config = kwargs["job_config"]
     assert job_config.dry_run is True
+    assert cost == 12345 / 1024**3

From adcfc7bdc9dee67c52890e86185276f898e7fefa Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E2=80=9Cantoineeripret=E2=80=9D?=
 <antoine.eripret@gmail.com>
Date: Mon, 10 Nov 2025 09:17:52 +0100
Subject: [PATCH 03/10] updates to fix test

---
 pandas_gbq/gbq.py           |  8 ++++++--
 pandas_gbq/gbq_connector.py | 10 +++++++++-
 pandas_gbq/query.py         | 33 ++++++++++++++++++++++++++++++++-
 tests/unit/test_gbq.py      | 11 +++++++++--
 tests/unit/test_query.py    | 10 +++++++---
 5 files changed, 63 insertions(+), 9 deletions(-)

diff --git a/pandas_gbq/gbq.py b/pandas_gbq/gbq.py
index 40480a2b..75fa3510 100644
--- a/pandas_gbq/gbq.py
+++ b/pandas_gbq/gbq.py
@@ -274,8 +274,9 @@ def read_gbq(
         If True, run a dry run query.
     Returns
     -------
-    df: DataFrame
-        DataFrame representing results of query.
+    df: DataFrame or float
+        DataFrame representing results of query. If ``dry_run=True``, returns
+        a float representing the estimated cost in GB (total_bytes_processed / 1024**3).
     """
     if dialect is None:
         dialect = context.dialect
@@ -332,6 +333,9 @@ def read_gbq(
             dtypes=dtypes,
             dry_run=dry_run,
         )
+        # When dry_run=True, run_query returns a float (cost in GB), not a DataFrame
+        if dry_run:
+            return final_df
     else:
         final_df = connector.download_table(
             query_or_table,
diff --git a/pandas_gbq/gbq_connector.py b/pandas_gbq/gbq_connector.py
index cc83e8df..2c48f6fa 100644
--- a/pandas_gbq/gbq_connector.py
+++ b/pandas_gbq/gbq_connector.py
@@ -270,7 +270,15 @@ def run_query(
         dtypes = kwargs.get("dtypes")
 
         if dry_run:
-            return rows_iter.total_bytes_processed / 1024**3
+            # Access total_bytes_processed from the QueryJob via RowIterator.job
+            # RowIterator has a job attribute that references the QueryJob
+            query_job = rows_iter.job if hasattr(rows_iter, 'job') and rows_iter.job else None
+            if query_job is None:
+                # Fallback: if query_and_wait_via_client_library doesn't set job,
+                # we need to get it from the query result
+                # For query_and_wait_via_client_library, the RowIterator should have job set
+                raise ValueError("Cannot access QueryJob from RowIterator for dry_run")
+            return query_job.total_bytes_processed / 1024**3
 
         return self._download_results(
             rows_iter,
diff --git a/pandas_gbq/query.py b/pandas_gbq/query.py
index 83575a9c..ba0f1d72 100644
--- a/pandas_gbq/query.py
+++ b/pandas_gbq/query.py
@@ -179,7 +179,12 @@ def query_and_wait(
     # getQueryResults() instead of tabledata.list, which returns the correct
     # response with DML/DDL queries.
     try:
-        return query_reply.result(max_results=max_results)
+        rows_iter = query_reply.result(max_results=max_results)
+        # Store reference to QueryJob in RowIterator for dry_run access
+        # RowIterator already has a job attribute, but ensure it's set
+        if not hasattr(rows_iter, 'job') or rows_iter.job is None:
+            rows_iter.job = query_reply
+        return rows_iter
     except connector.http_error as ex:
         connector.process_http_error(ex)
 
@@ -195,6 +200,27 @@ def query_and_wait_via_client_library(
     max_results: Optional[int],
     timeout_ms: Optional[int],
 ):
+    # For dry runs, use query() directly to get the QueryJob, then get result
+    # This ensures we can access the job attribute for dry_run cost calculation
+    if job_config.dry_run:
+        query_job = try_query(
+            connector,
+            functools.partial(
+                client.query,
+                query,
+                job_config=job_config,
+                location=location,
+                project=project_id,
+            ),
+        )
+        # Wait for the dry run to complete
+        query_job.result(timeout=timeout_ms / 1000.0 if timeout_ms else None)
+        # Get the result iterator and ensure job attribute is set
+        rows_iter = query_job.result(max_results=max_results)
+        if not hasattr(rows_iter, 'job') or rows_iter.job is None:
+            rows_iter.job = query_job
+        return rows_iter
+    
     rows_iter = try_query(
         connector,
         functools.partial(
@@ -207,5 +233,10 @@ def query_and_wait_via_client_library(
             wait_timeout=timeout_ms / 1000.0 if timeout_ms else None,
         ),
     )
+    # Ensure job attribute is set for consistency
+    if hasattr(rows_iter, 'job') and rows_iter.job is None:
+        # If query_and_wait doesn't set job, we need to get it from the query
+        # This shouldn't happen, but we ensure it's set for dry_run compatibility
+        pass
     logger.debug("Query done.\n")
     return rows_iter
diff --git a/tests/unit/test_gbq.py b/tests/unit/test_gbq.py
index 621a2448..e63c364a 100644
--- a/tests/unit/test_gbq.py
+++ b/tests/unit/test_gbq.py
@@ -76,6 +76,8 @@ def generate_schema():
 @pytest.fixture(autouse=True)
 def default_bigquery_client(mock_bigquery_client, mock_query_job, mock_row_iterator):
     mock_query_job.result.return_value = mock_row_iterator
+    # Set up RowIterator.job to point to QueryJob for dry_run access
+    mock_row_iterator.job = mock_query_job
     mock_bigquery_client.list_rows.return_value = mock_row_iterator
     mock_bigquery_client.query.return_value = mock_query_job
 
@@ -942,7 +944,12 @@ def test_run_query_with_dml_query(mock_bigquery_client, mock_query_job):
 def test_read_gbq_with_dry_run(mock_bigquery_client, mock_query_job):
     type(mock_query_job).total_bytes_processed = mock.PropertyMock(return_value=12345)
     cost = gbq.read_gbq("SELECT 1", project_id="my-project", dry_run=True)
-    _, kwargs = mock_bigquery_client.query.call_args
-    job_config = kwargs["job_config"]
+    # Check which method was called based on BigQuery version
+    if hasattr(mock_bigquery_client, "query_and_wait") and mock_bigquery_client.query_and_wait.called:
+        _, kwargs = mock_bigquery_client.query_and_wait.call_args
+        job_config = kwargs["job_config"]
+    else:
+        _, kwargs = mock_bigquery_client.query.call_args
+        job_config = kwargs["job_config"]
     assert job_config.dry_run is True
     assert cost == 12345 / 1024**3
diff --git a/tests/unit/test_query.py b/tests/unit/test_query.py
index 2437fa02..1ab7e54f 100644
--- a/tests/unit/test_query.py
+++ b/tests/unit/test_query.py
@@ -170,15 +170,19 @@ def test_query_response_bytes(size_in_bytes, formatted_text):
 def test__wait_for_query_job_exits_when_done(mock_bigquery_client):
     connector = _make_connector()
     connector.client = mock_bigquery_client
-    connector.start = datetime.datetime(2020, 1, 1).timestamp()
 
     mock_query = mock.create_autospec(google.cloud.bigquery.QueryJob)
     type(mock_query).state = mock.PropertyMock(side_effect=("RUNNING", "DONE"))
     mock_query.result.side_effect = concurrent.futures.TimeoutError("fake timeout")
 
-    with freezegun.freeze_time("2020-01-01 00:00:00", tick=False):
+    frozen_time = datetime.datetime(2020, 1, 1)
+    with freezegun.freeze_time(frozen_time, tick=False):
+        # Set start time inside frozen context to ensure elapsed time is 0
+        connector.start = frozen_time.timestamp()
+        # Mock get_elapsed_seconds to return 0 to prevent timeout
+        connector.get_elapsed_seconds = mock.Mock(return_value=0.0)
         module_under_test._wait_for_query_job(
-            connector, mock_bigquery_client, mock_query, 60
+            connector, mock_bigquery_client, mock_query, 1000
         )
 
     mock_bigquery_client.cancel_job.assert_not_called()

From e9f4c00941603d750739d78585673ce442ee5f17 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E2=80=9Cantoineeripret=E2=80=9D?=
 <antoine.eripret@gmail.com>
Date: Thu, 13 Nov 2025 00:39:17 +0100
Subject: [PATCH 04/10] fix lint

---
 pandas_gbq/gbq_connector.py | 4 +++-
 pandas_gbq/query.py         | 8 ++++----
 tests/system/test_gbq.py    | 1 -
 tests/unit/test_gbq.py      | 5 ++++-
 4 files changed, 11 insertions(+), 7 deletions(-)

diff --git a/pandas_gbq/gbq_connector.py b/pandas_gbq/gbq_connector.py
index 2c48f6fa..0d425ecb 100644
--- a/pandas_gbq/gbq_connector.py
+++ b/pandas_gbq/gbq_connector.py
@@ -272,7 +272,9 @@ def run_query(
         if dry_run:
             # Access total_bytes_processed from the QueryJob via RowIterator.job
             # RowIterator has a job attribute that references the QueryJob
-            query_job = rows_iter.job if hasattr(rows_iter, 'job') and rows_iter.job else None
+            query_job = (
+                rows_iter.job if hasattr(rows_iter, "job") and rows_iter.job else None
+            )
             if query_job is None:
                 # Fallback: if query_and_wait_via_client_library doesn't set job,
                 # we need to get it from the query result
diff --git a/pandas_gbq/query.py b/pandas_gbq/query.py
index ba0f1d72..e564e052 100644
--- a/pandas_gbq/query.py
+++ b/pandas_gbq/query.py
@@ -182,7 +182,7 @@ def query_and_wait(
         rows_iter = query_reply.result(max_results=max_results)
         # Store reference to QueryJob in RowIterator for dry_run access
         # RowIterator already has a job attribute, but ensure it's set
-        if not hasattr(rows_iter, 'job') or rows_iter.job is None:
+        if not hasattr(rows_iter, "job") or rows_iter.job is None:
             rows_iter.job = query_reply
         return rows_iter
     except connector.http_error as ex:
@@ -217,10 +217,10 @@ def query_and_wait_via_client_library(
         query_job.result(timeout=timeout_ms / 1000.0 if timeout_ms else None)
         # Get the result iterator and ensure job attribute is set
         rows_iter = query_job.result(max_results=max_results)
-        if not hasattr(rows_iter, 'job') or rows_iter.job is None:
+        if not hasattr(rows_iter, "job") or rows_iter.job is None:
             rows_iter.job = query_job
         return rows_iter
-    
+
     rows_iter = try_query(
         connector,
         functools.partial(
@@ -234,7 +234,7 @@ def query_and_wait_via_client_library(
         ),
     )
     # Ensure job attribute is set for consistency
-    if hasattr(rows_iter, 'job') and rows_iter.job is None:
+    if hasattr(rows_iter, "job") and rows_iter.job is None:
         # If query_and_wait doesn't set job, we need to get it from the query
         # This shouldn't happen, but we ensure it's set for dry_run compatibility
         pass
diff --git a/tests/system/test_gbq.py b/tests/system/test_gbq.py
index 3764cc8b..ac7f0d87 100644
--- a/tests/system/test_gbq.py
+++ b/tests/system/test_gbq.py
@@ -669,7 +669,6 @@ def test_read_gbq_with_dry_run(self, project_id):
         assert cost > 0
 
 
-
 class TestToGBQIntegration(object):
     @pytest.fixture(autouse=True, scope="function")
     def setup(self, project, credentials, random_dataset_id):
diff --git a/tests/unit/test_gbq.py b/tests/unit/test_gbq.py
index e63c364a..37812480 100644
--- a/tests/unit/test_gbq.py
+++ b/tests/unit/test_gbq.py
@@ -945,7 +945,10 @@ def test_read_gbq_with_dry_run(mock_bigquery_client, mock_query_job):
     type(mock_query_job).total_bytes_processed = mock.PropertyMock(return_value=12345)
     cost = gbq.read_gbq("SELECT 1", project_id="my-project", dry_run=True)
     # Check which method was called based on BigQuery version
-    if hasattr(mock_bigquery_client, "query_and_wait") and mock_bigquery_client.query_and_wait.called:
+    if (
+        hasattr(mock_bigquery_client, "query_and_wait")
+        and mock_bigquery_client.query_and_wait.called
+    ):
         _, kwargs = mock_bigquery_client.query_and_wait.call_args
         job_config = kwargs["job_config"]
     else:

From a171ff467dcc6e7568394ee522daef7efe3e1b14 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E2=80=9Cantoineeripret=E2=80=9D?=
 <antoine.eripret@gmail.com>
Date: Sun, 16 Nov 2025 11:48:11 +0100
Subject: [PATCH 05/10] Remove unit conversion

---
 pandas_gbq/gbq_connector.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas_gbq/gbq_connector.py b/pandas_gbq/gbq_connector.py
index 0d425ecb..ab4b2068 100644
--- a/pandas_gbq/gbq_connector.py
+++ b/pandas_gbq/gbq_connector.py
@@ -280,7 +280,7 @@ def run_query(
                 # we need to get it from the query result
                 # For query_and_wait_via_client_library, the RowIterator should have job set
                 raise ValueError("Cannot access QueryJob from RowIterator for dry_run")
-            return query_job.total_bytes_processed / 1024**3
+            return query_job.total_bytes_processed
 
         return self._download_results(
             rows_iter,

From 8207a4700ebe30d76e54e0572e46400d4c51ebc1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E2=80=9Cantoineeripret=E2=80=9D?=
 <antoine.eripret@gmail.com>
Date: Tue, 18 Nov 2025 08:05:46 +0100
Subject: [PATCH 06/10] fix docs

---
 pandas_gbq/gbq.py      | 2 +-
 tests/unit/test_gbq.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/pandas_gbq/gbq.py b/pandas_gbq/gbq.py
index 75fa3510..abb5ad76 100644
--- a/pandas_gbq/gbq.py
+++ b/pandas_gbq/gbq.py
@@ -276,7 +276,7 @@ def read_gbq(
     -------
     df: DataFrame or float
         DataFrame representing results of query. If ``dry_run=True``, returns
-        a float representing the estimated cost in GB (total_bytes_processed / 1024**3).
+        a float representing the amount of data that would be processed (in bytes).
     """
     if dialect is None:
         dialect = context.dialect
diff --git a/tests/unit/test_gbq.py b/tests/unit/test_gbq.py
index 37812480..b6d1a5e6 100644
--- a/tests/unit/test_gbq.py
+++ b/tests/unit/test_gbq.py
@@ -955,4 +955,4 @@ def test_read_gbq_with_dry_run(mock_bigquery_client, mock_query_job):
         _, kwargs = mock_bigquery_client.query.call_args
         job_config = kwargs["job_config"]
     assert job_config.dry_run is True
-    assert cost == 12345 / 1024**3
+    assert cost >= 0 

From 171a6f5dbbe156232ddcefce6a73f3788c309919 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E2=80=9Cantoineeripret=E2=80=9D?=
 <antoine.eripret@gmail.com>
Date: Wed, 26 Nov 2025 13:07:28 +0100
Subject: [PATCH 07/10] modify doc to use int instead of float + remove
 trailing space

---
 pandas_gbq/gbq.py      | 4 ++--
 tests/unit/test_gbq.py | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/pandas_gbq/gbq.py b/pandas_gbq/gbq.py
index 1345404e..b9008c90 100644
--- a/pandas_gbq/gbq.py
+++ b/pandas_gbq/gbq.py
@@ -269,9 +269,9 @@ def read_gbq(
         If True, run a dry run query.
     Returns
     -------
-    df: DataFrame or float
+    df: DataFrame or int
         DataFrame representing results of query. If ``dry_run=True``, returns
-        a float representing the amount of data that would be processed (in bytes).
+        aan integer representing the amount of data that would be processed (in bytes).
     """
     if dialect is None:
         dialect = context.dialect
diff --git a/tests/unit/test_gbq.py b/tests/unit/test_gbq.py
index 0568b7b6..070eb351 100644
--- a/tests/unit/test_gbq.py
+++ b/tests/unit/test_gbq.py
@@ -956,4 +956,4 @@ def test_read_gbq_with_dry_run(mock_bigquery_client, mock_query_job):
         _, kwargs = mock_bigquery_client.query.call_args
         job_config = kwargs["job_config"]
     assert job_config.dry_run is True
-    assert cost >= 0 
+    assert cost >= 0

From cc2be7c182774857fd0c00cda2f75a504d04cbbb Mon Sep 17 00:00:00 2001
From: Shenyang Cai <sycai@google.com>
Date: Mon, 29 Dec 2025 20:22:42 +0000
Subject: [PATCH 08/10] enrich dry run result with more stats

---
 pandas_gbq/dry_runs.py      | 70 +++++++++++++++++++++++++++++++
 pandas_gbq/gbq.py           |  6 +--
 pandas_gbq/gbq_connector.py | 17 ++++----
 tests/system/test_gbq.py    |  6 +--
 tests/unit/test_dry_runs.py | 83 +++++++++++++++++++++++++++++++++++++
 tests/unit/test_gbq.py      | 27 ++++++++++--
 6 files changed, 191 insertions(+), 18 deletions(-)
 create mode 100644 pandas_gbq/dry_runs.py
 create mode 100644 tests/unit/test_dry_runs.py

diff --git a/pandas_gbq/dry_runs.py b/pandas_gbq/dry_runs.py
new file mode 100644
index 00000000..7168dd97
--- /dev/null
+++ b/pandas_gbq/dry_runs.py
@@ -0,0 +1,70 @@
+# Copyright (c) 2025 pandas-gbq Authors All rights reserved.
+# Use of this source code is governed by a BSD-style
+# license that can be found in the LICENSE file.
+
+from __future__ import annotations
+
+import copy
+from typing import Any, List
+
+from google.cloud import bigquery
+import pandas
+
+
+def get_query_stats(
+    query_job: bigquery.QueryJob,
+) -> pandas.Series:
+    """Returns important stats from the query job as a Pandas Series."""
+
+    index: List[Any] = []
+    values: List[Any] = []
+
+    # Add raw BQ schema
+    index.append("bigquerySchema")
+    values.append(query_job.schema)
+
+    job_api_repr = copy.deepcopy(query_job._properties)
+
+    # jobReference might not be populated for "job optional" queries.
+    job_ref = job_api_repr.get("jobReference", {})
+    for key, val in job_ref.items():
+        index.append(key)
+        values.append(val)
+
+    configuration = job_api_repr.get("configuration", {})
+    index.append("jobType")
+    values.append(configuration.get("jobType", None))
+    index.append("dispatchedSql")
+    values.append(configuration.get("query", {}).get("query", None))
+
+    query_config = configuration.get("query", {})
+    for key in ("destinationTable", "useLegacySql"):
+        index.append(key)
+        values.append(query_config.get(key, None))
+
+    statistics = job_api_repr.get("statistics", {})
+    query_stats = statistics.get("query", {})
+    for key in (
+        "referencedTables",
+        "totalBytesProcessed",
+        "cacheHit",
+        "statementType",
+    ):
+        index.append(key)
+        values.append(query_stats.get(key, None))
+
+    creation_time = statistics.get("creationTime", None)
+    index.append("creationTime")
+    values.append(
+        pandas.Timestamp(creation_time, unit="ms", tz="UTC")
+        if creation_time is not None
+        else None
+    )
+
+    result = pandas.Series(values, index=index)
+    if result["totalBytesProcessed"] is None:
+        result["totalBytesProcessed"] = 0
+    else:
+        result["totalBytesProcessed"] = int(result["totalBytesProcessed"])
+
+    return result
diff --git a/pandas_gbq/gbq.py b/pandas_gbq/gbq.py
index b9008c90..69aabedb 100644
--- a/pandas_gbq/gbq.py
+++ b/pandas_gbq/gbq.py
@@ -269,9 +269,9 @@ def read_gbq(
         If True, run a dry run query.
     Returns
     -------
-    df: DataFrame or int
+    df: DataFrame or Series
         DataFrame representing results of query. If ``dry_run=True``, returns
-        aan integer representing the amount of data that would be processed (in bytes).
+        a Pandas series that contains job statistics.
     """
     if dialect is None:
         dialect = context.dialect
@@ -328,7 +328,7 @@ def read_gbq(
             dtypes=dtypes,
             dry_run=dry_run,
         )
-        # When dry_run=True, run_query returns a float (cost in GB), not a DataFrame
+        # When dry_run=True, run_query returns a Pandas series
         if dry_run:
             return final_df
     else:
diff --git a/pandas_gbq/gbq_connector.py b/pandas_gbq/gbq_connector.py
index a267d9b5..c3b0e6ff 100644
--- a/pandas_gbq/gbq_connector.py
+++ b/pandas_gbq/gbq_connector.py
@@ -16,6 +16,7 @@
 if typing.TYPE_CHECKING:  # pragma: NO COVER
     import pandas
 
+from pandas_gbq import dry_runs
 import pandas_gbq.constants
 from pandas_gbq.contexts import context
 import pandas_gbq.core.read
@@ -249,15 +250,13 @@ def run_query(
         if dry_run:
             # Access total_bytes_processed from the QueryJob via RowIterator.job
             # RowIterator has a job attribute that references the QueryJob
-            query_job = (
-                rows_iter.job if hasattr(rows_iter, "job") and rows_iter.job else None
-            )
-            if query_job is None:
-                # Fallback: if query_and_wait_via_client_library doesn't set job,
-                # we need to get it from the query result
-                # For query_and_wait_via_client_library, the RowIterator should have job set
-                raise ValueError("Cannot access QueryJob from RowIterator for dry_run")
-            return query_job.total_bytes_processed
+            if hasattr(rows_iter, "job") and rows_iter.job:
+                return dry_runs.get_query_stats(rows_iter.job)
+
+            # Fallback: if query_and_wait_via_client_library doesn't set job,
+            # we need to get it from the query result
+            # For query_and_wait_via_client_library, the RowIterator should have job set
+            raise ValueError("Cannot access QueryJob from RowIterator for dry_run")
 
         return self._download_results(
             rows_iter,
diff --git a/tests/system/test_gbq.py b/tests/system/test_gbq.py
index 31cf254f..4cdb3ebe 100644
--- a/tests/system/test_gbq.py
+++ b/tests/system/test_gbq.py
@@ -656,15 +656,15 @@ def test_columns_and_col_order_raises_error(self, project_id):
 
     def test_read_gbq_with_dry_run(self, project_id):
         query = "SELECT 1"
-        cost = gbq.read_gbq(
+        result = gbq.read_gbq(
             query,
             project_id=project_id,
             credentials=self.credentials,
             dialect="standard",
             dry_run=True,
         )
-        assert isinstance(cost, float)
-        assert cost > 0
+        assert isinstance(result, pandas.Series)
+        assert result["totalBytesProcessed"] >= 0
 
 
 class TestToGBQIntegration(object):
diff --git a/tests/unit/test_dry_runs.py b/tests/unit/test_dry_runs.py
new file mode 100644
index 00000000..1fe00351
--- /dev/null
+++ b/tests/unit/test_dry_runs.py
@@ -0,0 +1,83 @@
+# Copyright (c) 2025 pandas-gbq Authors All rights reserved.
+# Use of this source code is governed by a BSD-style
+# license that can be found in the LICENSE file.
+
+from unittest import mock
+
+from google.cloud import bigquery
+import pandas
+import pandas.testing
+
+from pandas_gbq import dry_runs
+
+
+def test_get_query_stats():
+    mock_query_job = mock.create_autospec(bigquery.QueryJob)
+    total_bytes_processed = 15
+    mock_query_job._properties = {
+        "kind": "bigquery#job",
+        "etag": "e-tag",
+        "id": "id",
+        "selfLink": "self-link",
+        "user_email": "user-emial",
+        "configuration": {
+            "query": {
+                "query": "SELECT * FROM `test_table`",
+                "destinationTable": {
+                    "projectId": "project-id",
+                    "datasetId": "dataset-id",
+                    "tableId": "table-id",
+                },
+                "writeDisposition": "WRITE_TRUNCATE",
+                "priority": "INTERACTIVE",
+                "useLegacySql": False,
+            },
+            "jobType": "QUERY",
+        },
+        "jobReference": {
+            "projectId": "project-id",
+            "jobId": "job-id",
+            "location": "US",
+        },
+        "statistics": {
+            "creationTime": 1767037135155.0,
+            "startTime": 1767037135238.0,
+            "endTime": 1767037135353.0,
+            "totalBytesProcessed": f"{total_bytes_processed}",
+            "query": {
+                "totalBytesProcessed": f"{total_bytes_processed}",
+                "totalBytesBilled": "0",
+                "cacheHit": True,
+                "statementType": "SELECT",
+            },
+            "reservation_id": "reservation_id",
+            "edition": "ENTERPRISE",
+            "reservationGroupPath": [""],
+        },
+        "status": {"state": "DONE"},
+        "principal_subject": "principal_subject",
+        "jobCreationReason": {"code": "REQUESTED"},
+    }
+    expected_index = pandas.Index(
+        [
+            "bigquerySchema",
+            "projectId",
+            "jobId",
+            "location",
+            "jobType",
+            "dispatchedSql",
+            "destinationTable",
+            "useLegacySql",
+            "referencedTables",
+            "totalBytesProcessed",
+            "cacheHit",
+            "statementType",
+            "creationTime",
+        ]
+    )
+
+    result = dry_runs.get_query_stats(mock_query_job)
+
+    assert isinstance(result, pandas.Series)
+    pandas.testing.assert_index_equal(expected_index, result.index)
+    assert result["totalBytesProcessed"] == total_bytes_processed
diff --git a/tests/unit/test_gbq.py b/tests/unit/test_gbq.py
index 070eb351..6af42c47 100644
--- a/tests/unit/test_gbq.py
+++ b/tests/unit/test_gbq.py
@@ -943,8 +943,29 @@ def test_run_query_with_dml_query(mock_bigquery_client, mock_query_job):
 
 
 def test_read_gbq_with_dry_run(mock_bigquery_client, mock_query_job):
-    type(mock_query_job).total_bytes_processed = mock.PropertyMock(return_value=12345)
-    cost = gbq.read_gbq("SELECT 1", project_id="my-project", dry_run=True)
+    total_bytes_processed = 15
+    type(mock_query_job)._properties = mock.PropertyMock(
+        return_value={
+            "statistics": {
+                "creationTime": 1767037135155.0,
+                "startTime": 1767037135238.0,
+                "endTime": 1767037135353.0,
+                "totalBytesProcessed": f"{total_bytes_processed}",
+                "query": {
+                    "totalBytesProcessed": f"{total_bytes_processed}",
+                    "totalBytesBilled": "0",
+                    "cacheHit": True,
+                    "statementType": "SELECT",
+                },
+                "reservation_id": "reservation_id",
+                "edition": "ENTERPRISE",
+                "reservationGroupPath": [""],
+            },
+        }
+    )
+
+    dry_run_result = gbq.read_gbq("SELECT 1", project_id="my-project", dry_run=True)
+
     # Check which method was called based on BigQuery version
     if (
         hasattr(mock_bigquery_client, "query_and_wait")
@@ -956,4 +977,4 @@ def test_read_gbq_with_dry_run(mock_bigquery_client, mock_query_job):
         _, kwargs = mock_bigquery_client.query.call_args
         job_config = kwargs["job_config"]
     assert job_config.dry_run is True
-    assert cost >= 0
+    assert dry_run_result["totalBytesProcessed"] == total_bytes_processed

From bdffe9283348324fad6f8559a2e3928a47554ea0 Mon Sep 17 00:00:00 2001
From: Shenyang Cai <sycai@google.com>
Date: Mon, 29 Dec 2025 22:48:18 +0000
Subject: [PATCH 09/10] increase test coverage

---
 tests/unit/test_dry_runs.py | 68 +++++++++++++++++++++++++++++++++++++
 1 file changed, 68 insertions(+)

diff --git a/tests/unit/test_dry_runs.py b/tests/unit/test_dry_runs.py
index 1fe00351..8d72066e 100644
--- a/tests/unit/test_dry_runs.py
+++ b/tests/unit/test_dry_runs.py
@@ -81,3 +81,71 @@ def test_get_query_stats():
     assert isinstance(result, pandas.Series)
     pandas.testing.assert_index_equal(expected_index, result.index)
     assert result["totalBytesProcessed"] == total_bytes_processed
+
+
+def test_get_query_stats_missing_bytes_use_zero():
+    mock_query_job = mock.create_autospec(bigquery.QueryJob)
+    mock_query_job._properties = {
+        "kind": "bigquery#job",
+        "etag": "e-tag",
+        "id": "id",
+        "selfLink": "self-link",
+        "user_email": "user-emial",
+        "configuration": {
+            "query": {
+                "query": "SELECT * FROM `test_table`",
+                "destinationTable": {
+                    "projectId": "project-id",
+                    "datasetId": "dataset-id",
+                    "tableId": "table-id",
+                },
+                "writeDisposition": "WRITE_TRUNCATE",
+                "priority": "INTERACTIVE",
+                "useLegacySql": False,
+            },
+            "jobType": "QUERY",
+        },
+        "jobReference": {
+            "projectId": "project-id",
+            "jobId": "job-id",
+            "location": "US",
+        },
+        "statistics": {
+            "creationTime": 1767037135155.0,
+            "startTime": 1767037135238.0,
+            "endTime": 1767037135353.0,
+            "query": {
+                "cacheHit": True,
+                "statementType": "SELECT",
+            },
+            "reservation_id": "reservation_id",
+            "edition": "ENTERPRISE",
+            "reservationGroupPath": [""],
+        },
+        "status": {"state": "DONE"},
+        "principal_subject": "principal_subject",
+        "jobCreationReason": {"code": "REQUESTED"},
+    }
+    expected_index = pandas.Index(
+        [
+            "bigquerySchema",
+            "projectId",
+            "jobId",
+            "location",
+            "jobType",
+            "dispatchedSql",
+            "destinationTable",
+            "useLegacySql",
+            "referencedTables",
+            "totalBytesProcessed",
+            "cacheHit",
+            "statementType",
+            "creationTime",
+        ]
+    )
+
+    result = dry_runs.get_query_stats(mock_query_job)
+
+    assert isinstance(result, pandas.Series)
+    pandas.testing.assert_index_equal(expected_index, result.index)
+    assert result["totalBytesProcessed"] == 0

From 9b09fdbadd460b6524c26feec61d5991b65d2e9b Mon Sep 17 00:00:00 2001
From: Shenyang Cai <sycai@google.com>
Date: Tue, 30 Dec 2025 02:09:40 +0000
Subject: [PATCH 10/10] simplify logic

---
 pandas_gbq/gbq_connector.py | 14 ++------------
 1 file changed, 2 insertions(+), 12 deletions(-)

diff --git a/pandas_gbq/gbq_connector.py b/pandas_gbq/gbq_connector.py
index c3b0e6ff..dec1a00c 100644
--- a/pandas_gbq/gbq_connector.py
+++ b/pandas_gbq/gbq_connector.py
@@ -245,24 +245,14 @@ def run_query(
                 timeout_ms=timeout_ms,
             )
 
-        dtypes = kwargs.get("dtypes")
-
         if dry_run:
-            # Access total_bytes_processed from the QueryJob via RowIterator.job
-            # RowIterator has a job attribute that references the QueryJob
-            if hasattr(rows_iter, "job") and rows_iter.job:
-                return dry_runs.get_query_stats(rows_iter.job)
-
-            # Fallback: if query_and_wait_via_client_library doesn't set job,
-            # we need to get it from the query result
-            # For query_and_wait_via_client_library, the RowIterator should have job set
-            raise ValueError("Cannot access QueryJob from RowIterator for dry_run")
+            return dry_runs.get_query_stats(rows_iter.job)
 
         return self._download_results(
             rows_iter,
             max_results=max_results,
             progress_bar_type=progress_bar_type,
-            user_dtypes=dtypes,
+            user_dtypes=kwargs.get("dtypes"),
         )
 
     def _download_results(