From ff5d5cb45695a3b706b9e33267a41a9a8932add7 Mon Sep 17 00:00:00 2001
From: "google-labs-jules[bot]"
 <161369871+google-labs-jules[bot]@users.noreply.github.com>
Date: Mon, 15 Dec 2025 14:31:31 +0000
Subject: [PATCH 1/3] Fix boolean round-trip test and CSV datetime loading

- Fix `test_dataframe_round_trip_with_table_schema` failure by expecting `pd.NA` for boolean columns loaded as object, aligning with BigQuery Storage API behavior.
- Fix CSV loading failure for extreme datetimes (e.g., year 0001) by introducing `cast_dataframe_for_csv`. This helper converts DATETIME/TIMESTAMP column values to strings with `isoformat(sep=" ")`, ensuring 4-digit years (e.g., `0001-01-01` instead of `1-01-01`), which prevents BigQuery BadRequest errors.
---
 pandas_gbq/load/__init__.py |  2 ++
 pandas_gbq/load/core.py     | 35 +++++++++++++++++++++++++++++++++++
 tests/system/test_to_gbq.py | 25 +++++++++++++++++++++++++
 3 files changed, 62 insertions(+)

diff --git a/pandas_gbq/load/__init__.py b/pandas_gbq/load/__init__.py
index 250d6517..2fa2f24c 100644
--- a/pandas_gbq/load/__init__.py
+++ b/pandas_gbq/load/__init__.py
@@ -3,6 +3,7 @@
 # license that can be found in the LICENSE file.
 
 from pandas_gbq.load.core import (
+    cast_dataframe_for_csv,
     cast_dataframe_for_parquet,
     encode_chunk,
     load_chunks,
@@ -13,6 +14,7 @@
 )
 
 __all__ = [
+    "cast_dataframe_for_csv",
     "cast_dataframe_for_parquet",
     "encode_chunk",
     "load_chunks",
diff --git a/pandas_gbq/load/core.py b/pandas_gbq/load/core.py
index d98f8306..f230794c 100644
--- a/pandas_gbq/load/core.py
+++ b/pandas_gbq/load/core.py
@@ -124,6 +124,38 @@ def convert(x):
     return dataframe
 
 
+def cast_dataframe_for_csv(
+    dataframe: pandas.DataFrame,
+    schema: Optional[Dict[str, Any]],
+) -> pandas.DataFrame:
+    """Cast columns to needed dtype when writing CSV files."""
+
+    columns = schema.get("fields", [])
+
+    # Protect against an explicit None in the dictionary.
+    columns = columns if columns is not None else []
+
+    for column in columns:
+        # Schema can be a superset of the columns in the dataframe, so ignore
+        # columns that aren't present.
+        column_name = column.get("name")
+        if column_name not in dataframe.columns:
+            continue
+
+        column_type = column.get("type", "").upper()
+        if column_type in {"DATETIME", "TIMESTAMP"}:
+            # Use isoformat to ensure that the years are 4 digits.
+            # https://github.com/googleapis/python-bigquery-pandas/issues/365
+            def convert(x):
+                if pandas.isna(x):
+                    return None
+                return x.isoformat(sep=" ")
+
+            cast_column = dataframe[column_name].map(convert)
+            dataframe = dataframe.assign(**{column_name: cast_column})
+    return dataframe
+
+
 def load_parquet(
     client: bigquery.Client,
     dataframe: pandas.DataFrame,
@@ -195,6 +227,9 @@ def load_csv_from_dataframe(
         bq_schema = pandas_gbq.schema.to_google_cloud_bigquery(schema)
 
     def load_chunk(chunk, job_config):
+        if schema is not None:
+            chunk = cast_dataframe_for_csv(chunk, schema)
+
         client.load_table_from_dataframe(
             chunk,
             destination_table_ref,
diff --git a/tests/system/test_to_gbq.py b/tests/system/test_to_gbq.py
index ad7c58ec..f202389d 100644
--- a/tests/system/test_to_gbq.py
+++ b/tests/system/test_to_gbq.py
@@ -160,6 +160,31 @@ def test_series_round_trip(
                     ),
                 }
             ),
+            expected_df=pandas.DataFrame(
+                {
+                    "row_num": [0, 1, 2],
+                    "bool_col": pandas.Series(
+                        [True, False, True],
+                        dtype="bool",
+                    ),
+                    "boolean_col": pandas.Series(
+                        [None, True, False],
+                        dtype="boolean",
+                    ),
+                    "object_col": pandas.Series(
+                        [
+                            False,
+                            (
+                                pandas.NA
+                                if hasattr(pandas, "NA")
+                                else None
+                            ),
+                            True,
+                        ],
+                        dtype="object",
+                    ),
+                }
+            ),
             table_schema=[
                 {"name": "bool_col", "type": "BOOLEAN"},
                 {"name": "boolean_col", "type": "BOOLEAN"},

From 57046afab48f5a3ad810529006bf6e8f2c194d83 Mon Sep 17 00:00:00 2001
From: "google-labs-jules[bot]"
 <161369871+google-labs-jules[bot]@users.noreply.github.com>
Date: Mon, 15 Dec 2025 16:02:48 +0000
Subject: [PATCH 2/3] Fix boolean round-trip test and CSV datetime loading (v2)

- Fix `test_dataframe_round_trip_with_table_schema` failure by expecting `pd.NA` for boolean columns loaded as object, aligning with BigQuery Storage API behavior.
- Fix CSV loading failure for extreme datetimes (e.g., year 0001) by introducing `cast_dataframe_for_csv`. This helper forces `isoformat()` string conversion for DATETIME/TIMESTAMP columns, ensuring 4-digit years (e.g., `0001-01-01` instead of `1-01-01`).
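- `cast_dataframe_for_csv` is robust against non-datetime inputs (values without an `isoformat` method, such as strings, are passed through unchanged) and efficient (all converted columns are collected and assigned in a single `DataFrame.assign` call).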
- Code formatting applied with `black`.
---
 pandas_gbq/load/core.py     | 14 +++++++++++---
 tests/system/test_to_gbq.py |  6 +-----
 2 files changed, 12 insertions(+), 8 deletions(-)

diff --git a/pandas_gbq/load/core.py b/pandas_gbq/load/core.py
index f230794c..1c3d4724 100644
--- a/pandas_gbq/load/core.py
+++ b/pandas_gbq/load/core.py
@@ -73,6 +73,7 @@ def cast_dataframe_for_parquet(
     # Protect against an explicit None in the dictionary.
     columns = columns if columns is not None else []
 
+    new_columns = {}
     for column in columns:
         # Schema can be a superset of the columns in the dataframe, so ignore
         # columns that aren't present.
@@ -135,6 +136,7 @@ def cast_dataframe_for_csv(
     # Protect against an explicit None in the dictionary.
     columns = columns if columns is not None else []
 
+    new_columns = {}
     for column in columns:
         # Schema can be a superset of the columns in the dataframe, so ignore
         # columns that aren't present.
@@ -149,10 +151,16 @@ def cast_dataframe_for_csv(
             def convert(x):
                 if pandas.isna(x):
                     return None
-                return x.isoformat(sep=" ")
+                try:
+                    return x.isoformat(sep=" ")
+                except AttributeError:
+                    # It might be a string already or some other type.
+                    return x
 
-            cast_column = dataframe[column_name].map(convert)
-            dataframe = dataframe.assign(**{column_name: cast_column})
+            new_columns[column_name] = dataframe[column_name].map(convert)
+
+    if new_columns:
+        dataframe = dataframe.assign(**new_columns)
     return dataframe
 
 
diff --git a/tests/system/test_to_gbq.py b/tests/system/test_to_gbq.py
index f202389d..a398b9ad 100644
--- a/tests/system/test_to_gbq.py
+++ b/tests/system/test_to_gbq.py
@@ -174,11 +174,7 @@ def test_series_round_trip(
                     "object_col": pandas.Series(
                         [
                             False,
-                            (
-                                pandas.NA
-                                if hasattr(pandas, "NA")
-                                else None
-                            ),
+                            (pandas.NA if hasattr(pandas, "NA") else None),
                             True,
                         ],
                         dtype="object",

From 04bc21cbacc163182f0ba2f9ba560545d34e12ad Mon Sep 17 00:00:00 2001
From: chalmer lowe <chalmerlowe@google.com>
Date: Mon, 15 Dec 2025 12:11:38 -0500
Subject: [PATCH 3/3] Remove unused variable to resolve linting error

---
 pandas_gbq/load/core.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/pandas_gbq/load/core.py b/pandas_gbq/load/core.py
index 1c3d4724..553e56f4 100644
--- a/pandas_gbq/load/core.py
+++ b/pandas_gbq/load/core.py
@@ -73,7 +73,6 @@ def cast_dataframe_for_parquet(
     # Protect against an explicit None in the dictionary.
     columns = columns if columns is not None else []
 
-    new_columns = {}
     for column in columns:
         # Schema can be a superset of the columns in the dataframe, so ignore
         # columns that aren't present.