Fix boolean round-trip test and CSV datetime loading

google-labs-jules[bot] · google-labs-jules[bot] · commit ff5d5cb45695 · 2025-12-15T14:31:31.000Z
- Fix `test_dataframe_round_trip_with_table_schema` failure by expecting `pd.NA` for boolean columns loaded as object, aligning with BigQuery Storage API behavior.
- Fix CSV loading failure for extreme datetimes (e.g., year 0001) by introducing `cast_dataframe_for_csv`. This helper converts DATETIME/TIMESTAMP columns to strings with `isoformat(sep=" ")`, which guarantees 4-digit years (e.g., `0001-01-01` instead of `1-01-01`) and so prevents BigQuery BadRequest errors.
diff --git a/pandas_gbq/load/__init__.py b/pandas_gbq/load/__init__.py
@@ -3,6 +3,7 @@
 # license that can be found in the LICENSE file.
 
 from pandas_gbq.load.core import (
+    cast_dataframe_for_csv,
     cast_dataframe_for_parquet,
     encode_chunk,
     load_chunks,
@@ -13,6 +14,7 @@
 )
 
 __all__ = [
+    "cast_dataframe_for_csv",
     "cast_dataframe_for_parquet",
     "encode_chunk",
     "load_chunks",
diff --git a/pandas_gbq/load/core.py b/pandas_gbq/load/core.py
@@ -124,6 +124,38 @@ def convert(x):
     return dataframe
 
 
+def cast_dataframe_for_csv(
+    dataframe: pandas.DataFrame,
+    schema: Optional[Dict[str, Any]],
+) -> pandas.DataFrame:
+    """Cast columns to needed dtype when writing CSV files."""
+
+    columns = schema.get("fields", [])
+
+    # Protect against an explicit None in the dictionary.
+    columns = columns if columns is not None else []
+
+    for column in columns:
+        # Schema can be a superset of the columns in the dataframe, so ignore
+        # columns that aren't present.
+        column_name = column.get("name")
+        if column_name not in dataframe.columns:
+            continue
+
+        column_type = column.get("type", "").upper()
+        if column_type in {"DATETIME", "TIMESTAMP"}:
+            # Use isoformat to ensure that the years are 4 digits.
+            # https://github.com/googleapis/python-bigquery-pandas/issues/365
+            def convert(x):
+                if pandas.isna(x):
+                    return None
+                return x.isoformat(sep=" ")
+
+            cast_column = dataframe[column_name].map(convert)
+            dataframe = dataframe.assign(**{column_name: cast_column})
+    return dataframe
+
+
 def load_parquet(
     client: bigquery.Client,
     dataframe: pandas.DataFrame,
@@ -195,6 +227,9 @@ def load_csv_from_dataframe(
         bq_schema = pandas_gbq.schema.to_google_cloud_bigquery(schema)
 
     def load_chunk(chunk, job_config):
+        if schema is not None:
+            chunk = cast_dataframe_for_csv(chunk, schema)
+
         client.load_table_from_dataframe(
             chunk,
             destination_table_ref,
diff --git a/tests/system/test_to_gbq.py b/tests/system/test_to_gbq.py
@@ -160,6 +160,31 @@ def test_series_round_trip(
                     ),
                 }
             ),
+            expected_df=pandas.DataFrame(
+                {
+                    "row_num": [0, 1, 2],
+                    "bool_col": pandas.Series(
+                        [True, False, True],
+                        dtype="bool",
+                    ),
+                    "boolean_col": pandas.Series(
+                        [None, True, False],
+                        dtype="boolean",
+                    ),
+                    "object_col": pandas.Series(
+                        [
+                            False,
+                            (
+                                pandas.NA
+                                if hasattr(pandas, "NA")
+                                else None
+                            ),
+                            True,
+                        ],
+                        dtype="object",
+                    ),
+                }
+            ),
             table_schema=[
                 {"name": "bool_col", "type": "BOOLEAN"},
                 {"name": "boolean_col", "type": "BOOLEAN"},