Skip to content
This repository was archived by the owner on Mar 13, 2026. It is now read-only.

Commit ff5d5cb

Browse files
Fix boolean round-trip test and CSV datetime loading
- Fix the `test_dataframe_round_trip_with_table_schema` failure by expecting `pd.NA` for boolean columns loaded as object, aligning with BigQuery Storage API behavior.
- Fix the CSV loading failure for extreme datetimes (e.g., year 0001) by introducing `cast_dataframe_for_csv`. This helper forces `isoformat()` string conversion for DATETIME/TIMESTAMP columns, ensuring 4-digit years (e.g., `0001-01-01` instead of `1-01-01`), which prevents BigQuery BadRequest errors.
1 parent f1525d9 commit ff5d5cb

3 files changed

Lines changed: 62 additions & 0 deletions

File tree

pandas_gbq/load/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
# license that can be found in the LICENSE file.
44

55
from pandas_gbq.load.core import (
6+
cast_dataframe_for_csv,
67
cast_dataframe_for_parquet,
78
encode_chunk,
89
load_chunks,
@@ -13,6 +14,7 @@
1314
)
1415

1516
__all__ = [
17+
"cast_dataframe_for_csv",
1618
"cast_dataframe_for_parquet",
1719
"encode_chunk",
1820
"load_chunks",

pandas_gbq/load/core.py

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -124,6 +124,38 @@ def convert(x):
124124
return dataframe
125125

126126

127+
def cast_dataframe_for_csv(
    dataframe: pandas.DataFrame,
    schema: Optional[Dict[str, Any]],
) -> pandas.DataFrame:
    """Cast columns to the representation needed when writing CSV files.

    DATETIME and TIMESTAMP columns are converted to ISO 8601 strings (with a
    space separating date and time) so that years are always written with
    four digits, e.g. ``0001-01-01 00:00:00`` rather than ``1-01-01 00:00:00``.
    https://github.com/googleapis/python-bigquery-pandas/issues/365

    Args:
        dataframe: The data about to be serialized. It is not modified.
        schema: A table schema dictionary with an optional ``"fields"`` list
            of ``{"name": ..., "type": ...}`` dictionaries. May be ``None``
            or empty, in which case nothing is converted.

    Returns:
        ``dataframe`` itself when no column needs conversion, otherwise a new
        DataFrame whose DATETIME/TIMESTAMP columns hold strings (missing
        values become ``None``).
    """
    if not schema:
        return dataframe

    # ``or []`` also protects against an explicit None in the dictionary.
    fields = schema.get("fields") or []

    def convert(value):
        if pandas.isna(value):
            return None
        # Values that are already text are assumed to be formatted by the
        # caller; they have no ``isoformat`` method to call.
        if isinstance(value, str):
            return value
        return value.isoformat(sep=" ")

    converted = {}
    for field in fields:
        # Schema can be a superset of the columns in the dataframe, so ignore
        # columns that aren't present.
        name = field.get("name")
        if name not in dataframe.columns:
            continue

        # The type may be absent or explicitly None; treat both as "unknown".
        field_type = (field.get("type") or "").upper()
        if field_type in {"DATETIME", "TIMESTAMP"}:
            converted[name] = dataframe[name].map(convert)

    if not converted:
        return dataframe

    # A single assign() copies the frame once, however many columns changed.
    return dataframe.assign(**converted)
157+
158+
127159
def load_parquet(
128160
client: bigquery.Client,
129161
dataframe: pandas.DataFrame,
@@ -195,6 +227,9 @@ def load_csv_from_dataframe(
195227
bq_schema = pandas_gbq.schema.to_google_cloud_bigquery(schema)
196228

197229
def load_chunk(chunk, job_config):
230+
if schema is not None:
231+
chunk = cast_dataframe_for_csv(chunk, schema)
232+
198233
client.load_table_from_dataframe(
199234
chunk,
200235
destination_table_ref,

tests/system/test_to_gbq.py

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -160,6 +160,31 @@ def test_series_round_trip(
160160
),
161161
}
162162
),
163+
expected_df=pandas.DataFrame(
164+
{
165+
"row_num": [0, 1, 2],
166+
"bool_col": pandas.Series(
167+
[True, False, True],
168+
dtype="bool",
169+
),
170+
"boolean_col": pandas.Series(
171+
[None, True, False],
172+
dtype="boolean",
173+
),
174+
"object_col": pandas.Series(
175+
[
176+
False,
177+
(
178+
pandas.NA
179+
if hasattr(pandas, "NA")
180+
else None
181+
),
182+
True,
183+
],
184+
dtype="object",
185+
),
186+
}
187+
),
163188
table_schema=[
164189
{"name": "bool_col", "type": "BOOLEAN"},
165190
{"name": "boolean_col", "type": "BOOLEAN"},

0 commit comments

Comments
 (0)