Skip to content
This repository was archived by the owner on Apr 1, 2026. It is now read-only.

Commit e00bb8a

Browse files
committed
feat: preserve OBJ_REF_DTYPE in table schemas when saving with to_gbq
1 parent 43353e2 commit e00bb8a

File tree

3 files changed: 58 additions and 0 deletions.

bigframes/dataframe.py

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4301,6 +4301,32 @@ def to_gbq(
43014301
result_table = result.query_job.destination
43024302
assert result_table is not None
43034303

4304+
obj_ref_dest_cols = []
4305+
for col_id in id_overrides.keys():
4306+
try:
4307+
if (
4308+
export_array.get_column_type(col_id)
4309+
== bigframes.dtypes.OBJ_REF_DTYPE
4310+
):
4311+
obj_ref_dest_cols.append(id_overrides[col_id])
4312+
except Exception:
4313+
pass
4314+
4315+
if obj_ref_dest_cols:
4316+
table = self._session.bqclient.get_table(result_table)
4317+
new_schema = []
4318+
for field in table.schema:
4319+
if field.name in obj_ref_dest_cols:
4320+
field_dict = field.to_api_repr()
4321+
field_dict["description"] = "bigframes_dtype: OBJ_REF_DTYPE"
4322+
new_schema.append(
4323+
google.cloud.bigquery.SchemaField.from_api_repr(field_dict)
4324+
)
4325+
else:
4326+
new_schema.append(field)
4327+
table.schema = new_schema
4328+
self._session.bqclient.update_table(table, ["schema"])
4329+
43044330
if temp_table_ref:
43054331
bigframes.session._io.bigquery.set_table_expiration(
43064332
self._session.bqclient,

bigframes/dtypes.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -772,6 +772,13 @@ def convert_schema_field(
772772
) -> typing.Tuple[str, Dtype]:
773773
is_repeated = field.mode == "REPEATED"
774774
if field.field_type == "RECORD":
775+
if field.description == "bigframes_dtype: OBJ_REF_DTYPE":
776+
bf_dtype = OBJ_REF_DTYPE # type: ignore
777+
if is_repeated:
778+
pa_type = pa.list_(bigframes_dtype_to_arrow_dtype(bf_dtype))
779+
bf_dtype = pd.ArrowDtype(pa_type)
780+
return field.name, bf_dtype
781+
775782
mapped_fields = map(convert_schema_field, field.fields)
776783
fields = []
777784
for name, dtype in mapped_fields:

tests/system/small/test_dataframe_io.py

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1002,6 +1002,31 @@ def test_to_gbq_timedelta_tag_ignored_when_appending(bigquery_client, dataset_id
10021002
assert table.schema[0].description is None
10031003

10041004

1005+
def test_to_gbq_obj_ref(session, dataset_id: str, bigquery_client):
    """Round-trip an object-reference column through ``to_gbq`` and ``read_gbq``.

    Asserts that the ``obj_ref_col`` field of the written table is a RECORD
    whose description is ``bigframes_dtype: OBJ_REF_DTYPE``, and that reading
    the table back yields a single row whose ``obj_ref_col`` has dtype
    ``dtypes.OBJ_REF_DTYPE``.
    """
    table_id = f"{dataset_id}.test_to_gbq_obj_ref"
    sql = """
    SELECT
        'gs://cloud-samples-data/vision/ocr/sign.jpg' AS uri_col
    """

    # Build a one-row frame whose only column comes from `.str.to_blob()`.
    source_df = session.read_gbq(sql)
    source_df["obj_ref_col"] = source_df["uri_col"].str.to_blob()
    source_df = source_df.drop(columns=["uri_col"])

    # Write the frame out to BigQuery.
    source_df.to_gbq(table_id)

    # The destination schema must carry the dtype tag on the written column.
    written_table = bigquery_client.get_table(table_id)
    candidates = (fld for fld in written_table.schema if fld.name == "obj_ref_col")
    tagged_field = next(candidates)
    assert tagged_field.field_type == "RECORD"
    assert tagged_field.description == "bigframes_dtype: OBJ_REF_DTYPE"

    # Reading the table back must restore the original dtype.
    round_tripped = session.read_gbq(table_id)
    assert round_tripped["obj_ref_col"].dtype == dtypes.OBJ_REF_DTYPE
    assert len(round_tripped) == 1
1028+
1029+
10051030
@pytest.mark.parametrize(
10061031
("index"),
10071032
[True, False],

0 commit comments

Comments
 (0)