Skip to content
This repository was archived by the owner on Apr 1, 2026. It is now read-only.

Commit a1b262f

Browse files
committed
fix: round-trip persistence for OBJ_REF_DTYPE
1 parent 19cb613 commit a1b262f

File tree

4 files changed

+11
-35
lines changed

4 files changed

+11
-35
lines changed

bigframes/dataframe.py

Lines changed: 0 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -4301,32 +4301,6 @@ def to_gbq(
43014301
result_table = result.query_job.destination
43024302
assert result_table is not None
43034303

4304-
obj_ref_dest_cols = []
4305-
for col_id in id_overrides.keys():
4306-
try:
4307-
if (
4308-
export_array.get_column_type(col_id)
4309-
== bigframes.dtypes.OBJ_REF_DTYPE
4310-
):
4311-
obj_ref_dest_cols.append(id_overrides[col_id])
4312-
except Exception:
4313-
pass
4314-
4315-
if obj_ref_dest_cols:
4316-
table = self._session.bqclient.get_table(result_table)
4317-
new_schema = []
4318-
for field in table.schema:
4319-
if field.name in obj_ref_dest_cols:
4320-
field_dict = field.to_api_repr()
4321-
field_dict["description"] = "bigframes_dtype: OBJ_REF_DTYPE"
4322-
new_schema.append(
4323-
google.cloud.bigquery.SchemaField.from_api_repr(field_dict)
4324-
)
4325-
else:
4326-
new_schema.append(field)
4327-
table.schema = new_schema
4328-
self._session.bqclient.update_table(table, ["schema"])
4329-
43304304
if temp_table_ref:
43314305
bigframes.session._io.bigquery.set_table_expiration(
43324306
self._session.bqclient,

bigframes/dtypes.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -772,7 +772,7 @@ def convert_schema_field(
772772
) -> typing.Tuple[str, Dtype]:
773773
is_repeated = field.mode == "REPEATED"
774774
if field.field_type == "RECORD":
775-
if field.description == "bigframes_dtype: OBJ_REF_DTYPE":
775+
if field.description == OBJ_REF_DESCRIPTION_TAG:
776776
bf_dtype = OBJ_REF_DTYPE # type: ignore
777777
if is_repeated:
778778
pa_type = pa.list_(bigframes_dtype_to_arrow_dtype(bf_dtype))
@@ -834,8 +834,11 @@ def convert_to_schema_field(
834834
convert_to_schema_field(field.name, inner_bf_type, overrides)
835835
)
836836

837+
description = (
838+
OBJ_REF_DESCRIPTION_TAG if bigframes_dtype == OBJ_REF_DTYPE else None
839+
)
837840
return google.cloud.bigquery.SchemaField(
838-
name, "RECORD", fields=inner_fields
841+
name, "RECORD", fields=inner_fields, description=description
839842
)
840843
if bigframes_dtype.pyarrow_dtype == pa.duration("us"):
841844
# Timedeltas are represented as integers in microseconds.
@@ -978,6 +981,7 @@ def lcd_type_or_throw(dtype1: Dtype, dtype2: Dtype) -> Dtype:
978981

979982

980983
TIMEDELTA_DESCRIPTION_TAG = "#microseconds"
984+
OBJ_REF_DESCRIPTION_TAG = "bigframes_dtype: OBJ_REF_DTYPE"
981985

982986

983987
def contains_db_dtypes_json_arrow_type(type_):

bigframes/session/bq_caching_executor.py

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -334,13 +334,14 @@ def _export_gbq(
334334
session=array_value.session,
335335
)
336336

337-
has_timedelta_col = any(
338-
t == bigframes.dtypes.TIMEDELTA_DTYPE for t in array_value.schema.dtypes
337+
has_special_dtype_col = any(
338+
t in (bigframes.dtypes.TIMEDELTA_DTYPE, bigframes.dtypes.OBJ_REF_DTYPE)
339+
for t in array_value.schema.dtypes
339340
)
340341

341-
if spec.if_exists != "append" and has_timedelta_col:
342+
if spec.if_exists != "append" and has_special_dtype_col:
342343
# Only update schema if this is not modifying an existing table, and the
343-
# new table contains timedelta columns.
344+
# new table contains special columns (like timedelta or obj_ref).
344345
table = self.bqclient.get_table(spec.table)
345346
table.schema = array_value.schema.to_bigquery()
346347
self.bqclient.update_table(table, ["schema"])

tests/system/small/test_dataframe_io.py

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1012,16 +1012,13 @@ def test_to_gbq_obj_ref(session, dataset_id: str, bigquery_client):
10121012
df["obj_ref_col"] = df["uri_col"].str.to_blob()
10131013
df = df.drop(columns=["uri_col"])
10141014

1015-
# Save the dataframe to bigquery
10161015
df.to_gbq(destination_table)
10171016

1018-
# Verify the table schema description is added
10191017
table = bigquery_client.get_table(destination_table)
10201018
obj_ref_field = next(f for f in table.schema if f.name == "obj_ref_col")
10211019
assert obj_ref_field.field_type == "RECORD"
10221020
assert obj_ref_field.description == "bigframes_dtype: OBJ_REF_DTYPE"
10231021

1024-
# Verify reloading it correctly restores the dtype
10251022
reloaded_df = session.read_gbq(destination_table)
10261023
assert reloaded_df["obj_ref_col"].dtype == dtypes.OBJ_REF_DTYPE
10271024
assert len(reloaded_df) == 1

0 commit comments

Comments
 (0)