Skip to content
This repository was archived by the owner on Apr 1, 2026. It is now read-only.
26 changes: 26 additions & 0 deletions bigframes/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -4301,6 +4301,32 @@ def to_gbq(
result_table = result.query_job.destination
assert result_table is not None

obj_ref_dest_cols = []
for col_id in id_overrides.keys():
try:
if (
export_array.get_column_type(col_id)
== bigframes.dtypes.OBJ_REF_DTYPE
):
obj_ref_dest_cols.append(id_overrides[col_id])
except Exception:
pass

if obj_ref_dest_cols:
table = self._session.bqclient.get_table(result_table)
new_schema = []
for field in table.schema:
if field.name in obj_ref_dest_cols:
field_dict = field.to_api_repr()
field_dict["description"] = "bigframes_dtype: OBJ_REF_DTYPE"
new_schema.append(
google.cloud.bigquery.SchemaField.from_api_repr(field_dict)
)
else:
new_schema.append(field)
table.schema = new_schema
self._session.bqclient.update_table(table, ["schema"])

if temp_table_ref:
bigframes.session._io.bigquery.set_table_expiration(
self._session.bqclient,
Expand Down
7 changes: 7 additions & 0 deletions bigframes/dtypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -772,6 +772,13 @@ def convert_schema_field(
) -> typing.Tuple[str, Dtype]:
is_repeated = field.mode == "REPEATED"
if field.field_type == "RECORD":
if field.description == "bigframes_dtype: OBJ_REF_DTYPE":
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

IIUC, we can changes both convert_to_schema_field and convert_schema_field to achieve round-trip persistence without changes in dataframe.py.
Please check how TIMEDELTA_DESCRIPTION_TAG works.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done, thanks for the tip! I've updated convert_to_schema_field and convert_schema_field to handle description tags for OBJ_REF_DTYPE and reverted the dataframe.py updates.

bf_dtype = OBJ_REF_DTYPE # type: ignore
if is_repeated:
pa_type = pa.list_(bigframes_dtype_to_arrow_dtype(bf_dtype))
bf_dtype = pd.ArrowDtype(pa_type)
return field.name, bf_dtype

mapped_fields = map(convert_schema_field, field.fields)
fields = []
for name, dtype in mapped_fields:
Expand Down
25 changes: 25 additions & 0 deletions tests/system/small/test_dataframe_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -1002,6 +1002,31 @@ def test_to_gbq_timedelta_tag_ignored_when_appending(bigquery_client, dataset_id
assert table.schema[0].description is None


def test_to_gbq_obj_ref(session, dataset_id: str, bigquery_client):
destination_table = f"{dataset_id}.test_to_gbq_obj_ref"
sql = """
SELECT
'gs://cloud-samples-data/vision/ocr/sign.jpg' AS uri_col
"""
df = session.read_gbq(sql)
df["obj_ref_col"] = df["uri_col"].str.to_blob()
df = df.drop(columns=["uri_col"])

# Save the dataframe to bigquery
df.to_gbq(destination_table)

# Verify the table schema description is added
table = bigquery_client.get_table(destination_table)
obj_ref_field = next(f for f in table.schema if f.name == "obj_ref_col")
assert obj_ref_field.field_type == "RECORD"
assert obj_ref_field.description == "bigframes_dtype: OBJ_REF_DTYPE"

# Verify reloading it correctly restores the dtype
reloaded_df = session.read_gbq(destination_table)
assert reloaded_df["obj_ref_col"].dtype == dtypes.OBJ_REF_DTYPE
assert len(reloaded_df) == 1


@pytest.mark.parametrize(
("index"),
[True, False],
Expand Down
Loading