Skip to content

Commit c73abe7

Browse files
committed
fix tests related to blob api deprecation
1 parent 8af2532 commit c73abe7

File tree

5 files changed

+118
-45
lines changed

5 files changed

+118
-45
lines changed

packages/bigframes/tests/system/small/bigquery/test_ai.py

Lines changed: 37 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,31 @@
2222
import bigframes.pandas as bpd
2323
from bigframes import dataframe, dtypes, series
2424
from bigframes.testing import utils as test_utils
25+
import uuid
26+
import google.cloud.bigquery
27+
28+
29+
def _create_mock_obj_ref_df(session, uris, name="image"):
    """Build a DataFrame whose ``name`` column is tagged as an object reference.

    The URIs are written to a uniquely named scratch table, the column's
    schema description is rewritten to ``"bigframes_dtype: OBJ_REF_DTYPE"``,
    and the table is read back through ``session.read_gbq``.

    Args:
        session: Session used to build the DataFrame, reach the BigQuery
            client (``session.bqclient``) and read the table back.
        uris: Values for the single column of the scratch table.
        name: Name of the column to create and tag.

    Returns:
        The result of ``session.read_gbq`` on the scratch table.
    """
    import datetime

    df = bpd.DataFrame({name: uris}, session=session)
    table_id = f"bigframes-dev.bigframes_tests_sys.tmp_obj_ref_{uuid.uuid4().hex}"
    df.to_gbq(table_id, if_exists="replace")

    client = session.bqclient
    table = client.get_table(table_id)
    schema = list(table.schema)
    for i, field in enumerate(schema):
        if field.name == name:
            # NOTE(review): the description marker is presumably what bigframes
            # inspects to restore the object-reference dtype on load -- confirm.
            schema[i] = google.cloud.bigquery.SchemaField(
                name=field.name,
                field_type=field.field_type,
                mode=field.mode,
                description="bigframes_dtype: OBJ_REF_DTYPE",
                # Carry nested sub-fields over so a STRUCT column is not
                # flattened when only its description is being changed.
                fields=field.fields,
            )
            break
    table.schema = schema
    # Every call creates a fresh uuid-named table in a shared dataset; give it
    # an expiration so scratch tables do not accumulate across test runs.
    table.expires = datetime.datetime.now(
        datetime.timezone.utc
    ) + datetime.timedelta(hours=1)
    client.update_table(table, ["schema", "expires"])

    return session.read_gbq(table_id)
2550

2651

2752
def test_ai_function_pandas_input(session):
@@ -159,8 +184,8 @@ def test_ai_generate_bool(session):
159184

160185

161186
def test_ai_generate_bool_multi_model(session):
162-
df = session.from_glob_path(
163-
"gs://bigframes-dev-testing/a_multimodel/images/*", name="image"
187+
df = _create_mock_obj_ref_df(
188+
session, ["gs://cloud-samples-data/vision/ocr/sign.jpg"], name="image"
164189
)
165190

166191
result = bbq.ai.generate_bool((df["image"], " contains an animal"))
@@ -196,8 +221,8 @@ def test_ai_generate_int(session):
196221

197222

198223
def test_ai_generate_int_multi_model(session):
199-
df = session.from_glob_path(
200-
"gs://bigframes-dev-testing/a_multimodel/images/*", name="image"
224+
df = _create_mock_obj_ref_df(
225+
session, ["gs://cloud-samples-data/vision/ocr/sign.jpg"], name="image"
201226
)
202227

203228
result = bbq.ai.generate_int(
@@ -235,8 +260,8 @@ def test_ai_generate_double(session):
235260

236261

237262
def test_ai_generate_double_multi_model(session):
238-
df = session.from_glob_path(
239-
"gs://bigframes-dev-testing/a_multimodel/images/*", name="image"
263+
df = _create_mock_obj_ref_df(
264+
session, ["gs://cloud-samples-data/vision/ocr/sign.jpg"], name="image"
240265
)
241266

242267
result = bbq.ai.generate_double(
@@ -267,10 +292,8 @@ def test_ai_if(session):
267292

268293

269294
def test_ai_if_multi_model(session, bq_connection):
270-
df = session.from_glob_path(
271-
"gs://bigframes-dev-testing/a_multimodel/images/*",
272-
name="image",
273-
connection=bq_connection,
295+
df = _create_mock_obj_ref_df(
296+
session, ["gs://cloud-samples-data/vision/ocr/sign.jpg"], name="image"
274297
)
275298

276299
result = bbq.ai.if_((df["image"], " contains an animal"))
@@ -289,10 +312,8 @@ def test_ai_classify(session):
289312

290313

291314
def test_ai_classify_multi_model(session, bq_connection):
292-
df = session.from_glob_path(
293-
"gs://bigframes-dev-testing/a_multimodel/images/*",
294-
name="image",
295-
connection=bq_connection,
315+
df = _create_mock_obj_ref_df(
316+
session, ["gs://cloud-samples-data/vision/ocr/sign.jpg"], name="image"
296317
)
297318

298319
result = bbq.ai.classify(df["image"], ["photo", "cartoon"])
@@ -312,8 +333,8 @@ def test_ai_score(session):
312333

313334

314335
def test_ai_score_multi_model(session):
315-
df = session.from_glob_path(
316-
"gs://bigframes-dev-testing/a_multimodel/images/*", name="image"
336+
df = _create_mock_obj_ref_df(
337+
session, ["gs://cloud-samples-data/vision/ocr/sign.jpg"], name="image"
317338
)
318339
prompt = ("Rank the liveliness of ", df["image"], "on the scale from 1 to 3")
319340

packages/bigframes/tests/system/small/pandas/test_describe.py

Lines changed: 24 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -358,15 +358,36 @@ def test_series_groupby_describe(scalars_dfs):
358358

359359
def test_describe_json_and_obj_ref_returns_count(session):
360360
# Test describe() works on JSON and OBJ_REF types (without nunique, which fails)
361+
import uuid
362+
import google.cloud.bigquery
363+
361364
sql = """
362365
SELECT
363366
PARSE_JSON('{"a": 1}') AS json_col,
364367
'gs://cloud-samples-data/vision/ocr/sign.jpg' AS uri_col
365368
"""
366-
df = session.read_gbq(sql)
369+
df_init = session.read_gbq(sql)
370+
371+
table_id = f"bigframes-dev.bigframes_tests_sys.tmp_obj_ref_{uuid.uuid4().hex}"
372+
df_init.to_gbq(table_id, if_exists="replace")
373+
374+
client = session.bqclient
375+
table = client.get_table(table_id)
376+
schema = list(table.schema)
377+
for i, field in enumerate(schema):
378+
if field.name == "uri_col":
379+
schema[i] = google.cloud.bigquery.SchemaField(
380+
name=field.name,
381+
field_type=field.field_type,
382+
mode=field.mode,
383+
description="bigframes_dtype: OBJ_REF_DTYPE",
384+
)
385+
break
386+
table.schema = schema
387+
client.update_table(table, ["schema"])
367388

368-
df["obj_ref_col"] = df["uri_col"].str.to_blob()
369-
df = df.drop(columns=["uri_col"])
389+
df = session.read_gbq(table_id)
390+
df = df.rename(columns={"uri_col": "obj_ref_col"})
370391

371392
res = df.describe(include="all").to_pandas()
372393

packages/bigframes/tests/system/small/test_dataframe.py

Lines changed: 28 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5919,9 +5919,34 @@ def test_to_gbq_table_labels(scalars_df_index):
59195919

59205920
def test_to_gbq_obj_ref_persists(session):
59215921
# Test that saving and loading an Object Reference retains its dtype
5922-
bdf = session.from_glob_path(
5923-
"gs://cloud-samples-data/vision/ocr/*.jpg", name="uris"
5924-
).head(1)
5922+
import uuid
5923+
import google.cloud.bigquery
5924+
5925+
sql = """
5926+
SELECT STRUCT('gs://cloud-samples-data/vision/ocr/sign.jpg' AS uri, CAST(NULL AS STRING) AS version, CAST(NULL AS STRING) AS authorizer, PARSE_JSON('{}') AS details) AS uris
5927+
"""
5928+
df_init = session.read_gbq(sql)
5929+
5930+
tmp_table_id = f"bigframes-dev.bigframes_tests_sys.tmp_obj_ref_{uuid.uuid4().hex}"
5931+
df_init.to_gbq(tmp_table_id, if_exists="replace")
5932+
5933+
client = session.bqclient
5934+
table = client.get_table(tmp_table_id)
5935+
schema = list(table.schema)
5936+
for i, field in enumerate(schema):
5937+
if field.name == "uris":
5938+
schema[i] = google.cloud.bigquery.SchemaField(
5939+
name=field.name,
5940+
field_type=field.field_type,
5941+
mode=field.mode,
5942+
description="bigframes_dtype: OBJ_REF_DTYPE",
5943+
fields=field.fields,
5944+
)
5945+
break
5946+
table.schema = schema
5947+
client.update_table(table, ["schema"])
5948+
5949+
bdf = session.read_gbq(tmp_table_id)
59255950

59265951
destination_table = "bigframes-dev.bigframes_tests_sys.test_obj_ref_persistence"
59275952
bdf.to_gbq(destination_table, if_exists="replace")

packages/bigframes/tests/system/small/test_dataframe_io.py

Lines changed: 29 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1010,16 +1010,38 @@ def test_to_gbq_timedelta_tag_ignored_when_appending(bigquery_client, dataset_id
10101010

10111011

10121012
def test_to_gbq_obj_ref(session, dataset_id: str, bigquery_client):
1013+
import uuid
1014+
import google.cloud.bigquery
1015+
10131016
destination_table = f"{dataset_id}.test_to_gbq_obj_ref"
10141017
sql = """
1015-
SELECT
1016-
'gs://cloud-samples-data/vision/ocr/sign.jpg' AS uri_col
1018+
SELECT STRUCT('gs://cloud-samples-data/vision/ocr/sign.jpg' AS uri, CAST(NULL AS STRING) AS version, CAST(NULL AS STRING) AS authorizer, PARSE_JSON('{}') AS details) AS uri_col
10171019
"""
1018-
df = session.read_gbq(sql)
1019-
df["obj_ref_col"] = df["uri_col"].str.to_blob()
1020-
df = df.drop(columns=["uri_col"])
1021-
1022-
df.to_gbq(destination_table)
1020+
df_init = session.read_gbq(sql)
1021+
1022+
tmp_table_id = f"{dataset_id}.tmp_obj_ref_{uuid.uuid4().hex}"
1023+
df_init.to_gbq(tmp_table_id, if_exists="replace")
1024+
1025+
client = session.bqclient
1026+
table = client.get_table(tmp_table_id)
1027+
schema = list(table.schema)
1028+
for i, field in enumerate(schema):
1029+
if field.name == "uri_col":
1030+
schema[i] = google.cloud.bigquery.SchemaField(
1031+
name=field.name,
1032+
field_type=field.field_type,
1033+
mode=field.mode,
1034+
description="bigframes_dtype: OBJ_REF_DTYPE",
1035+
fields=field.fields,
1036+
)
1037+
break
1038+
table.schema = schema
1039+
client.update_table(table, ["schema"])
1040+
1041+
df = session.read_gbq(tmp_table_id)
1042+
df = df.rename(columns={"uri_col": "obj_ref_col"})
1043+
1044+
df.to_gbq(destination_table, if_exists="replace")
10231045

10241046
table = bigquery_client.get_table(destination_table)
10251047
obj_ref_field = next(f for f in table.schema if f.name == "obj_ref_col")

packages/bigframes/tests/unit/core/compile/sqlglot/expressions/test_blob_ops.py

Lines changed: 0 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -38,19 +38,3 @@ def test_blob_get_access_url_with_duration(scalar_types_df: bpd.DataFrame, snaps
3838
[col_name],
3939
)
4040
snapshot.assert_match(sql, "out.sql")
41-
42-
43-
def test_blob_make_ref(scalar_types_df: bpd.DataFrame, snapshot):
44-
ref_s = obj.make_ref(scalar_types_df["string_col"], authorizer="my-connection")
45-
snapshot.assert_match(ref_s.to_frame().sql, "out.sql")
46-
47-
48-
def test_blob_make_ref_json(scalar_types_df: bpd.DataFrame, snapshot):
49-
col_name = "string_col"
50-
bf_df = scalar_types_df[[col_name]]
51-
sql = utils._apply_ops_to_sql(
52-
bf_df,
53-
[ops.obj_make_ref_json_op.as_expr(col_name)],
54-
[col_name],
55-
)
56-
snapshot.assert_match(sql, "out.sql")

0 commit comments

Comments
 (0)