Skip to content

Commit bc2260d

Browse files
cleop-google authored and copybara-github committed
fix: GenAI SDK client(multimodal) - Preserve existing metadata when creating from bigframes
PiperOrigin-RevId: 906994126
1 parent b2323ef commit bc2260d

2 files changed

Lines changed: 97 additions & 24 deletions

File tree

tests/unit/vertexai/genai/replays/test_create_multimodal_datasets.py

Lines changed: 91 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -210,6 +210,50 @@ def test_create_dataset_from_bigframes(client, is_replay_mode):
210210
)
211211

212212

213+
@pytest.mark.skipif(
214+
sys.version_info < (3, 10),
215+
reason="bigframes requires python 3.10 or higher",
216+
)
217+
@pytest.mark.usefixtures("mock_bigquery_client", "mock_import_bigframes")
218+
def test_create_dataset_from_bigframes_preserves_other_metadata(client, is_replay_mode):
219+
import bigframes.pandas
220+
221+
dataframe = pd.DataFrame(
222+
{
223+
"col1": ["col1"],
224+
"col2": ["col2"],
225+
}
226+
)
227+
if is_replay_mode:
228+
bf_dataframe = mock.MagicMock()
229+
bf_dataframe.to_gbq.return_value = "temp_table_id"
230+
else:
231+
bf_dataframe = bigframes.pandas.DataFrame(dataframe)
232+
233+
dataset = client.datasets.create_from_bigframes(
234+
dataframe=bf_dataframe,
235+
target_table_id=BIGQUERY_TABLE_NAME,
236+
multimodal_dataset={
237+
"display_name": "test-from-bigframes",
238+
"metadata": {
239+
"gemini_request_read_config": {
240+
"assembled_request_column_name": "test_column"
241+
}
242+
},
243+
},
244+
)
245+
246+
assert isinstance(dataset, types.MultimodalDataset)
247+
assert dataset.display_name == "test-from-bigframes"
248+
assert (
249+
dataset.metadata.gemini_request_read_config.assembled_request_column_name
250+
== "test_column"
251+
)
252+
assert dataset.metadata.input_config.bigquery_source.uri == (
253+
f"bq://{BIGQUERY_TABLE_NAME}"
254+
)
255+
256+
213257
pytestmark = pytest_helper.setup(
214258
file=__file__,
215259
globals_for_file=globals(),
@@ -371,3 +415,50 @@ async def test_create_dataset_from_bigframes_async(client, is_replay_mode):
371415
pd.testing.assert_frame_equal(
372416
rows.to_dataframe(), dataframe, check_index_type=False
373417
)
418+
419+
420+
@pytest.mark.skipif(
421+
sys.version_info < (3, 10),
422+
reason="bigframes requires python 3.10 or higher",
423+
)
424+
@pytest.mark.asyncio
425+
@pytest.mark.usefixtures("mock_bigquery_client", "mock_import_bigframes")
426+
async def test_create_dataset_from_bigframes_preserves_other_metadata_async(
427+
client, is_replay_mode
428+
):
429+
import bigframes.pandas
430+
431+
dataframe = pd.DataFrame(
432+
{
433+
"col1": ["col1"],
434+
"col2": ["col2"],
435+
}
436+
)
437+
if is_replay_mode:
438+
bf_dataframe = mock.MagicMock()
439+
bf_dataframe.to_gbq.return_value = "temp_table_id"
440+
else:
441+
bf_dataframe = bigframes.pandas.DataFrame(dataframe)
442+
443+
dataset = await client.aio.datasets.create_from_bigframes(
444+
dataframe=bf_dataframe,
445+
target_table_id=BIGQUERY_TABLE_NAME,
446+
multimodal_dataset={
447+
"display_name": "test-from-bigframes",
448+
"metadata": {
449+
"gemini_request_read_config": {
450+
"assembled_request_column_name": "test_column"
451+
}
452+
},
453+
},
454+
)
455+
456+
assert isinstance(dataset, types.MultimodalDataset)
457+
assert dataset.display_name == "test-from-bigframes"
458+
assert (
459+
dataset.metadata.gemini_request_read_config.assembled_request_column_name
460+
== "test_column"
461+
)
462+
assert dataset.metadata.input_config.bigquery_source.uri == (
463+
f"bq://{BIGQUERY_TABLE_NAME}"
464+
)

vertexai/_genai/datasets.py

Lines changed: 6 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -1083,19 +1083,10 @@ def create_from_bigframes(
10831083
client,
10841084
)
10851085

1086+
multimodal_dataset = multimodal_dataset.model_copy(deep=True)
1087+
multimodal_dataset.set_bigquery_uri(f"bq://{target_table_id}")
10861088
return self.create_from_bigquery(
1087-
multimodal_dataset=multimodal_dataset.model_copy(
1088-
update={
1089-
"metadata": types.SchemaTablesDatasetMetadata(
1090-
input_config=types.SchemaTablesDatasetMetadataInputConfig(
1091-
bigquery_source=types.SchemaTablesDatasetMetadataBigQuerySource(
1092-
uri=f"bq://{target_table_id}"
1093-
)
1094-
)
1095-
)
1096-
}
1097-
),
1098-
config=config,
1089+
multimodal_dataset=multimodal_dataset, config=config
10991090
)
11001091

11011092
def update_multimodal_dataset(
@@ -2357,19 +2348,10 @@ async def create_from_bigframes(
23572348
client,
23582349
)
23592350

2351+
multimodal_dataset = multimodal_dataset.model_copy(deep=True)
2352+
multimodal_dataset.set_bigquery_uri(f"bq://{target_table_id}")
23602353
return await self.create_from_bigquery(
2361-
multimodal_dataset=multimodal_dataset.model_copy(
2362-
update={
2363-
"metadata": types.SchemaTablesDatasetMetadata(
2364-
input_config=types.SchemaTablesDatasetMetadataInputConfig(
2365-
bigquery_source=types.SchemaTablesDatasetMetadataBigQuerySource(
2366-
uri=f"bq://{target_table_id}"
2367-
)
2368-
)
2369-
)
2370-
}
2371-
),
2372-
config=config,
2354+
multimodal_dataset=multimodal_dataset, config=config
23732355
)
23742356

23752357
async def update_multimodal_dataset(

0 commit comments

Comments (0)