Skip to content

Commit 0bb2ecd

Browse files
cleop-google authored and copybara-github committed
feat: GenAI SDK client(multimodal) - Accept an explicit bigquery_uri parameter in create_from_bigquery
PiperOrigin-RevId: 907062100
1 parent bc2260d commit 0bb2ecd

2 files changed

Lines changed: 139 additions & 6 deletions

File tree

tests/unit/vertexai/genai/replays/test_create_multimodal_datasets.py

Lines changed: 87 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -115,6 +115,40 @@ def test_create_dataset_from_bigquery(client):
115115
)
116116

117117

118+
@pytest.mark.usefixtures("mock_generate_multimodal_dataset_display_name")
119+
def test_create_dataset_from_bigquery_with_uri(client):
120+
dataset = client.datasets.create_from_bigquery(
121+
bigquery_uri=f"bq://{BIGQUERY_TABLE_NAME}",
122+
)
123+
assert isinstance(dataset, types.MultimodalDataset)
124+
assert dataset.metadata.input_config.bigquery_source.uri == (
125+
f"bq://{BIGQUERY_TABLE_NAME}"
126+
)
127+
128+
129+
def test_create_dataset_from_bigquery_preserves_other_metadata(client):
130+
dataset = client.datasets.create_from_bigquery(
131+
bigquery_uri=f"bq://{BIGQUERY_TABLE_NAME}",
132+
multimodal_dataset={
133+
"display_name": "test-from-bigquery-uri",
134+
"metadata": {
135+
"gemini_request_read_config": {
136+
"assembled_request_column_name": "test_column"
137+
}
138+
},
139+
},
140+
)
141+
assert isinstance(dataset, types.MultimodalDataset)
142+
assert dataset.display_name == "test-from-bigquery-uri"
143+
assert (
144+
dataset.metadata.gemini_request_read_config.assembled_request_column_name
145+
== "test_column"
146+
)
147+
assert dataset.metadata.input_config.bigquery_source.uri == (
148+
f"bq://{BIGQUERY_TABLE_NAME}"
149+
)
150+
151+
118152
@pytest.mark.usefixtures("mock_generate_multimodal_dataset_display_name")
119153
def test_create_dataset_from_bigquery_no_display_name(client):
120154
dataset = client.datasets.create_from_bigquery(
@@ -130,6 +164,13 @@ def test_create_dataset_from_bigquery_no_display_name(client):
130164
assert dataset.display_name == "test-generated-name"
131165

132166

167+
def test_create_dataset_from_bigquery_raises_if_neither(client):
168+
with pytest.raises(
169+
ValueError, match="At least one of `bigquery_uri` or `multimodal_dataset`"
170+
):
171+
client.datasets.create_from_bigquery()
172+
173+
133174
@pytest.mark.usefixtures("mock_bigquery_client", "mock_import_bigframes")
134175
def test_create_dataset_from_pandas(client, is_replay_mode):
135176
dataframe = pd.DataFrame(
@@ -298,6 +339,44 @@ async def test_create_dataset_from_bigquery_async(client):
298339
)
299340

300341

342+
@pytest.mark.asyncio
343+
@pytest.mark.usefixtures("mock_generate_multimodal_dataset_display_name")
344+
async def test_create_dataset_from_bigquery_with_uri_async(client):
345+
dataset = await client.aio.datasets.create_from_bigquery(
346+
bigquery_uri=f"bq://{BIGQUERY_TABLE_NAME}",
347+
)
348+
assert isinstance(dataset, types.MultimodalDataset)
349+
assert dataset.metadata.input_config.bigquery_source.uri == (
350+
f"bq://{BIGQUERY_TABLE_NAME}"
351+
)
352+
353+
354+
@pytest.mark.asyncio
355+
async def test_create_dataset_from_bigquery_preserves_other_metadata_async(
356+
client,
357+
):
358+
dataset = await client.aio.datasets.create_from_bigquery(
359+
bigquery_uri=f"bq://{BIGQUERY_TABLE_NAME}",
360+
multimodal_dataset={
361+
"display_name": "test-from-bigquery-uri",
362+
"metadata": {
363+
"gemini_request_read_config": {
364+
"assembled_request_column_name": "test_column"
365+
}
366+
},
367+
},
368+
)
369+
assert isinstance(dataset, types.MultimodalDataset)
370+
assert dataset.display_name == "test-from-bigquery-uri"
371+
assert (
372+
dataset.metadata.gemini_request_read_config.assembled_request_column_name
373+
== "test_column"
374+
)
375+
assert dataset.metadata.input_config.bigquery_source.uri == (
376+
f"bq://{BIGQUERY_TABLE_NAME}"
377+
)
378+
379+
301380
@pytest.mark.asyncio
302381
@pytest.mark.usefixtures("mock_generate_multimodal_dataset_display_name")
303382
async def test_create_dataset_from_bigquery_no_display_name_async(client):
@@ -314,6 +393,14 @@ async def test_create_dataset_from_bigquery_no_display_name_async(client):
314393
assert dataset.display_name == "test-generated-name"
315394

316395

396+
@pytest.mark.asyncio
397+
async def test_create_dataset_from_bigquery_raises_if_neither_async(client):
398+
with pytest.raises(
399+
ValueError, match="At least one of `bigquery_uri` or `multimodal_dataset`"
400+
):
401+
await client.aio.datasets.create_from_bigquery()
402+
403+
317404
@pytest.mark.asyncio
318405
async def test_create_dataset_from_bigquery_async_with_timeout(client):
319406
dataset = await client.aio.datasets.create_from_bigquery(

vertexai/_genai/datasets.py

Lines changed: 52 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -924,23 +924,46 @@ def _wait_for_operation(
924924
def create_from_bigquery(
925925
self,
926926
*,
927-
multimodal_dataset: types.MultimodalDatasetOrDict,
927+
bigquery_uri: Optional[str] = None,
928+
multimodal_dataset: Optional[types.MultimodalDatasetOrDict] = None,
928929
config: Optional[types.CreateMultimodalDatasetConfigOrDict] = None,
929930
) -> types.MultimodalDataset:
930931
"""Creates a multimodal dataset from a BigQuery table.
931932
932933
Args:
934+
bigquery_uri:
935+
Optional. The BigQuery URI of the table to create the dataset from.
936+
e.g. "bq://project.dataset.table". If both `bigquery_uri` and
937+
`multimodal_dataset` are provided, and `multimodal_dataset` also
938+
contains a BigQuery URI, the `bigquery_uri` parameter takes precedence.
933939
multimodal_dataset:
934-
Required. A representation of a multimodal dataset.
940+
Optional. A representation of a multimodal dataset. If `bigquery_uri`
941+
is set, `multimodal_dataset` can still be used to set other metadata
942+
fields. If both `bigquery_uri` and `multimodal_dataset` are provided,
943+
and `multimodal_dataset` also contains a BigQuery URI, the
944+
`bigquery_uri` parameter takes precedence.
935945
config:
936946
Optional. A configuration for creating the multimodal dataset. If not
937947
provided, the default configuration will be used.
938948
939949
Returns:
940950
A types.MultimodalDataset object representing a multimodal dataset.
941951
"""
942-
if isinstance(multimodal_dataset, dict):
952+
if not bigquery_uri and not multimodal_dataset:
953+
raise ValueError(
954+
"At least one of `bigquery_uri` or `multimodal_dataset` must be"
955+
" provided."
956+
)
957+
958+
if multimodal_dataset is None:
959+
multimodal_dataset = types.MultimodalDataset()
960+
elif isinstance(multimodal_dataset, dict):
943961
multimodal_dataset = types.MultimodalDataset(**multimodal_dataset)
962+
963+
if bigquery_uri:
964+
multimodal_dataset = multimodal_dataset.model_copy(deep=True)
965+
multimodal_dataset.set_bigquery_uri(bigquery_uri)
966+
944967
_datasets_utils.validate_multimodal_dataset_bigquery_uri(multimodal_dataset)
945968

946969
if isinstance(config, dict):
@@ -2187,23 +2210,46 @@ async def _wait_for_operation(
21872210
async def create_from_bigquery(
21882211
self,
21892212
*,
2190-
multimodal_dataset: types.MultimodalDatasetOrDict,
2213+
bigquery_uri: Optional[str] = None,
2214+
multimodal_dataset: Optional[types.MultimodalDatasetOrDict] = None,
21912215
config: Optional[types.CreateMultimodalDatasetConfigOrDict] = None,
21922216
) -> types.MultimodalDataset:
21932217
"""Creates a multimodal dataset from a BigQuery table.
21942218
21952219
Args:
2220+
bigquery_uri:
2221+
Optional. The BigQuery URI of the table to create the dataset from.
2222+
e.g. "bq://project.dataset.table". If both `bigquery_uri` and
2223+
`multimodal_dataset` are provided, and `multimodal_dataset` also
2224+
contains a BigQuery URI, the `bigquery_uri` parameter takes precedence.
21962225
multimodal_dataset:
2197-
Required. A representation of a multimodal dataset.
2226+
Optional. A representation of a multimodal dataset. If `bigquery_uri`
2227+
is set, `multimodal_dataset` can still be used to set other metadata
2228+
fields. If both `bigquery_uri` and `multimodal_dataset` are provided,
2229+
and `multimodal_dataset` also contains a BigQuery URI, the
2230+
`bigquery_uri` parameter takes precedence.
21982231
config:
21992232
Optional. A configuration for creating the multimodal dataset. If not
22002233
provided, the default configuration will be used.
22012234
22022235
Returns:
22032236
A types.MultimodalDataset object representing a multimodal dataset.
22042237
"""
2205-
if isinstance(multimodal_dataset, dict):
2238+
if not bigquery_uri and not multimodal_dataset:
2239+
raise ValueError(
2240+
"At least one of `bigquery_uri` or `multimodal_dataset` must be"
2241+
" provided."
2242+
)
2243+
2244+
if multimodal_dataset is None:
2245+
multimodal_dataset = types.MultimodalDataset()
2246+
elif isinstance(multimodal_dataset, dict):
22062247
multimodal_dataset = types.MultimodalDataset(**multimodal_dataset)
2248+
2249+
if bigquery_uri:
2250+
multimodal_dataset = multimodal_dataset.model_copy(deep=True)
2251+
multimodal_dataset.set_bigquery_uri(bigquery_uri)
2252+
22072253
_datasets_utils.validate_multimodal_dataset_bigquery_uri(multimodal_dataset)
22082254

22092255
if isinstance(config, dict):

0 commit comments

Comments
 (0)