Skip to content

Commit 6874b8d

Browse files
cleop-googlecopybara-github
authored and committed
chore: GenAI SDK client(multimodal) - Move to_bigframes method to MultimodalDataset class.
BREAKING CHANGE: `to_bigframes` has been removed from the datasets module and moved into the `MultimodalDataset` class. Instead of `dataframe = client.datasets.to_bigframes(multimodal_dataset=multimodal_dataset)`, use `dataframe = multimodal_dataset.to_bigframes()` to create a BigFrames DataFrame from a multimodal dataset.

PiperOrigin-RevId: 895936431
1 parent 5414089 commit 6874b8d

File tree

4 files changed

+48
-100
lines changed

4 files changed

+48
-100
lines changed

tests/unit/vertexai/genai/replays/test_get_multimodal_datasets.py

Lines changed: 0 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -15,34 +15,14 @@
1515
# pylint: disable=protected-access,bad-continuation,missing-function-docstring
1616

1717
from tests.unit.vertexai.genai.replays import pytest_helper
18-
from vertexai._genai import _datasets_utils
1918
from vertexai._genai import types
2019

21-
from unittest import mock
2220
import pytest
2321

2422
BIGQUERY_TABLE_NAME = "vertex-sdk-dev.multimodal_dataset.test-table"
2523
DATASET = "8810841321427173376"
2624

2725

28-
@pytest.fixture
29-
def mock_import_bigframes(is_replay_mode):
30-
if is_replay_mode:
31-
with mock.patch.object(
32-
_datasets_utils, "_try_import_bigframes"
33-
) as mock_import_bigframes:
34-
mock_read_gbq_table_result = mock.MagicMock()
35-
mock_read_gbq_table_result.sql = f"SLECT * FROM `{BIGQUERY_TABLE_NAME}`"
36-
37-
bigframes = mock.MagicMock()
38-
bigframes.pandas.read_gbq_table.return_value = mock_read_gbq_table_result
39-
40-
mock_import_bigframes.return_value = bigframes
41-
yield mock_import_bigframes
42-
else:
43-
yield None
44-
45-
4626
def test_get_dataset(client):
4727
dataset = client.datasets._get_multimodal_dataset(
4828
name=DATASET,
@@ -61,15 +41,6 @@ def test_get_dataset_from_public_method(client):
6141
assert dataset.display_name == "test-display-name"
6242

6343

64-
@pytest.mark.usefixtures("mock_import_bigframes")
65-
def test_to_bigframes(client):
66-
dataset = client.datasets.get_multimodal_dataset(
67-
name=DATASET,
68-
)
69-
df = client.datasets.to_bigframes(multimodal_dataset=dataset)
70-
assert BIGQUERY_TABLE_NAME in df.sql
71-
72-
7344
pytestmark = pytest_helper.setup(
7445
file=__file__,
7546
globals_for_file=globals(),
@@ -96,13 +67,3 @@ async def test_get_dataset_from_public_method_async(client):
9667
assert isinstance(dataset, types.MultimodalDataset)
9768
assert dataset.name.endswith(DATASET)
9869
assert dataset.display_name == "test-display-name"
99-
100-
101-
@pytest.mark.asyncio
102-
@pytest.mark.usefixtures("mock_import_bigframes")
103-
async def test_to_bigframes_async(client):
104-
dataset = await client.aio.datasets.get_multimodal_dataset(
105-
name=DATASET,
106-
)
107-
df = await client.aio.datasets.to_bigframes(multimodal_dataset=dataset)
108-
assert BIGQUERY_TABLE_NAME in df.sql

tests/unit/vertexai/genai/test_multimodal_datasets_genai.py

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,8 +13,26 @@
1313
# limitations under the License.
1414
#
1515
"""Tests for multimodal datasets."""
16+
from unittest import mock
1617

18+
from vertexai._genai import _datasets_utils
1719
from vertexai._genai import types
20+
import pytest
21+
22+
23+
@pytest.fixture
24+
def mock_import_bigframes():
25+
with mock.patch.object(
26+
_datasets_utils, "_try_import_bigframes"
27+
) as mock_import_bigframes:
28+
mock_read_gbq_table_result = mock.MagicMock()
29+
mock_read_gbq_table_result.sql = "SELECT * FROM `project.dataset.table`"
30+
31+
bigframes = mock.MagicMock()
32+
bigframes.pandas.read_gbq_table.return_value = mock_read_gbq_table_result
33+
34+
mock_import_bigframes.return_value = bigframes
35+
yield mock_import_bigframes
1836

1937

2038
class TestMultimodalDataset:
@@ -126,3 +144,14 @@ def test_set_bigquery_uri_preserves_other_fields(self):
126144
dataset.metadata.gemini_request_read_config.assembled_request_column_name
127145
== "test_column"
128146
)
147+
148+
def test_to_bigframes(self, mock_import_bigframes):
149+
dataset = types.MultimodalDataset()
150+
dataset.set_bigquery_uri("bq://project.dataset.table")
151+
152+
df = dataset.to_bigframes()
153+
154+
assert "project.dataset.table" in df.sql
155+
mock_import_bigframes.return_value.pandas.read_gbq_table.assert_called_once_with(
156+
"project.dataset.table"
157+
)

vertexai/_genai/datasets.py

Lines changed: 0 additions & 61 deletions
Original file line numberDiff line numberDiff line change
@@ -940,36 +940,6 @@ def create_from_bigframes(
940940
config=config,
941941
)
942942

943-
def to_bigframes(
944-
self,
945-
*,
946-
multimodal_dataset: types.MultimodalDatasetOrDict,
947-
) -> "bigframes.pandas.DataFrame": # type: ignore # noqa: F821
948-
"""Converts a multimodal dataset to a BigFrames dataframe.
949-
950-
This is the preferred method to inspect the multimodal dataset in a
951-
notebook.
952-
953-
Args:
954-
multimodal_dataset:
955-
Required. A representation of a multimodal dataset.
956-
957-
Returns:
958-
A BigFrames dataframe.
959-
"""
960-
bigframes = _datasets_utils._try_import_bigframes()
961-
962-
if isinstance(multimodal_dataset, dict):
963-
multimodal_dataset = types.MultimodalDataset(**multimodal_dataset)
964-
elif not multimodal_dataset:
965-
multimodal_dataset = types.MultimodalDataset()
966-
967-
if multimodal_dataset.bigquery_uri is None:
968-
raise ValueError("Multimodal dataset bigquery source uri is not set.")
969-
return bigframes.pandas.read_gbq_table(
970-
multimodal_dataset.bigquery_uri.removeprefix("bq://")
971-
)
972-
973943
def update_multimodal_dataset(
974944
self,
975945
*,
@@ -2053,37 +2023,6 @@ async def create_from_bigframes(
20532023
config=config,
20542024
)
20552025

2056-
async def to_bigframes(
2057-
self,
2058-
*,
2059-
multimodal_dataset: types.MultimodalDatasetOrDict,
2060-
) -> "bigframes.pandas.DataFrame": # type: ignore # noqa: F821
2061-
"""Converts a multimodal dataset to a BigFrames dataframe.
2062-
2063-
This is the preferred method to inspect the multimodal dataset in a
2064-
notebook.
2065-
2066-
Args:
2067-
multimodal_dataset:
2068-
Required. A representation of a multimodal dataset.
2069-
2070-
Returns:
2071-
A BigFrames dataframe.
2072-
"""
2073-
bigframes = _datasets_utils._try_import_bigframes()
2074-
2075-
if isinstance(multimodal_dataset, dict):
2076-
multimodal_dataset = types.MultimodalDataset(**multimodal_dataset)
2077-
elif not multimodal_dataset:
2078-
multimodal_dataset = types.MultimodalDataset()
2079-
2080-
if multimodal_dataset.bigquery_uri is None:
2081-
raise ValueError("Multimodal dataset bigquery source uri is missing.")
2082-
return await asyncio.to_thread(
2083-
bigframes.pandas.read_gbq_table,
2084-
multimodal_dataset.bigquery_uri.removeprefix("bq://"),
2085-
)
2086-
20872026
async def update_multimodal_dataset(
20882027
self,
20892028
*,

vertexai/_genai/types/common.py

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12836,6 +12836,25 @@ def set_bigquery_uri(
1283612836
metadata.input_config = input_config
1283712837
self.metadata = metadata
1283812838

12839+
def to_bigframes(
12840+
self,
12841+
) -> "bigframes.pandas.DataFrame": # type: ignore # noqa: F821
12842+
"""Converts the multimodal dataset to a BigFrames dataframe.
12843+
12844+
This is the preferred method to inspect the multimodal dataset in a
12845+
notebook.
12846+
12847+
Returns:
12848+
A BigFrames dataframe.
12849+
"""
12850+
from .. import _datasets_utils
12851+
12852+
bigframes = _datasets_utils._try_import_bigframes()
12853+
12854+
if self.bigquery_uri is None:
12855+
raise ValueError("Multimodal dataset bigquery source uri is not set.")
12856+
return bigframes.pandas.read_gbq_table(self.bigquery_uri.removeprefix("bq://"))
12857+
1283912858

1284012859
class MultimodalDatasetDict(TypedDict, total=False):
1284112860
"""Represents a multimodal dataset."""

0 commit comments

Comments
 (0)