This repository was archived by the owner on Apr 1, 2026. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 67
feat: support pandas series in ai.generate_bool #2086
Merged
Merged
Changes from 9 commits
Commits
Show all changes
18 commits
Select commit
Hold shift + click to select a range
67206cb
feat: support pandas series in ai.generate_bool
sycai 87f37be
fix mypy error
sycai e1a6a5a
define PROMPT_TYPE with Union
sycai f0b1d1a
fix type
sycai b25a2ab
Merge branch 'main' into sycai_ai_gen_bool_pandas
sycai 1edcfab
update test
sycai 59ffbee
update comment
sycai 6f3a95c
fix mypy
sycai 6f09287
Merge branch 'main' into sycai_ai_gen_bool_pandas
sycai 3093c5e
fix return type
sycai 6c8dd53
Merge branch 'main' into sycai_ai_gen_bool_pandas
sycai ef6794e
update doc
sycai 22ce762
Merge branch 'main' into sycai_ai_gen_bool_pandas
sycai 6ea8c70
Merge branch 'main' into sycai_ai_gen_bool_pandas
sycai 4f7efd8
Merge branch 'main' into sycai_ai_gen_bool_pandas
sycai c3a658a
fix doctest
sycai 6d7dcba
Merge branch 'main' into sycai_ai_gen_bool_pandas
sycai 796a41a
Merge branch 'main' into sycai_ai_gen_bool_pandas
sycai File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change | ||||||
|---|---|---|---|---|---|---|---|---|
|
|
@@ -19,16 +19,25 @@ | |||||||
| from __future__ import annotations | ||||||||
|
|
||||||||
| import json | ||||||||
| from typing import Any, List, Literal, Mapping, Tuple | ||||||||
| from typing import Any, List, Literal, Mapping, Tuple, Union | ||||||||
|
|
||||||||
| from bigframes import clients, dtypes, series | ||||||||
| from bigframes.core import log_adapter | ||||||||
| import pandas as pd | ||||||||
|
|
||||||||
| from bigframes import clients, dtypes, series, session | ||||||||
| from bigframes.core import convert, log_adapter | ||||||||
| from bigframes.operations import ai_ops | ||||||||
|
|
||||||||
| PROMPT_TYPE = Union[ | ||||||||
| series.Series, | ||||||||
| pd.Series, | ||||||||
| List[Union[str, series.Series, pd.Series]], | ||||||||
| Tuple[Union[str, series.Series, pd.Series], ...], | ||||||||
| ] | ||||||||
|
|
||||||||
|
|
||||||||
| @log_adapter.method_logger(custom_base_name="bigquery_ai") | ||||||||
| def generate_bool( | ||||||||
| prompt: series.Series | List[str | series.Series] | Tuple[str | series.Series, ...], | ||||||||
| prompt: PROMPT_TYPE, | ||||||||
| *, | ||||||||
| connection_id: str | None = None, | ||||||||
| endpoint: str | None = None, | ||||||||
|
|
@@ -60,8 +69,9 @@ def generate_bool( | |||||||
| Name: result, dtype: boolean | ||||||||
|
|
||||||||
| Args: | ||||||||
| prompt (series.Series | List[str|series.Series] | Tuple[str|series.Series, ...]): | ||||||||
| A mixture of Series and string literals that specifies the prompt to send to the model. | ||||||||
| prompt (Series | List[str|Series] | Tuple[str|Series, ...]): | ||||||||
| A mixture of Series and string literals that specifies the prompt to send to the model. The Series can be BigFrames Series | ||||||||
| or pandas Series. | ||||||||
| connection_id (str, optional): | ||||||||
| Specifies the connection to use to communicate with the model. For example, `myproject.us.myconnection`. | ||||||||
| If not provided, the connection from the current session will be used. | ||||||||
|
|
@@ -104,7 +114,7 @@ def generate_bool( | |||||||
|
|
||||||||
|
|
||||||||
| def _separate_context_and_series( | ||||||||
| prompt: series.Series | List[str | series.Series] | Tuple[str | series.Series, ...], | ||||||||
| prompt: PROMPT_TYPE, | ||||||||
| ) -> Tuple[List[str | None], List[series.Series]]: | ||||||||
| """ | ||||||||
| Returns the two values. The first value is the prompt with all series replaced by None. The second value is all the series | ||||||||
|
|
@@ -123,18 +133,19 @@ def _separate_context_and_series( | |||||||
| return [None], [prompt] | ||||||||
|
|
||||||||
| prompt_context: List[str | None] = [] | ||||||||
| series_list: List[series.Series] = [] | ||||||||
| series_list: List[series.Series | pd.Series] = [] | ||||||||
|
|
||||||||
| session = None | ||||||||
| for item in prompt: | ||||||||
| if isinstance(item, str): | ||||||||
| prompt_context.append(item) | ||||||||
|
|
||||||||
| elif isinstance(item, series.Series): | ||||||||
| elif isinstance(item, (series.Series, pd.Series)): | ||||||||
| prompt_context.append(None) | ||||||||
|
|
||||||||
| if item.dtype == dtypes.OBJ_REF_DTYPE: | ||||||||
| # Multi-model support | ||||||||
| item = item.blob.read_url() | ||||||||
| if isinstance(item, series.Series) and session is None: | ||||||||
| # Use the first available BF session if there's any. | ||||||||
| session = item._session | ||||||||
| series_list.append(item) | ||||||||
|
|
||||||||
| else: | ||||||||
|
|
@@ -143,7 +154,20 @@ def _separate_context_and_series( | |||||||
| if not series_list: | ||||||||
| raise ValueError("Please provide at least one Series in the prompt") | ||||||||
|
|
||||||||
| return prompt_context, series_list | ||||||||
| converted_list = [_convert_series(s, session) for s in series_list] | ||||||||
|
|
||||||||
| return prompt_context, converted_list | ||||||||
|
|
||||||||
|
|
||||||||
| def _convert_series( | ||||||||
| s: series.Series | pd.Series, session: session.Session | None | ||||||||
| ) -> series.Series: | ||||||||
| result = convert.to_bf_series(s, default_index=None, session=session) | ||||||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. when two
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I would like to leave that check to the place where we "align" series: python-bigquery-dataframes/bigframes/operations/base.py Lines 210 to 212 in 090ce8e
|
||||||||
|
|
||||||||
| if result.dtype == dtypes.OBJ_REF_DTYPE: | ||||||||
| # Support multimodel | ||||||||
| return result.blob.read_url() | ||||||||
| return result | ||||||||
|
|
||||||||
|
|
||||||||
| def _resolve_connection_id(series: series.Series, connection_id: str | None): | ||||||||
|
|
||||||||
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can the `pd.Series` be converted into a `series.Series` represented for multi-model? I would suggest having two `if` branches, one for `pd.Series` and another one for `series.Series`, for better readability.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Series conversion happens at the bottom of the function body, at line 157.
This `if` branch is just to grab the session from the first BigFrames session.