Skip to content
This repository was archived by the owner on Apr 1, 2026. It is now read-only.

Commit a0ae97c

Browse files
committed
Merge branch 'main' into shuowei-anywidget-complete-status-update
2 parents 40162bb + c932d2d commit a0ae97c

File tree

23 files changed

+314
-61
lines changed

23 files changed

+314
-61
lines changed

bigframes/_config/experiment_options.py

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
# See the License for the specific language governing permissions and
1313
# limitations under the License.
1414

15-
from typing import Optional
15+
from typing import Literal, Optional
1616
import warnings
1717

1818
import bigframes
@@ -27,6 +27,7 @@ class ExperimentOptions:
2727
def __init__(self):
2828
self._semantic_operators: bool = False
2929
self._ai_operators: bool = False
30+
self._sql_compiler: Literal["legacy", "stable", "experimental"] = "stable"
3031

3132
@property
3233
def semantic_operators(self) -> bool:
@@ -55,6 +56,24 @@ def ai_operators(self, value: bool):
5556
warnings.warn(msg, category=bfe.PreviewWarning)
5657
self._ai_operators = value
5758

59+
@property
def sql_compiler(self) -> Literal["legacy", "stable", "experimental"]:
    """Name of the SQL compiler currently selected.

    Always one of ``"legacy"``, ``"stable"`` or ``"experimental"``.
    """
    return self._sql_compiler

@sql_compiler.setter
def sql_compiler(self, value: Literal["legacy", "stable", "experimental"]):
    # Reject anything outside the three accepted names before touching state,
    # so a failed assignment leaves the previous selection in place.
    allowed = ("legacy", "stable", "experimental")
    if value not in allowed:
        raise ValueError(
            "sql_compiler must be one of 'legacy', 'stable', or 'experimental'"
        )

    # Opting into the experimental compiler is permitted, but the caller is
    # told (via FutureWarning) that it may still change.
    is_experimental = value == "experimental"
    if is_experimental:
        warnings.warn(
            bfe.format_message(
                "The experimental SQL compiler is still under experiments, and is subject "
                "to change in the future."
            ),
            category=FutureWarning,
        )

    self._sql_compiler = value
76+
5877
@property
5978
def blob(self) -> bool:
6079
msg = bfe.format_message(

bigframes/bigquery/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,7 @@
6060
from bigframes.bigquery._operations.search import create_vector_index, vector_search
6161
from bigframes.bigquery._operations.sql import sql_scalar
6262
from bigframes.bigquery._operations.struct import struct
63-
from bigframes.bigquery.table import create_external_table
63+
from bigframes.bigquery._operations.table import create_external_table
6464
from bigframes.core.logging import log_adapter
6565

6666
_functions = [

bigframes/bigquery/_operations/ml.py

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -520,3 +520,63 @@ def generate_text(
520520
return bpd.read_gbq_query(sql)
521521
else:
522522
return session.read_gbq_query(sql)
523+
524+
525+
@log_adapter.method_logger(custom_base_name="bigquery_ml")
def generate_embedding(
    model: Union[bigframes.ml.base.BaseEstimator, str, pd.Series],
    input_: Union[pd.DataFrame, dataframe.DataFrame, str],
    *,
    flatten_json_output: Optional[bool] = None,
    task_type: Optional[str] = None,
    output_dimensionality: Optional[int] = None,
) -> dataframe.DataFrame:
    """
    Creates text embeddings with a BigQuery ML model.

    The call is translated into an ``ML.GENERATE_EMBEDDING`` query and the
    query result is returned as a DataFrame. See the
    `BigQuery ML GENERATE_EMBEDDING function syntax
    <https://docs.cloud.google.com/bigquery/docs/reference/standard-sql/bigqueryml-syntax-generate-embedding>`_
    for additional reference.

    Args:
        model (bigframes.ml.base.BaseEstimator, str, or pandas.Series):
            The model to use for text embedding.
        input_ (pandas.DataFrame, bigframes.pandas.DataFrame, or str):
            The DataFrame or query to use for text embedding.
        flatten_json_output (bool, optional):
            A BOOL value that determines the content of the generated JSON
            column. Omitted from the query when left as ``None``.
        task_type (str, optional):
            A STRING value that specifies the intended downstream
            application task. Omitted from the query when left as ``None``.
            Supported values are:
            - `RETRIEVAL_QUERY`
            - `RETRIEVAL_DOCUMENT`
            - `SEMANTIC_SIMILARITY`
            - `CLASSIFICATION`
            - `CLUSTERING`
            - `QUESTION_ANSWERING`
            - `FACT_VERIFICATION`
            - `CODE_RETRIEVAL_QUERY`
        output_dimensionality (int, optional):
            An INT64 value that specifies the size of the output embedding.
            Omitted from the query when left as ``None``.

    Returns:
        bigframes.pandas.DataFrame:
            The generated text embedding.
    """
    import bigframes.pandas as bpd

    model_name, session = _get_model_name_and_session(model, input_)

    sql = bigframes.core.sql.ml.generate_embedding(
        model_name=model_name,
        table=_to_sql(input_),
        flatten_json_output=flatten_json_output,
        task_type=task_type,
        output_dimensionality=output_dimensionality,
    )

    # Prefer the session resolved from the arguments; only when none was
    # found does the query go through the module-level bigframes.pandas API.
    if session is not None:
        return session.read_gbq_query(sql)
    return bpd.read_gbq_query(sql)
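bigframes/bigquery/_operations/table.py (file header missing from this capture; path inferred from the updated import in bigframes/bigquery/__init__.py — confirm against the commit)

Lines changed: 0 additions & 4 deletions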
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,6 @@
1616

1717
from typing import Mapping, Optional, Union
1818

19-
import bigframes_vendored.constants
2019
import google.cloud.bigquery
2120
import pandas as pd
2221

@@ -94,9 +93,6 @@ def create_external_table(
9493
if session is None:
9594
bpd.read_gbq_query(sql)
9695
session = bpd.get_global_session()
97-
assert (
98-
session is not None
99-
), f"Missing connection to BigQuery. Please report how you encountered this error at {bigframes_vendored.constants.FEEDBACK_LINK}."
10096
else:
10197
session.read_gbq_query(sql)
10298

bigframes/bigquery/ml.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
create_model,
2424
evaluate,
2525
explain_predict,
26+
generate_embedding,
2627
generate_text,
2728
global_explain,
2829
predict,
@@ -37,4 +38,5 @@
3738
"global_explain",
3839
"transform",
3940
"generate_text",
41+
"generate_embedding",
4042
]

bigframes/core/compile/__init__.py

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,13 +13,28 @@
1313
# limitations under the License.
1414
from __future__ import annotations
1515

16+
from typing import Any
17+
18+
from bigframes import options
1619
from bigframes.core.compile.api import test_only_ibis_inferred_schema
1720
from bigframes.core.compile.configs import CompileRequest, CompileResult
18-
from bigframes.core.compile.ibis_compiler.ibis_compiler import compile_sql
21+
22+
23+
def compiler() -> Any:
    """Pick the compiler module matching the current ``sql_compiler`` option.

    Reads ``bigframes.options.experiments.sql_compiler`` at call time. The
    value ``"experimental"`` selects the sqlglot compiler module; every other
    value selects the ibis compiler module. The chosen module is imported
    lazily, inside the branch that needs it.

    Returns:
        The selected compiler module.
    """
    if options.experiments.sql_compiler != "experimental":
        import bigframes.core.compile.ibis_compiler.ibis_compiler as ibis_compiler

        return ibis_compiler

    import bigframes.core.compile.sqlglot.compiler as sqlglot_compiler

    return sqlglot_compiler
33+
1934

2035
__all__ = [
2136
"test_only_ibis_inferred_schema",
22-
"compile_sql",
2337
"CompileRequest",
2438
"CompileResult",
39+
"compiler",
2540
]

bigframes/core/sql/ml.py

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -296,3 +296,31 @@ def generate_text(
296296
sql += _build_struct_sql(struct_options)
297297
sql += ")\n"
298298
return sql
299+
300+
301+
def generate_embedding(
    model_name: str,
    table: str,
    *,
    flatten_json_output: Optional[bool] = None,
    task_type: Optional[str] = None,
    output_dimensionality: Optional[int] = None,
) -> str:
    """Encode the ML.GENERATE_EMBEDDING statement.

    Only options that were explicitly provided (not ``None``) are forwarded
    to the struct-building helper; ``table`` is embedded as a parenthesized
    subquery.

    See https://docs.cloud.google.com/bigquery/docs/reference/standard-sql/bigqueryml-syntax-generate-embedding for reference.
    """
    struct_options: Dict[
        str,
        Union[str, int, float, bool, Mapping[str, str], List[str], Mapping[str, Any]],
    ] = {}
    # Walk the optional settings in a fixed order so the resulting mapping
    # keeps the same key order regardless of which ones are supplied.
    for option_name, option_value in (
        ("flatten_json_output", flatten_json_output),
        ("task_type", task_type),
        ("output_dimensionality", output_dimensionality),
    ):
        if option_value is not None:
            struct_options[option_name] = option_value

    pieces = [
        f"SELECT * FROM ML.GENERATE_EMBEDDING(MODEL {googlesql.identifier(model_name)}, ({table})",
        _build_struct_sql(struct_options),
        ")\n",
    ]
    return "".join(pieces)

bigframes/formatting_helpers.py

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -25,10 +25,10 @@
2525
import google.api_core.exceptions as api_core_exceptions
2626
import google.cloud.bigquery as bigquery
2727
import humanize
28-
import IPython
29-
import IPython.display as display
3028

3129
if TYPE_CHECKING:
30+
from IPython import display
31+
3232
import bigframes.core.events
3333

3434
GenericJob = Union[
@@ -160,6 +160,8 @@ def progress_callback(
160160
progress_bar = "notebook" if in_ipython() else "terminal"
161161

162162
if progress_bar == "notebook":
163+
import IPython.display as display
164+
163165
if (
164166
isinstance(event, bigframes.core.events.ExecutionStarted)
165167
or current_display is None
@@ -245,6 +247,8 @@ def wait_for_job(job: GenericJob, progress_bar: Optional[str] = None):
245247

246248
try:
247249
if progress_bar == "notebook":
250+
import IPython.display as display
251+
248252
display_id = str(random.random())
249253
loading_bar = display.HTML(get_base_job_loading_html(job))
250254
display.display(loading_bar, display_id=display_id)
@@ -613,4 +617,8 @@ def get_bytes_processed_string(val: Any):
613617

614618
def in_ipython():
    """Tell whether we are running inside a colab-like IPython.

    Returns False when IPython cannot be imported; otherwise returns whether
    the object handed back by ``IPython.get_ipython()`` has a ``kernel``
    attribute.
    """
    try:
        import IPython
    except (ImportError, NameError):
        # IPython is optional; without it we cannot be inside an IPython shell.
        return False
    else:
        active_shell = IPython.get_ipython()
        return hasattr(active_shell, "kernel")

bigframes/operations/blob.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,6 @@
1818
from typing import cast, Literal, Optional, Union
1919
import warnings
2020

21-
import IPython.display as ipy_display
2221
import pandas as pd
2322
import requests
2423

@@ -241,6 +240,8 @@ def display(
241240
width (int or None, default None): width in pixels that the image/video are constrained to. If unset, use the global setting in bigframes.options.display.blob_display_width, otherwise image/video's original size or ratio is used. No-op for other content types.
242241
height (int or None, default None): height in pixels that the image/video are constrained to. If unset, use the global setting in bigframes.options.display.blob_display_height, otherwise image/video's original size or ratio is used. No-op for other content types.
243242
"""
243+
import IPython.display as ipy_display
244+
244245
width = width or bigframes.options.display.blob_display_width
245246
height = height or bigframes.options.display.blob_display_height
246247

bigframes/session/bq_caching_executor.py

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -174,7 +174,9 @@ def to_sql(
174174
else array_value.node
175175
)
176176
node = self._substitute_large_local_sources(node)
177-
compiled = compile.compile_sql(compile.CompileRequest(node, sort_rows=ordered))
177+
compiled = compile.compiler().compile_sql(
178+
compile.CompileRequest(node, sort_rows=ordered)
179+
)
178180
return compiled.sql
179181

180182
def execute(
@@ -290,7 +292,9 @@ def _export_gbq(
290292
# validate destination table
291293
existing_table = self._maybe_find_existing_table(spec)
292294

293-
compiled = compile.compile_sql(compile.CompileRequest(plan, sort_rows=False))
295+
compiled = compile.compiler().compile_sql(
296+
compile.CompileRequest(plan, sort_rows=False)
297+
)
294298
sql = compiled.sql
295299

296300
if (existing_table is not None) and _if_schema_match(
@@ -641,7 +645,7 @@ def _execute_plan_gbq(
641645
]
642646
cluster_cols = cluster_cols[:_MAX_CLUSTER_COLUMNS]
643647

644-
compiled = compile.compile_sql(
648+
compiled = compile.compiler().compile_sql(
645649
compile.CompileRequest(
646650
plan,
647651
sort_rows=ordered,

0 commit comments

Comments
 (0)