Merge remote-tracking branch 'github/main' into fix_solo_if_else

TrevorBergeron · TrevorBergeron · commit 3d741764909a · 2025-09-29T20:09:24.000Z
diff --git a/bigframes/bigquery/_operations/ai.py b/bigframes/bigquery/_operations/ai.py
@@ -188,6 +188,81 @@ def generate_int(
     return series_list[0]._apply_nary_op(operator, series_list[1:])
 
 
+@log_adapter.method_logger(custom_base_name="bigquery_ai")
+def generate_double(
+    prompt: PROMPT_TYPE,
+    *,
+    connection_id: str | None = None,
+    endpoint: str | None = None,
+    request_type: Literal["dedicated", "shared", "unspecified"] = "unspecified",
+    model_params: Mapping[Any, Any] | None = None,
+) -> series.Series:
+    """
+    Returns the AI analysis based on the prompt, which can be any combination of text and unstructured data.
+
+    **Examples:**
+
+        >>> import bigframes.pandas as bpd
+        >>> import bigframes.bigquery as bbq
+        >>> bpd.options.display.progress_bar = None
+        >>> animal = bpd.Series(["Kangaroo", "Rabbit", "Spider"])
+        >>> bbq.ai.generate_double(("How many legs does a ", animal, " have?"))
+        0    {'result': 2.0, 'full_response': '{"candidates...
+        1    {'result': 4.0, 'full_response': '{"candidates...
+        2    {'result': 8.0, 'full_response': '{"candidates...
+        dtype: struct<result: double, full_response: extension<dbjson<JSONArrowType>>, status: string>[pyarrow]
+
+        >>> bbq.ai.generate_double(("How many legs does a ", animal, " have?")).struct.field("result")
+        0    2.0
+        1    4.0
+        2    8.0
+        Name: result, dtype: Float64
+
+    Args:
+        prompt (Series | List[str|Series] | Tuple[str|Series, ...]):
+            A mixture of Series and string literals that specifies the prompt to send to the model. The Series can be BigFrames Series
+            or pandas Series.
+        connection_id (str, optional):
+            Specifies the connection to use to communicate with the model. For example, `myproject.us.myconnection`.
+            If not provided, the connection from the current session will be used.
+        endpoint (str, optional):
+            Specifies the Vertex AI endpoint to use for the model. For example `"gemini-2.5-flash"`. You can specify any
+            generally available or preview Gemini model. If you specify the model name, BigQuery ML automatically identifies and
+            uses the full endpoint of the model. If you don't specify an ENDPOINT value, BigQuery ML selects a recent stable
+            version of Gemini to use.
+        request_type (Literal["dedicated", "shared", "unspecified"], optional):
+            Specifies the type of inference request to send to the Gemini model. The request type determines what quota the request uses.
+            * "dedicated": the function only uses Provisioned Throughput quota. If Provisioned Throughput quota isn't available, the function
+            returns the error "Provisioned throughput is not purchased or is not active".
+            * "shared": the function only uses dynamic shared quota (DSQ), even if you have purchased Provisioned Throughput quota.
+            * "unspecified": If you haven't purchased Provisioned Throughput quota, the function uses DSQ quota.
+            If you have purchased Provisioned Throughput quota, the function uses the Provisioned Throughput quota first.
+            If requests exceed the Provisioned Throughput quota, the overflow traffic uses DSQ quota.
+        model_params (Mapping[Any, Any], optional):
+            Provides additional parameters to the model. The MODEL_PARAMS value must conform to the generateContent request body format.
+
+    Returns:
+        bigframes.series.Series: A new struct Series with the result data. The struct contains these fields:
+        * "result": a DOUBLE value containing the model's response to the prompt. The result is None if the request fails or is filtered by responsible AI.
+        * "full_response": a JSON value containing the response from the projects.locations.endpoints.generateContent call to the model.
+        The generated text is in the text element.
+        * "status": a STRING value that contains the API response status for the corresponding row. This value is empty if the operation was successful.
+    """
+
+    prompt_context, series_list = _separate_context_and_series(prompt)
+    assert len(series_list) > 0
+
+    operator = ai_ops.AIGenerateDouble(
+        prompt_context=tuple(prompt_context),
+        connection_id=_resolve_connection_id(series_list[0], connection_id),
+        endpoint=endpoint,
+        request_type=request_type,
+        model_params=json.dumps(model_params) if model_params else None,
+    )
+
+    return series_list[0]._apply_nary_op(operator, series_list[1:])
+
+
 def _separate_context_and_series(
     prompt: PROMPT_TYPE,
 ) -> Tuple[List[str | None], List[series.Series]]:
diff --git a/bigframes/core/compile/ibis_compiler/scalar_op_registry.py b/bigframes/core/compile/ibis_compiler/scalar_op_registry.py
@@ -1990,7 +1990,7 @@ def ai_generate_bool(
 
 @scalar_op_compiler.register_nary_op(ops.AIGenerateInt, pass_op=True)
 def ai_generate_int(
-    *values: ibis_types.Value, op: ops.AIGenerateBool
+    *values: ibis_types.Value, op: ops.AIGenerateInt
 ) -> ibis_types.StructValue:
 
     return ai_ops.AIGenerateInt(
@@ -2002,6 +2002,20 @@ def ai_generate_int(
     ).to_expr()
 
 
+@scalar_op_compiler.register_nary_op(ops.AIGenerateDouble, pass_op=True)
+def ai_generate_double(
+    *values: ibis_types.Value, op: ops.AIGenerateDouble
+) -> ibis_types.StructValue:
+
+    return ai_ops.AIGenerateDouble(
+        _construct_prompt(values, op.prompt_context),  # type: ignore
+        op.connection_id,  # type: ignore
+        op.endpoint,  # type: ignore
+        op.request_type.upper(),  # type: ignore
+        op.model_params,  # type: ignore
+    ).to_expr()
+
+
 def _construct_prompt(
     col_refs: tuple[ibis_types.Value], prompt_context: tuple[str | None]
 ) -> ibis_types.StructValue:
diff --git a/bigframes/core/compile/sqlglot/aggregate_compiler.py b/bigframes/core/compile/sqlglot/aggregate_compiler.py
@@ -63,7 +63,7 @@ def compile_analytic(
     window: window_spec.WindowSpec,
 ) -> sge.Expression:
     if isinstance(aggregate, agg_expressions.NullaryAggregation):
-        return nullary_compiler.compile(aggregate.op)
+        return nullary_compiler.compile(aggregate.op, window)
     if isinstance(aggregate, agg_expressions.UnaryAggregation):
         column = typed_expr.TypedExpr(
             scalar_compiler.scalar_op_compiler.compile_expression(aggregate.arg),
diff --git a/bigframes/core/compile/sqlglot/aggregations/binary_compiler.py b/bigframes/core/compile/sqlglot/aggregations/binary_compiler.py
@@ -20,6 +20,7 @@
 
 from bigframes.core import window_spec
 import bigframes.core.compile.sqlglot.aggregations.op_registration as reg
+from bigframes.core.compile.sqlglot.aggregations.windows import apply_window_if_present
 import bigframes.core.compile.sqlglot.expressions.typed_expr as typed_expr
 from bigframes.operations import aggregations as agg_ops
 
@@ -33,3 +34,25 @@ def compile(
     window: typing.Optional[window_spec.WindowSpec] = None,
 ) -> sge.Expression:
     return BINARY_OP_REGISTRATION[op](op, left, right, window=window)
+
+
+@BINARY_OP_REGISTRATION.register(agg_ops.CorrOp)
+def _(
+    op: agg_ops.CorrOp,
+    left: typed_expr.TypedExpr,
+    right: typed_expr.TypedExpr,
+    window: typing.Optional[window_spec.WindowSpec] = None,
+) -> sge.Expression:
+    result = sge.func("CORR", left.expr, right.expr)
+    return apply_window_if_present(result, window)
+
+
+@BINARY_OP_REGISTRATION.register(agg_ops.CovOp)
+def _(
+    op: agg_ops.CovOp,
+    left: typed_expr.TypedExpr,
+    right: typed_expr.TypedExpr,
+    window: typing.Optional[window_spec.WindowSpec] = None,
+) -> sge.Expression:
+    result = sge.func("COVAR_SAMP", left.expr, right.expr)
+    return apply_window_if_present(result, window)
diff --git a/bigframes/core/compile/sqlglot/aggregations/nullary_compiler.py b/bigframes/core/compile/sqlglot/aggregations/nullary_compiler.py
@@ -39,3 +39,15 @@ def _(
     window: typing.Optional[window_spec.WindowSpec] = None,
 ) -> sge.Expression:
     return apply_window_if_present(sge.func("COUNT", sge.convert(1)), window)
+
+
+@NULLARY_OP_REGISTRATION.register(agg_ops.RowNumberOp)
+def _(
+    op: agg_ops.RowNumberOp,
+    window: typing.Optional[window_spec.WindowSpec] = None,
+) -> sge.Expression:
+    result: sge.Expression = sge.func("ROW_NUMBER")
+    if window is None:
+        # ROW_NUMBER always needs an OVER clause.
+        return sge.Window(this=result)
+    return apply_window_if_present(result, window)
diff --git a/bigframes/core/compile/sqlglot/aggregations/unary_compiler.py b/bigframes/core/compile/sqlglot/aggregations/unary_compiler.py
@@ -84,10 +84,7 @@ def _(
     column: typed_expr.TypedExpr,
     window: typing.Optional[window_spec.WindowSpec] = None,
 ) -> sge.Expression:
-    # Ranking functions do not support window framing clauses.
-    return apply_window_if_present(
-        sge.func("DENSE_RANK"), window, include_framing_clauses=False
-    )
+    return apply_window_if_present(sge.func("DENSE_RANK"), window)
 
 
 @UNARY_OP_REGISTRATION.register(agg_ops.MaxOp)
@@ -165,10 +162,7 @@ def _(
     column: typed_expr.TypedExpr,
     window: typing.Optional[window_spec.WindowSpec] = None,
 ) -> sge.Expression:
-    # Ranking functions do not support window framing clauses.
-    return apply_window_if_present(
-        sge.func("RANK"), window, include_framing_clauses=False
-    )
+    return apply_window_if_present(sge.func("RANK"), window)
 
 
 @UNARY_OP_REGISTRATION.register(agg_ops.SizeUnaryOp)
diff --git a/bigframes/core/compile/sqlglot/aggregations/windows.py b/bigframes/core/compile/sqlglot/aggregations/windows.py
@@ -25,7 +25,6 @@
 def apply_window_if_present(
     value: sge.Expression,
     window: typing.Optional[window_spec.WindowSpec] = None,
-    include_framing_clauses: bool = True,
 ) -> sge.Expression:
     if window is None:
         return value
@@ -65,7 +64,7 @@ def apply_window_if_present(
     if not window.bounds and not order:
         return sge.Window(this=value, partition_by=group_by)
 
-    if not window.bounds and not include_framing_clauses:
+    if not window.bounds:
         return sge.Window(this=value, partition_by=group_by, order=order)
 
     kind = (
diff --git a/bigframes/core/compile/sqlglot/expressions/ai_ops.py b/bigframes/core/compile/sqlglot/expressions/ai_ops.py
@@ -40,6 +40,13 @@ def _(*exprs: TypedExpr, op: ops.AIGenerateInt) -> sge.Expression:
     return sge.func("AI.GENERATE_INT", *args)
 
 
+@register_nary_op(ops.AIGenerateDouble, pass_op=True)
+def _(*exprs: TypedExpr, op: ops.AIGenerateDouble) -> sge.Expression:
+    args = [_construct_prompt(exprs, op.prompt_context)] + _construct_named_args(op)
+
+    return sge.func("AI.GENERATE_DOUBLE", *args)
+
+
 def _construct_prompt(
     exprs: tuple[TypedExpr, ...], prompt_context: tuple[str | None, ...]
 ) -> sge.Kwarg:
diff --git a/bigframes/operations/__init__.py b/bigframes/operations/__init__.py
@@ -14,7 +14,7 @@
 
 from __future__ import annotations
 
-from bigframes.operations.ai_ops import AIGenerateBool, AIGenerateInt
+from bigframes.operations.ai_ops import AIGenerateBool, AIGenerateDouble, AIGenerateInt
 from bigframes.operations.array_ops import (
     ArrayIndexOp,
     ArrayReduceOp,
@@ -413,6 +413,7 @@
     "GeoStDistanceOp",
     # AI ops
     "AIGenerateBool",
+    "AIGenerateDouble",
     "AIGenerateInt",
     # Numpy ops mapping
     "NUMPY_TO_BINOP",
diff --git a/bigframes/operations/ai_ops.py b/bigframes/operations/ai_ops.py
@@ -66,3 +66,25 @@ def output_type(self, *input_types: dtypes.ExpressionType) -> dtypes.ExpressionT
                 )
             )
         )
+
+
+@dataclasses.dataclass(frozen=True)
+class AIGenerateDouble(base_ops.NaryOp):
+    name: ClassVar[str] = "ai_generate_double"
+
+    prompt_context: Tuple[str | None, ...]
+    connection_id: str
+    endpoint: str | None
+    request_type: Literal["dedicated", "shared", "unspecified"]
+    model_params: str | None
+
+    def output_type(self, *input_types: dtypes.ExpressionType) -> dtypes.ExpressionType:
+        return pd.ArrowDtype(
+            pa.struct(
+                (
+                    pa.field("result", pa.float64()),
+                    pa.field("full_response", dtypes.JSON_ARROW_TYPE),
+                    pa.field("status", pa.string()),
+                )
+            )
+        )
diff --git a/tests/system/small/bigquery/test_ai.py b/tests/system/small/bigquery/test_ai.py
@@ -146,5 +146,44 @@ def test_ai_generate_int_multi_model(session):
     )
 
 
+def test_ai_generate_double(session):
+    s = bpd.Series(["Cat"], session=session)
+    prompt = ("How many legs does a ", s, " have?")
+
+    result = bbq.ai.generate_double(prompt, endpoint="gemini-2.5-flash")
+
+    assert _contains_no_nulls(result)
+    assert result.dtype == pd.ArrowDtype(
+        pa.struct(
+            (
+                pa.field("result", pa.float64()),
+                pa.field("full_response", dtypes.JSON_ARROW_TYPE),
+                pa.field("status", pa.string()),
+            )
+        )
+    )
+
+
+def test_ai_generate_double_multi_model(session):
+    df = session.from_glob_path(
+        "gs://bigframes-dev-testing/a_multimodel/images/*", name="image"
+    )
+
+    result = bbq.ai.generate_double(
+        ("How many animals are there in the picture ", df["image"])
+    )
+
+    assert _contains_no_nulls(result)
+    assert result.dtype == pd.ArrowDtype(
+        pa.struct(
+            (
+                pa.field("result", pa.float64()),
+                pa.field("full_response", dtypes.JSON_ARROW_TYPE),
+                pa.field("status", pa.string()),
+            )
+        )
+    )
+
+
 def _contains_no_nulls(s: series.Series) -> bool:
     return len(s) == s.count()
diff --git a/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_binary_compiler/test_corr/out.sql b/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_binary_compiler/test_corr/out.sql
@@ -0,0 +1,13 @@
+WITH `bfcte_0` AS (
+  SELECT
+    `int64_col` AS `bfcol_0`,
+    `float64_col` AS `bfcol_1`
+  FROM `bigframes-dev`.`sqlglot_test`.`scalar_types`
+), `bfcte_1` AS (
+  SELECT
+    CORR(`bfcol_0`, `bfcol_1`) AS `bfcol_2`
+  FROM `bfcte_0`
+)
+SELECT
+  `bfcol_2` AS `corr_col`
+FROM `bfcte_1`
diff --git a/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_binary_compiler/test_cov/out.sql b/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_binary_compiler/test_cov/out.sql
@@ -0,0 +1,13 @@
+WITH `bfcte_0` AS (
+  SELECT
+    `int64_col` AS `bfcol_0`,
+    `float64_col` AS `bfcol_1`
+  FROM `bigframes-dev`.`sqlglot_test`.`scalar_types`
+), `bfcte_1` AS (
+  SELECT
+    COVAR_SAMP(`bfcol_0`, `bfcol_1`) AS `bfcol_2`
+  FROM `bfcte_0`
+)
+SELECT
+  `bfcol_2` AS `cov_col`
+FROM `bfcte_1`
diff --git a/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_nullary_compiler/test_row_number/out.sql b/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_nullary_compiler/test_row_number/out.sql
@@ -0,0 +1,13 @@
+WITH `bfcte_0` AS (
+  SELECT
+    `bool_col` AS `bfcol_0`
+  FROM `bigframes-dev`.`sqlglot_test`.`scalar_types`
+), `bfcte_1` AS (
+  SELECT
+    *,
+    ROW_NUMBER() OVER () AS `bfcol_1`
+  FROM `bfcte_0`
+)
+SELECT
+  `bfcol_1` AS `row_number`
+FROM `bfcte_1`
diff --git a/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_nullary_compiler/test_row_number_with_window/out.sql b/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_nullary_compiler/test_row_number_with_window/out.sql
@@ -0,0 +1,13 @@
+WITH `bfcte_0` AS (
+  SELECT
+    `int64_col` AS `bfcol_0`
+  FROM `bigframes-dev`.`sqlglot_test`.`scalar_types`
+), `bfcte_1` AS (
+  SELECT
+    *,
+    ROW_NUMBER() OVER (ORDER BY `bfcol_0` IS NULL ASC NULLS LAST, `bfcol_0` ASC NULLS LAST) AS `bfcol_1`
+  FROM `bfcte_0`
+)
+SELECT
+  `bfcol_1` AS `row_number`
+FROM `bfcte_1`
diff --git a/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_nullary_compiler/test_size/out.sql b/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_nullary_compiler/test_size/out.sql
@@ -0,0 +1,12 @@
+WITH `bfcte_0` AS (
+  SELECT
+    `rowindex` AS `bfcol_0`
+  FROM `bigframes-dev`.`sqlglot_test`.`scalar_types`
+), `bfcte_1` AS (
+  SELECT
+    COUNT(1) AS `bfcol_2`
+  FROM `bfcte_0`
+)
+SELECT
+  `bfcol_2` AS `size`
+FROM `bfcte_1`
diff --git a/tests/unit/core/compile/sqlglot/aggregations/test_binary_compiler.py b/tests/unit/core/compile/sqlglot/aggregations/test_binary_compiler.py
diff --git a/tests/unit/core/compile/sqlglot/aggregations/test_nullary_compiler.py b/tests/unit/core/compile/sqlglot/aggregations/test_nullary_compiler.py
diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_ai_ops/test_ai_generate_double/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_ai_ops/test_ai_generate_double/out.sql
diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_ai_ops/test_ai_generate_double_with_model_param/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_ai_ops/test_ai_generate_double_with_model_param/out.sql
diff --git a/tests/unit/core/compile/sqlglot/expressions/test_ai_ops.py b/tests/unit/core/compile/sqlglot/expressions/test_ai_ops.py
diff --git a/third_party/bigframes_vendored/ibis/backends/sql/compilers/bigquery/__init__.py b/third_party/bigframes_vendored/ibis/backends/sql/compilers/bigquery/__init__.py
diff --git a/third_party/bigframes_vendored/ibis/expr/operations/ai_ops.py b/third_party/bigframes_vendored/ibis/expr/operations/ai_ops.py