googleapis
diff --git a/‎bigframes/_magics.py‎
Lines changed: 1 addition & 5 deletions b/‎bigframes/_magics.py‎
Lines changed: 1 addition & 5 deletions
diff --git a/‎bigframes/bigquery/__init__.py‎
Lines changed: 5 additions & 0 deletions b/‎bigframes/bigquery/__init__.py‎
Lines changed: 5 additions & 0 deletions
diff --git a/‎bigframes/bigquery/_operations/ai.py‎
Lines changed: 13 additions & 14 deletions b/‎bigframes/bigquery/_operations/ai.py‎
Lines changed: 13 additions & 14 deletions
diff --git a/‎bigframes/bigquery/_operations/mathematical.py‎
Lines changed: 53 additions & 0 deletions b/‎bigframes/bigquery/_operations/mathematical.py‎
Lines changed: 53 additions & 0 deletions
diff --git a/‎bigframes/core/bigframe_node.py‎
Lines changed: 4 additions & 14 deletions b/‎bigframes/core/bigframe_node.py‎
Lines changed: 4 additions & 14 deletions
diff --git a/‎bigframes/core/compile/sqlglot/compiler.py‎
Lines changed: 28 additions & 3 deletions b/‎bigframes/core/compile/sqlglot/compiler.py‎
Lines changed: 28 additions & 3 deletions
diff --git a/‎bigframes/core/compile/sqlglot/expressions/comparison_ops.py‎
Lines changed: 16 additions & 4 deletions b/‎bigframes/core/compile/sqlglot/expressions/comparison_ops.py‎
Lines changed: 16 additions & 4 deletions
@@ -48,8 +48,4 @@ def _cell_magic(line, cell):
     if args.destination_var:
         ipython.push({args.destination_var: dataframe})
 
-    with bigframes.option_context(
-        "display.repr_mode",
-        "anywidget",
-    ):
-        display(dataframe)
+    display(dataframe)
@@ -58,6 +58,7 @@
     to_json,
     to_json_string,
 )
+from bigframes.bigquery._operations.mathematical import rand
 from bigframes.bigquery._operations.search import create_vector_index, vector_search
 from bigframes.bigquery._operations.sql import sql_scalar
 from bigframes.bigquery._operations.struct import struct
@@ -99,6 +100,8 @@
     parse_json,
     to_json,
     to_json_string,
+    # mathematical ops
+    rand,
     # search ops
     create_vector_index,
     vector_search,
@@ -154,6 +157,8 @@
     "parse_json",
     "to_json",
     "to_json_string",
+    # mathematical ops
+    "rand",
     # search ops
     "create_vector_index",
     "vector_search",
 
@@ -28,8 +28,9 @@
 from bigframes import series, session
 from bigframes.bigquery._operations import utils as bq_utils
 from bigframes.core import convert
+from bigframes.core.compile.sqlglot import sql as sg_sql
 from bigframes.core.logging import log_adapter
-import bigframes.core.sql.literals
+from bigframes.ml import base as ml_base
 from bigframes.ml import core as ml_core
 from bigframes.operations import ai_ops, output_schemas
 
@@ -392,7 +393,7 @@ def generate_double(
 
 @log_adapter.method_logger(custom_base_name="bigquery_ai")
 def generate_embedding(
-    model: Union[bigframes.ml.base.BaseEstimator, str, pd.Series],
+    model: Union[ml_base.BaseEstimator, str, pd.Series],
     data: Union[dataframe.DataFrame, series.Series, pd.DataFrame, pd.Series],
     *,
     output_dimensionality: Optional[int] = None,
@@ -416,7 +417,7 @@ def generate_embedding(
         ... ) # doctest: +SKIP
 
     Args:
-        model (bigframes.ml.base.BaseEstimator or str):
+        model (bigframes.ml.base.BaseEstimator or str):
             The model to use for text embedding.
         data (bigframes.pandas.DataFrame or bigframes.pandas.Series):
             The data to generate embeddings for. If a Series is provided, it is
@@ -458,7 +459,7 @@ def generate_embedding(
     model_name, session = bq_utils.get_model_name_and_session(model, data)
     table_sql = bq_utils.to_sql(data)
 
-    struct_fields: Dict[str, bigframes.core.sql.literals.STRUCT_VALUES] = {}
+    struct_fields: Dict[str, Any] = {}
     if output_dimensionality is not None:
         struct_fields["OUTPUT_DIMENSIONALITY"] = output_dimensionality
     if task_type is not None:
@@ -478,7 +479,7 @@ def generate_embedding(
         FROM AI.GENERATE_EMBEDDING(
             MODEL `{model_name}`,
             ({table_sql}),
-            {bigframes.core.sql.literals.struct_literal(struct_fields)}
+            {sg_sql.to_sql(sg_sql.literal(struct_fields))}
         )
     """
 
@@ -490,7 +491,7 @@ def generate_embedding(
 
 @log_adapter.method_logger(custom_base_name="bigquery_ai")
 def generate_text(
-    model: Union[bigframes.ml.base.BaseEstimator, str, pd.Series],
+    model: Union[ml_base.BaseEstimator, str, pd.Series],
     data: Union[dataframe.DataFrame, series.Series, pd.DataFrame, pd.Series],
     *,
     temperature: Optional[float] = None,
@@ -519,7 +520,7 @@ def generate_text(
         ... ) # doctest: +SKIP
 
     Args:
-        model (bigframes.ml.base.BaseEstimator or str):
+        model (bigframes.ml.base.BaseEstimator or str):
             The model to use for text generation.
         data (bigframes.pandas.DataFrame or bigframes.pandas.Series):
             The data to generate text for. If a Series is provided, it is
@@ -591,7 +592,7 @@ def generate_text(
         FROM AI.GENERATE_TEXT(
             MODEL `{model_name}`,
             ({table_sql}),
-            {bigframes.core.sql.literals.struct_literal(struct_fields)}
+            {sg_sql.to_sql(sg_sql.literal(struct_fields))}
         )
     """
 
@@ -603,7 +604,7 @@ def generate_text(
 
 @log_adapter.method_logger(custom_base_name="bigquery_ai")
 def generate_table(
-    model: Union[bigframes.ml.base.BaseEstimator, str, pd.Series],
+    model: Union[ml_base.BaseEstimator, str, pd.Series],
     data: Union[dataframe.DataFrame, series.Series, pd.DataFrame, pd.Series],
     *,
     output_schema: Union[str, Mapping[str, str]],
@@ -635,7 +636,7 @@ def generate_table(
         ... ) # doctest: +SKIP
 
     Args:
-        model (bigframes.ml.base.BaseEstimator or str):
+        model (bigframes.ml.base.BaseEstimator or str):
             The model to use for table generation.
         data (bigframes.pandas.DataFrame or bigframes.pandas.Series):
             The data to generate table for. If a Series is provided, it is
@@ -677,9 +678,7 @@ def generate_table(
     else:
         output_schema_str = output_schema
 
-    struct_fields_bq: Dict[str, bigframes.core.sql.literals.STRUCT_VALUES] = {
-        "output_schema": output_schema_str
-    }
+    struct_fields_bq: Dict[str, Any] = {"output_schema": output_schema_str}
     if temperature is not None:
         struct_fields_bq["temperature"] = temperature
     if top_p is not None:
@@ -691,7 +690,7 @@ def generate_table(
     if request_type is not None:
         struct_fields_bq["request_type"] = request_type
 
-    struct_sql = bigframes.core.sql.literals.struct_literal(struct_fields_bq)
+    struct_sql = sg_sql.to_sql(sg_sql.literal(struct_fields_bq))
     query = f"""
         SELECT *
         FROM AI.GENERATE_TABLE(
 
@@ -0,0 +1,53 @@
+# Copyright 2025 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import annotations
+
+from bigframes import dtypes
+from bigframes import operations as ops
+import bigframes.core.col
+import bigframes.core.expression
+
+
+def rand() -> bigframes.core.col.Expression:
+    """
+    Generates a pseudo-random value of type FLOAT64 in the range of [0, 1),
+    inclusive of 0 and exclusive of 1.
+
+    .. warning::
+        This method introduces non-determinism to the expression. Reading the
+        same column twice may result in different results. The value might
+        change. Do not use this value or any value derived from it as a join
+        key.
+
+    **Examples:**
+
+        >>> import bigframes.pandas as bpd
+        >>> import bigframes.bigquery as bbq
+        >>> df = bpd.DataFrame({"a": [1, 2, 3]})
+        >>> df['random'] = bbq.rand()
+        >>> # Resulting column 'random' will contain random floats between 0 and 1.
+
+    Returns:
+        bigframes.pandas.api.typing.Expression:
+            An expression that can be used in
+            :func:`~bigframes.pandas.DataFrame.assign` and other methods.  See
+            :func:`bigframes.pandas.col`.
+    """
+    op = ops.SqlScalarOp(
+        _output_type=dtypes.FLOAT_DTYPE,
+        sql_template="RAND()",
+        is_deterministic=False,
+    )
+    return bigframes.core.col.Expression(bigframes.core.expression.OpExpression(op, ()))
@@ -330,22 +330,12 @@ def top_down(
         """
         Perform a top-down transformation of the BigFrameNode tree.
         """
-        to_process = [self]
-        results: Dict[BigFrameNode, BigFrameNode] = {}
 
-        while to_process:
-            item = to_process.pop()
-            if item not in results.keys():
-                item_result = transform(item)
-                results[item] = item_result
-                to_process.extend(item_result.child_nodes)
+        @functools.cache
+        def recursive_transform(node: BigFrameNode) -> BigFrameNode:
+            return transform(node).transform_children(recursive_transform)
 
-        to_process = [self]
-        # for each processed item, replace its children
-        for item in reversed(list(results.keys())):
-            results[item] = results[item].transform_children(lambda x: results[x])
-
-        return results[self]
+        return recursive_transform(self)
 
     def bottom_up(
         self: BigFrameNode,
 
@@ -62,6 +62,8 @@ def compile_sql(request: configs.CompileRequest) -> configs.CompileResult:
     if request.sort_rows:
         result_node = typing.cast(nodes.ResultNode, rewrite.column_pruning(result_node))
         encoded_type_refs = data_type_logger.encode_type_refs(result_node)
+        # TODO: Extract CTEs earlier
+        result_node = typing.cast(nodes.ResultNode, rewrite.extract_ctes(result_node))
         sql = _compile_result_node(result_node)
         return configs.CompileResult(
             sql,
@@ -74,6 +76,8 @@ def compile_sql(request: configs.CompileRequest) -> configs.CompileResult:
     result_node = dataclasses.replace(result_node, order_by=None)
     result_node = typing.cast(nodes.ResultNode, rewrite.column_pruning(result_node))
     encoded_type_refs = data_type_logger.encode_type_refs(result_node)
+    # TODO: Extract CTEs earlier
+    result_node = typing.cast(nodes.ResultNode, rewrite.extract_ctes(result_node))
     sql = _compile_result_node(result_node)
     # Return the ordering iff no extra columns are needed to define the row order
     if ordering is not None:
@@ -94,6 +98,7 @@ def _remap_variables(
     result_node, _ = rewrite.remap_variables(
         node, map(identifiers.ColumnId, uid_gen.get_uid_stream("bfcol_"))
     )
+    result_node.validate_tree()
     return typing.cast(nodes.ResultNode, result_node)
 
 
@@ -102,13 +107,16 @@ def _compile_result_node(root: nodes.ResultNode) -> str:
     # of nodes using the same generator.
     uid_gen = guid.SequentialUIDGenerator()
     root = _remap_variables(root, uid_gen)
+    # Remap variables creates too many new
+    # root = rewrite.select_pullup(root, prefer_source_names=False)
     root = typing.cast(nodes.ResultNode, rewrite.defer_selection(root))
 
     # Have to bind schema as the final step before compilation.
     # Probably, should defer even further
     root = typing.cast(nodes.ResultNode, schema_binding.bind_schema_to_tree(root))
 
-    sqlglot_ir_obj = compile_node(rewrite.as_sql_nodes(root), uid_gen)
+    # TODO: Bake all IDs in tree, stop passing uid_gen to emitters
+    sqlglot_ir_obj = compile_node(rewrite.as_sql_nodes(root, uid_gen), uid_gen)
     return sqlglot_ir_obj.sql
 
 
@@ -121,7 +129,7 @@ def compile_node(
     for current_node in list(node.iter_nodes_topo()):
         if current_node.child_nodes == ():
             # For leaf node, generates a dumpy child to pass the UID generator.
-            child_results = tuple([sqlglot_ir.SQLGlotIR(uid_gen=uid_gen)])
+            child_results = tuple([sqlglot_ir.SQLGlotIR.empty(uid_gen=uid_gen)])
         else:
             # Child nodes should have been compiled in the reverse topological order.
             child_results = tuple(
@@ -256,6 +264,23 @@ def compile_isin_join(
     )
 
 
+@_compile_node.register
+def compile_cte_ref_node(node: sql_nodes.SqlCteRefNode, child: sqlglot_ir.SQLGlotIR):
+    return sqlglot_ir.SQLGlotIR.from_cte_ref(
+        node.cte_name,
+        uid_gen=child.uid_gen,
+    )
+
+
+@_compile_node.register
+def compile_with_ctes_node(
+    node: sql_nodes.SqlWithCtesNode,
+    child: sqlglot_ir.SQLGlotIR,
+    *ctes: sqlglot_ir.SQLGlotIR,
+):
+    return child.with_ctes(tuple(zip(node.cte_names, ctes)))
+
+
 @_compile_node.register
 def compile_concat(
     node: nodes.ConcatNode, *children: sqlglot_ir.SQLGlotIR
@@ -271,7 +296,7 @@ def compile_concat(
     ]
 
     return sqlglot_ir.SQLGlotIR.from_union(
-        [child._as_select() for child in children],
+        [child.expr.as_select_all() for child in children],
         output_aliases=output_aliases,
         uid_gen=uid_gen,
     )
 
@@ -33,27 +33,39 @@
 @register_unary_op(ops.IsInOp, pass_op=True)
 def _(expr: TypedExpr, op: ops.IsInOp) -> sge.Expression:
     values = []
+    # bools are not comparable to non-bools in SQL, so we need to cast the expression to INT64 if the values contain non-bools.
+    must_upcast_bools = dtypes.is_numeric(expr.dtype, include_bool=False) or any(
+        dtypes.is_numeric(dtypes.bigframes_type(type(value)), include_bool=False)
+        for value in op.values
+        if not _is_null(value)
+    )
     for value in op.values:
         if _is_null(value):
             continue
         dtype = dtypes.bigframes_type(type(value))
         if dtypes.can_compare(expr.dtype, dtype):
+            if must_upcast_bools and dtype == dtypes.BOOL_DTYPE:
+                value = int(value)
             values.append(sge.convert(value))
 
+    sg_lexpr: sge.Expression = expr.expr
+    if expr.dtype == dtypes.BOOL_DTYPE and must_upcast_bools:
+        sg_lexpr = sge.cast(expr.expr, "INT64")
+
     if op.match_nulls:
         contains_nulls = any(_is_null(value) for value in op.values)
         if contains_nulls:
             if len(values) == 0:
-                return sge.Is(this=expr.expr, expression=sge.Null())
-            return sge.Is(this=expr.expr, expression=sge.Null()) | sge.In(
-                this=expr.expr, expressions=values
+                return sge.Is(this=sg_lexpr, expression=sge.Null())
+            return sge.Is(this=sg_lexpr, expression=sge.Null()) | sge.In(
+                this=sg_lexpr, expressions=values
             )
 
     if len(values) == 0:
         return sge.convert(False)
 
     return sge.func(
-        "COALESCE", sge.In(this=expr.expr, expressions=values), sge.convert(False)
+        "COALESCE", sge.In(this=sg_lexpr, expressions=values), sge.convert(False)
     )