Skip to content
This repository was archived by the owner on Apr 1, 2026. It is now read-only.

Commit 46c9d48

Browse files
committed
fix: avoid views when querying BigLake tables from SQL cells
1 parent af49ca2 commit 46c9d48

File tree

4 files changed

+94
-17
lines changed

4 files changed

+94
-17
lines changed

bigframes/core/compile/ibis_compiler/ibis_compiler.py

Lines changed: 9 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -221,18 +221,15 @@ def _table_to_ibis(
221221
physical_schema = ibis_bigquery.BigQuerySchema.to_ibis(
222222
list(source.table.physical_schema)
223223
)
224-
if source.at_time is not None or source.sql_predicate is not None:
225-
import bigframes.session._io.bigquery
226-
227-
sql = bigframes.session._io.bigquery.to_query(
228-
full_table_name,
229-
columns=scan_cols,
230-
sql_predicate=source.sql_predicate,
231-
time_travel_timestamp=source.at_time,
232-
)
233-
return ibis_bigquery.Backend().sql(schema=physical_schema, query=sql)
234-
else:
235-
return ibis_api.table(physical_schema, full_table_name).select(scan_cols)
224+
import bigframes.session._io.bigquery
225+
226+
sql = bigframes.session._io.bigquery.to_query(
227+
full_table_name,
228+
columns=scan_cols,
229+
sql_predicate=source.sql_predicate,
230+
time_travel_timestamp=source.at_time,
231+
)
232+
return ibis_bigquery.Backend().sql(schema=physical_schema, query=sql)
236233

237234

238235
@_compile_node.register

bigframes/core/pyformat.py

Lines changed: 24 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -21,7 +21,7 @@
2121

2222
import string
2323
import typing
24-
from typing import Any, Optional, Union
24+
from typing import Any, Optional, Tuple, Union
2525

2626
import google.cloud.bigquery
2727
import pandas
@@ -39,7 +39,11 @@
3939

4040

4141
def _table_to_sql(table: _BQ_TABLE_TYPES) -> str:
42-
return f"`{table.project}`.`{table.dataset_id}`.`{table.table_id}`"
42+
# BiglakeIcebergTable IDs have 4 parts. BigFrames packs catalog.namespace
43+
# into the dataset_id.
44+
dataset_parts = table.dataset_id.split(".")
45+
dataset_sql = ".".join(f"`{part}`" for part in dataset_parts)
46+
return f"`{table.project}`.{dataset_sql}.`{table.table_id}`"
4347

4448

4549
def _pandas_df_to_sql_dry_run(pd_df: pandas.DataFrame) -> str:
@@ -102,6 +106,24 @@ def _field_to_template_value(
102106
return _pandas_df_to_sql(value, session=session, dry_run=dry_run, name=name)
103107

104108
if isinstance(value, bigframes.dataframe.DataFrame):
109+
import bigframes.core.bq_data as bq_data
110+
import bigframes.core.nodes as nodes
111+
112+
# TODO(b/493608478): Remove this workaround for BigLake/Iceberg tables,
113+
# which cannot currently be used in views, once a fix rolls out.
114+
def is_biglake(
115+
node: nodes.BigFrameNode, child_results: Tuple[bool, ...]
116+
) -> bool:
117+
if isinstance(node, nodes.ReadTableNode):
118+
return isinstance(node.source.table, bq_data.BiglakeIcebergTable)
119+
return any(child_results)
120+
121+
contains_biglake = value._block.expr.node.reduce_up(is_biglake)
122+
123+
if contains_biglake:
124+
sql_query, _, _ = value._to_sql_query(include_index=False)
125+
return f"({sql_query})"
126+
105127
return _table_to_sql(value._to_placeholder_table(dry_run=dry_run))
106128

107129
if isinstance(value, str):

bigframes/session/_io/bigquery/__init__.py

Lines changed: 7 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -519,9 +519,13 @@ def to_query(
519519
time_travel_timestamp: Optional[datetime.datetime] = None,
520520
) -> str:
521521
"""Compile query_or_table with conditions(filters, wildcards) to query."""
522-
sub_query = (
523-
f"({query_or_table})" if is_query(query_or_table) else f"`{query_or_table}`"
524-
)
522+
if is_query(query_or_table):
523+
sub_query = f"({query_or_table})"
524+
else:
525+
# Table ID can have 1, 2, 3, or 4 parts. Quoting all parts to be safe.
526+
# See: https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#identifiers
527+
parts = query_or_table.split(".")
528+
sub_query = ".".join(f"`{part}`" for part in parts)
525529

526530
# TODO(b/338111344): Generate an index based on DefaultIndexKind if we
527531
# don't have index columns specified.

tests/unit/core/test_pyformat.py

Lines changed: 54 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -500,6 +500,15 @@ def test_pyformat_with_query_string_replaces_variables(session):
500500
),
501501
"SELECT * FROM `ListedProject`.`ListedDataset`.`ListedTable`",
502502
),
503+
(
504+
google.cloud.bigquery.TableReference(
505+
google.cloud.bigquery.DatasetReference(
506+
"my-project", "my-catalog.my-namespace"
507+
),
508+
"my-table",
509+
),
510+
"SELECT * FROM `my-project`.`my-catalog`.`my-namespace`.`my-table`",
511+
),
503512
),
504513
)
505514
def test_pyformat_with_table_replaces_variables(table, expected_sql, session=session):
@@ -511,3 +520,48 @@ def test_pyformat_with_table_replaces_variables(table, expected_sql, session=ses
511520
sql = "SELECT * FROM {table}"
512521
got_sql = pyformat.pyformat(sql, pyformat_args=pyformat_args, session=session)
513522
assert got_sql == expected_sql
523+
524+
525+
def test_pyformat_with_bigframes_dataframe_biglake_table(session):
526+
# Create a real BigFrames DataFrame that points to a BigLake table.
527+
import bigframes.core.array_value as array_value
528+
import bigframes.core.blocks as blocks
529+
import bigframes.core.bq_data as bq_data
530+
import bigframes.dataframe
531+
532+
# Define the BigLake table
533+
project_id = "my-project"
534+
catalog_id = "my-catalog"
535+
namespace_id = "my-namespace"
536+
table_id = "my-table"
537+
schema = (google.cloud.bigquery.SchemaField("col", "INTEGER"),)
538+
539+
biglake_table = bq_data.BiglakeIcebergTable(
540+
project_id=project_id,
541+
catalog_id=catalog_id,
542+
namespace_id=namespace_id,
543+
table_id=table_id,
544+
physical_schema=schema,
545+
cluster_cols=(),
546+
metadata=bq_data.TableMetadata(
547+
location=bq_data.BigQueryRegion("us-central1"),
548+
type="TABLE",
549+
),
550+
)
551+
552+
# ArrayValue.from_table is what read_gbq uses.
553+
av = array_value.ArrayValue.from_table(biglake_table, session)
554+
block = blocks.Block(av, index_columns=[], column_labels=["col"])
555+
df = bigframes.dataframe.DataFrame(block)
556+
557+
pyformat_args = {"df": df}
558+
sql = "SELECT * FROM {df}"
559+
560+
got_sql = pyformat.pyformat(sql, pyformat_args=pyformat_args, session=session)
561+
562+
# For BigLake, we now expect a SUBQUERY, not a view reference.
563+
# The subquery should have correctly quoted 4-part ID.
564+
assert "SELECT" in got_sql
565+
assert "FROM `my-project`.`my-catalog`.`my-namespace`.`my-table`" in got_sql
566+
assert got_sql.startswith("SELECT * FROM (SELECT")
567+
assert got_sql.endswith(")")

0 commit comments

Comments (0)