chore: investigate b/440407542

chelsea-lin · chelsea-lin · commit 553368ee4119 · 2025-09-08T17:23:25.000Z
diff --git a/bigframes/bigquery/_operations/search.py b/bigframes/bigquery/_operations/search.py
@@ -87,7 +87,8 @@ def create_vector_index(
 
     read_gbq_query(sql)
 
-
+import bigframes.perf_inspect as perf_inspect
+@perf_inspect.runtime_logger
 def vector_search(
     base_table: str,
     column_to_search: str,
@@ -246,5 +247,4 @@ def vector_search(
         df.index.names = index_labels
     else:
         df = query._session.read_gbq_query(sql, allow_large_results=allow_large_results)
-
     return df
diff --git a/bigframes/core/blocks.py b/bigframes/core/blocks.py
@@ -41,6 +41,7 @@
     Union,
 )
 import warnings
+import bigframes.perf_inspect as perf_inspect
 
 import bigframes_vendored.constants as constants
 import google.cloud.bigquery as bigquery
@@ -940,6 +941,8 @@ def split(
         ]
         return [sliced_block.drop_columns(drop_cols) for sliced_block in sliced_blocks]
 
+    
+    @perf_inspect.runtime_logger
     def _compute_dry_run(
         self,
         value_keys: Optional[Iterable[str]] = None,
@@ -1629,6 +1632,7 @@ def slice(
     # Using cache to optimize for Jupyter Notebook's behavior where both '__repr__'
     # and '__repr_html__' are called in a single display action, reducing redundant
     # queries.
+    @perf_inspect.runtime_logger
     @functools.cache
     def retrieve_repr_request_results(
         self, max_results: int
@@ -1646,22 +1650,29 @@ def retrieve_repr_request_results(
             array_value=self.expr,
             config=executors.CacheConfig(optimize_for="head", if_cached="reuse-strict"),
         )
+        import time
+        start_time = time.monotonic()
         head_result = self.session._executor.execute(
             self.expr.slice(start=None, stop=max_results, step=None),
             execution_spec.ExecutionSpec(
                 promise_under_10gb=True,
                 ordered=True,
             ),
         )
+        print("Time taken to execute head: {:.2f} seconds".format(time.monotonic() - start_time))
+        start_time = time.monotonic()
         row_count = self.session._executor.execute(
             self.expr.row_count(),
             execution_spec.ExecutionSpec(
                 promise_under_10gb=True,
                 ordered=False,
             ),
         ).to_py_scalar()
+        print("Time taken to execute row_count: {:.2f} seconds".format(time.monotonic() - start_time))
 
+        start_time = time.monotonic()
         head_df = head_result.to_pandas()
+        print("Time taken to execute to_pandas: {:.2f} seconds".format(time.monotonic() - start_time))
         return self._copy_index_to_pandas(head_df), row_count, head_result.query_job
 
     def promote_offsets(self, label: Label = None) -> typing.Tuple[Block, str]:
diff --git a/bigframes/core/global_session.py b/bigframes/core/global_session.py
@@ -83,6 +83,8 @@ def close_session() -> None:
         bigframes._config.options.bigquery._session_started = False
 
 
+import bigframes.perf_inspect as perf_inspect
+@perf_inspect.runtime_logger
 def get_global_session():
     """Gets the global session.
 
diff --git a/bigframes/dataframe.py b/bigframes/dataframe.py
@@ -54,6 +54,8 @@
 import pyarrow
 import tabulate
 
+import bigframes.perf_inspect as perf_inspect
+
 import bigframes._config.display_options as display_options
 import bigframes.constants
 import bigframes.core
@@ -116,6 +118,8 @@ class DataFrame(vendored_pandas_frame.DataFrame):
     # Must be above 5000 for pandas to delegate to bigframes for binops
     __pandas_priority__ = 15000
 
+    import bigframes.perf_inspect as perf_inspect
+    @perf_inspect.runtime_logger
     def __init__(
         self,
         data=None,
@@ -725,6 +729,7 @@ def __setattr__(self, key: str, value):
         else:
             object.__setattr__(self, key, value)
 
+    @perf_inspect.runtime_logger
     def __repr__(self) -> str:
         """Converts a DataFrame to a string. Calls to_pandas.
 
@@ -777,6 +782,7 @@ def __repr__(self) -> str:
         lines.append(f"[{row_count} rows x {column_count} columns]")
         return "\n".join(lines)
 
+    @perf_inspect.runtime_logger
     def _repr_html_(self) -> str:
         """
         Returns an html string primarily for use by notebooks for displaying
@@ -1703,6 +1709,9 @@ def to_pandas(
     ) -> pandas.Series:
         ...
 
+
+    import bigframes.perf_inspect as perf_inspect
+    @perf_inspect.runtime_logger
     def to_pandas(
         self,
         max_download_size: Optional[int] = None,
@@ -1887,6 +1896,7 @@ def to_pandas_batches(
             allow_large_results=allow_large_results,
         )
 
+    @perf_inspect.runtime_logger
     def _compute_dry_run(self) -> bigquery.QueryJob:
         _, query_job = self._block._compute_dry_run()
         return query_job
diff --git a/bigframes/formatting_helpers.py b/bigframes/formatting_helpers.py
@@ -90,7 +90,6 @@ def repr_query_job_html(query_job: Optional[bigquery.QueryJob]):
     table_html += "</table>"
     return widgets.HTML(table_html)
 
-
 def repr_query_job(query_job: Optional[bigquery.QueryJob]):
     """Return query job as a formatted string.
     Args:
@@ -118,7 +117,6 @@ def repr_query_job(query_job: Optional[bigquery.QueryJob]):
                 res += f"""{key}: {job_val}"""
     return res
 
-
 def wait_for_query_job(
     query_job: bigquery.QueryJob,
     max_results: Optional[int] = None,
diff --git a/bigframes/perf_inspect.py b/bigframes/perf_inspect.py
@@ -0,0 +1,61 @@
+# Copyright 2025 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import functools
+import time
+
+# Nesting depth of decorated calls currently in flight. It sets how far each
+# log line is indented. Plain module global: not synchronized across threads.
+global_counter = 0
+
+
+def runtime_logger(func):
+    """Decorator that prints when ``func`` starts and ends, and its runtime.
+
+    Each call prints a "started" line before invoking ``func`` and an "ended"
+    line afterwards. Lines are prefixed with ``"--"`` repeated once per level
+    of nesting among decorated calls, and timestamps come from
+    ``time.monotonic()``.
+
+    Args:
+        func: The callable to wrap.
+
+    Returns:
+        A wrapper with ``func``'s metadata that forwards all arguments to
+        ``func`` and returns its result. Exceptions raised by ``func``
+        propagate unchanged.
+    """
+
+    @functools.wraps(func)
+    def wrapper(*args, **kwargs):
+        global global_counter
+        global_counter += 1
+        prefix = "--" * global_counter
+
+        start_time = time.monotonic()
+        print(f"|{prefix}{func.__qualname__} started at {start_time:.2f} seconds")
+        try:
+            return func(*args, **kwargs)
+        finally:
+            # Runs on success and on error: without this, an exception in
+            # func would skip the decrement and leave every later log line
+            # indented one level too deep.
+            end_time = time.monotonic()
+            print(
+                f"|{prefix}{func.__qualname__} ended at {end_time:.2f} seconds. "
+                f"Runtime: {end_time - start_time:.2f} seconds"
+            )
+            global_counter -= 1
+
+    return wrapper
diff --git a/bigframes/session/__init__.py b/bigframes/session/__init__.py
@@ -130,6 +130,8 @@ class Session(
             An object providing client library objects.
     """
 
+    import bigframes.perf_inspect as perf_inspect
+    @perf_inspect.runtime_logger
     def __init__(
         self,
         context: Optional[bigquery_options.BigQueryOptions] = None,
@@ -143,7 +145,6 @@ def __init__(
 
         if context is None:
             context = bigquery_options.BigQueryOptions()
-
         if context.location is None:
             self._location = "US"
             msg = bfe.format_message(
@@ -184,7 +185,6 @@ def __init__(
                 client_endpoints_override=context.client_endpoints_override,
                 requests_transport_adapters=context.requests_transport_adapters,
             )
-
         # TODO(shobs): Remove this logic after https://github.com/ibis-project/ibis/issues/8494
         # has been fixed. The ibis client changes the default query job config
         # so we are going to remember the current config and restore it after
diff --git a/bigframes/session/bq_caching_executor.py b/bigframes/session/bq_caching_executor.py
@@ -51,6 +51,7 @@
 import bigframes.session.metrics
 import bigframes.session.planner
 import bigframes.session.temporary_storage
+import bigframes.perf_inspect as perf_inspect
 
 # Max complexity that should be executed as a single query
 QUERY_COMPLEXITY_LIMIT = 1e7
@@ -182,6 +183,8 @@ def to_sql(
         compiled = compile.compile_sql(compile.CompileRequest(node, sort_rows=ordered))
         return compiled.sql
 
+
+    @perf_inspect.runtime_logger
     def execute(
         self,
         array_value: bigframes.core.ArrayValue,
diff --git a/bigframes/session/clients.py b/bigframes/session/clients.py
@@ -50,6 +50,8 @@
 _BIGQUERYSTORAGE_REGIONAL_ENDPOINT = "bigquerystorage.{location}.rep.googleapis.com"
 
 
+import bigframes.perf_inspect as perf_inspect
+@perf_inspect.runtime_logger
 def _get_default_credentials_with_project():
     return pydata_google_auth.default(scopes=_SCOPES, use_local_webserver=False)
 
@@ -72,6 +74,8 @@ def _get_application_names():
 class ClientsProvider:
     """Provides client instances necessary to perform cloud operations."""
 
+    import bigframes.perf_inspect as perf_inspect
+    @perf_inspect.runtime_logger
     def __init__(
         self,
         project: Optional[str] = None,
diff --git a/bigframes/session/executor.py b/bigframes/session/executor.py
@@ -30,6 +30,7 @@
 import bigframes.core.schema
 import bigframes.session._io.pandas as io_pandas
 import bigframes.session.execution_spec as ex_spec
+import bigframes.perf_inspect as perf_inspect
 
 _ROW_LIMIT_EXCEEDED_TEMPLATE = (
     "Execution has downloaded {result_rows} rows so far, which exceeds the "
@@ -150,6 +151,7 @@ def to_sql(
         raise NotImplementedError("to_sql not implemented for this executor")
 
     @abc.abstractmethod
+    @perf_inspect.runtime_logger
     def execute(
         self,
         array_value: bigframes.core.ArrayValue,
diff --git a/notebooks/perf_inspect.ipynb b/notebooks/perf_inspect.ipynb