Skip to content

Commit 35a379d

Browse files
committed
Implement execution history
1 parent 8e92bae commit 35a379d

File tree

8 files changed

+314
-38
lines changed

8 files changed

+314
-38
lines changed

packages/bigframes/bigframes/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@
4545
from bigframes._config.bigquery_options import BigQueryOptions # noqa: E402
4646
from bigframes.core.global_session import ( # noqa: E402
4747
close_session,
48+
execution_history,
4849
get_global_session,
4950
)
5051
from bigframes.session import Session, connect # noqa: E402
@@ -69,6 +70,7 @@ def load_ipython_extension(ipython):
6970
"BigQueryOptions",
7071
"get_global_session",
7172
"close_session",
73+
"execution_history",
7274
"enums",
7375
"exceptions",
7476
"connect",

packages/bigframes/bigframes/core/global_session.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626
import bigframes.exceptions as bfe
2727

2828
if TYPE_CHECKING:
29+
import pandas
2930
import bigframes.session
3031

3132
_global_session: Optional[bigframes.session.Session] = None
@@ -124,6 +125,14 @@ def with_default_session(func_: Callable[..., _T], *args, **kwargs) -> _T:
124125
return func_(get_global_session(), *args, **kwargs)
125126

126127

128+
def execution_history() -> "pandas.DataFrame":
129+
import pandas # noqa: F401
130+
131+
import bigframes.session
132+
133+
return with_default_session(bigframes.session.Session.execution_history)
134+
135+
127136
class _GlobalSessionContext:
128137
"""
129138
Context manager for testing that sets global session.

packages/bigframes/bigframes/session/__init__.py

Lines changed: 60 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -109,6 +109,39 @@
109109
logger = logging.getLogger(__name__)
110110

111111

112+
class _ExecutionHistory(pandas.DataFrame):
113+
@property
114+
def _constructor(self):
115+
return _ExecutionHistory
116+
117+
def _repr_html_(self) -> str | None:
118+
try:
119+
import bigframes.formatting_helpers as formatter
120+
121+
if self.empty:
122+
return "<div>No executions found.</div>"
123+
124+
cols = ["job_id", "status", "total_bytes_processed", "job_url"]
125+
df_display = self[cols].copy()
126+
df_display["total_bytes_processed"] = df_display[
127+
"total_bytes_processed"
128+
].apply(formatter.get_formatted_bytes)
129+
130+
def format_url(url):
131+
return f'<a target="_blank" href="{url}">Open Job</a>' if url else ""
132+
133+
df_display["job_url"] = df_display["job_url"].apply(format_url)
134+
135+
# Rename job_id to query_id to match user expectations
136+
df_display = df_display.rename(columns={"job_id": "query_id"})
137+
138+
compact_html = df_display.to_html(escape=False, index=False)
139+
140+
return compact_html
141+
except Exception:
142+
return super()._repr_html_() # type: ignore
143+
144+
112145
@log_adapter.class_logger
113146
class Session(
114147
third_party_pandas_gbq.GBQIOMixin,
@@ -233,6 +266,7 @@ def __init__(
233266
)
234267

235268
self._metrics = metrics.ExecutionMetrics()
269+
self._publisher.subscribe(self._metrics.on_event)
236270
self._function_session = bff_session.FunctionSession()
237271
self._anon_dataset_manager = anonymous_dataset.AnonymousDatasetManager(
238272
self._clients_provider.bqclient,
@@ -371,6 +405,10 @@ def slot_millis_sum(self):
371405
"""The sum of all slot time used by bigquery jobs in this session."""
372406
return self._metrics.slot_millis
373407

408+
def execution_history(self) -> pandas.DataFrame:
409+
"""Returns a list of underlying BigQuery executions initiated by BigFrames in the current session."""
410+
return _ExecutionHistory([job.__dict__ for job in self._metrics.jobs])
411+
374412
@property
375413
def _allows_ambiguity(self) -> bool:
376414
return self._allow_ambiguity
@@ -432,7 +470,8 @@ def read_gbq( # type: ignore[overload-overlap]
432470
col_order: Iterable[str] = ...,
433471
dry_run: Literal[False] = ...,
434472
allow_large_results: Optional[bool] = ...,
435-
) -> dataframe.DataFrame: ...
473+
) -> dataframe.DataFrame:
474+
...
436475

437476
@overload
438477
def read_gbq(
@@ -448,7 +487,8 @@ def read_gbq(
448487
col_order: Iterable[str] = ...,
449488
dry_run: Literal[True] = ...,
450489
allow_large_results: Optional[bool] = ...,
451-
) -> pandas.Series: ...
490+
) -> pandas.Series:
491+
...
452492

453493
def read_gbq(
454494
self,
@@ -520,7 +560,8 @@ def _read_gbq_colab(
520560
*,
521561
pyformat_args: Optional[Dict[str, Any]] = None,
522562
dry_run: Literal[False] = ...,
523-
) -> dataframe.DataFrame: ...
563+
) -> dataframe.DataFrame:
564+
...
524565

525566
@overload
526567
def _read_gbq_colab(
@@ -529,7 +570,8 @@ def _read_gbq_colab(
529570
*,
530571
pyformat_args: Optional[Dict[str, Any]] = None,
531572
dry_run: Literal[True] = ...,
532-
) -> pandas.Series: ...
573+
) -> pandas.Series:
574+
...
533575

534576
@log_adapter.log_name_override("read_gbq_colab")
535577
def _read_gbq_colab(
@@ -590,7 +632,8 @@ def read_gbq_query( # type: ignore[overload-overlap]
590632
filters: third_party_pandas_gbq.FiltersType = ...,
591633
dry_run: Literal[False] = ...,
592634
allow_large_results: Optional[bool] = ...,
593-
) -> dataframe.DataFrame: ...
635+
) -> dataframe.DataFrame:
636+
...
594637

595638
@overload
596639
def read_gbq_query(
@@ -606,7 +649,8 @@ def read_gbq_query(
606649
filters: third_party_pandas_gbq.FiltersType = ...,
607650
dry_run: Literal[True] = ...,
608651
allow_large_results: Optional[bool] = ...,
609-
) -> pandas.Series: ...
652+
) -> pandas.Series:
653+
...
610654

611655
def read_gbq_query(
612656
self,
@@ -753,7 +797,8 @@ def read_gbq_table( # type: ignore[overload-overlap]
753797
use_cache: bool = ...,
754798
col_order: Iterable[str] = ...,
755799
dry_run: Literal[False] = ...,
756-
) -> dataframe.DataFrame: ...
800+
) -> dataframe.DataFrame:
801+
...
757802

758803
@overload
759804
def read_gbq_table(
@@ -767,7 +812,8 @@ def read_gbq_table(
767812
use_cache: bool = ...,
768813
col_order: Iterable[str] = ...,
769814
dry_run: Literal[True] = ...,
770-
) -> pandas.Series: ...
815+
) -> pandas.Series:
816+
...
771817

772818
def read_gbq_table(
773819
self,
@@ -918,23 +964,26 @@ def read_pandas(
918964
pandas_dataframe: pandas.Index,
919965
*,
920966
write_engine: constants.WriteEngineType = "default",
921-
) -> bigframes.core.indexes.Index: ...
967+
) -> bigframes.core.indexes.Index:
968+
...
922969

923970
@typing.overload
924971
def read_pandas(
925972
self,
926973
pandas_dataframe: pandas.Series,
927974
*,
928975
write_engine: constants.WriteEngineType = "default",
929-
) -> bigframes.series.Series: ...
976+
) -> bigframes.series.Series:
977+
...
930978

931979
@typing.overload
932980
def read_pandas(
933981
self,
934982
pandas_dataframe: pandas.DataFrame,
935983
*,
936984
write_engine: constants.WriteEngineType = "default",
937-
) -> dataframe.DataFrame: ...
985+
) -> dataframe.DataFrame:
986+
...
938987

939988
def read_pandas(
940989
self,

packages/bigframes/bigframes/session/loader.py

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,8 @@
4949
import google.cloud.bigquery
5050
import google.cloud.bigquery as bigquery
5151
import google.cloud.bigquery.table
52+
from google.cloud.bigquery.job.load import LoadJob
53+
from google.cloud.bigquery.job.query import QueryJob
5254
import pandas
5355
import pyarrow as pa
5456
from google.cloud import bigquery_storage_v1
@@ -605,6 +607,9 @@ def _start_generic_job(self, job: formatting_helpers.GenericJob):
605607
else:
606608
job.result()
607609

610+
if self._metrics is not None and isinstance(job, (QueryJob, LoadJob)):
611+
self._metrics.count_job_stats(query_job=job)
612+
608613
@overload
609614
def read_gbq_table( # type: ignore[overload-overlap]
610615
self,
@@ -626,7 +631,8 @@ def read_gbq_table( # type: ignore[overload-overlap]
626631
n_rows: Optional[int] = None,
627632
index_col_in_columns: bool = False,
628633
publish_execution: bool = True,
629-
) -> dataframe.DataFrame: ...
634+
) -> dataframe.DataFrame:
635+
...
630636

631637
@overload
632638
def read_gbq_table(
@@ -649,7 +655,8 @@ def read_gbq_table(
649655
n_rows: Optional[int] = None,
650656
index_col_in_columns: bool = False,
651657
publish_execution: bool = True,
652-
) -> pandas.Series: ...
658+
) -> pandas.Series:
659+
...
653660

654661
def read_gbq_table(
655662
self,
@@ -1133,7 +1140,8 @@ def read_gbq_query( # type: ignore[overload-overlap]
11331140
dry_run: Literal[False] = ...,
11341141
force_total_order: Optional[bool] = ...,
11351142
allow_large_results: bool,
1136-
) -> dataframe.DataFrame: ...
1143+
) -> dataframe.DataFrame:
1144+
...
11371145

11381146
@overload
11391147
def read_gbq_query(
@@ -1149,7 +1157,8 @@ def read_gbq_query(
11491157
dry_run: Literal[True] = ...,
11501158
force_total_order: Optional[bool] = ...,
11511159
allow_large_results: bool,
1152-
) -> pandas.Series: ...
1160+
) -> pandas.Series:
1161+
...
11531162

11541163
def read_gbq_query(
11551164
self,

0 commit comments

Comments
 (0)