Skip to content

Commit 30f0a2b

Browse files
committed
refactor: Address review comments from Trevor
1 parent df8dbcd commit 30f0a2b

File tree

2 files changed: +42 additions, -41 deletions

2 files changed: +42 additions, -41 deletions

packages/bigframes/bigframes/session/__init__.py

Lines changed: 38 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -109,35 +109,44 @@
109109
logger = logging.getLogger(__name__)
110110

111111

112-
class _ExecutionHistory(pandas.DataFrame):
113-
@property
114-
def _constructor(self):
115-
return _ExecutionHistory
112+
class _ExecutionHistory:
113+
def __init__(self, jobs: list[dict]):
114+
self._df = pandas.DataFrame(jobs)
115+
116+
def to_dataframe(self) -> pandas.DataFrame:
117+
"""Returns the execution history as a pandas DataFrame."""
118+
return self._df
116119

117120
def _repr_html_(self) -> str | None:
118121
import bigframes.formatting_helpers as formatter
119122

120-
if self.empty:
123+
if self._df.empty:
121124
return "<div>No executions found.</div>"
122125

123-
cols = ["job_id", "status", "total_bytes_processed", "job_url"]
126+
cols = ["job_type", "job_id", "status", "total_bytes_processed", "job_url"]
127+
128+
# Filter columns to only those that exist in the dataframe
129+
available_cols = [c for c in cols if c in self._df.columns]
124130

125131
def format_url(url):
126132
return f'<a target="_blank" href="{url}">Open Job</a>' if url else ""
127133

128134
try:
129-
df_display = self[cols].copy()
130-
df_display["total_bytes_processed"] = df_display[
131-
"total_bytes_processed"
132-
].apply(formatter.get_formatted_bytes)
133-
df_display["job_url"] = df_display["job_url"].apply(format_url)
135+
df_display = self._df[available_cols].copy()
136+
if "total_bytes_processed" in df_display.columns:
137+
df_display["total_bytes_processed"] = df_display[
138+
"total_bytes_processed"
139+
].apply(formatter.get_formatted_bytes)
140+
if "job_url" in df_display.columns:
141+
df_display["job_url"] = df_display["job_url"].apply(format_url)
134142

135143
# Rename job_id to query_id to match user expectations
136-
df_display = df_display.rename(columns={"job_id": "query_id"})
144+
if "job_id" in df_display.columns:
145+
df_display = df_display.rename(columns={"job_id": "query_id"})
137146

138147
return df_display.to_html(escape=False, index=False)
139148
except Exception:
140-
return super()._repr_html_() # type: ignore
149+
return self._df._repr_html_()
141150

142151

143152
@log_adapter.class_logger
@@ -403,8 +412,11 @@ def slot_millis_sum(self):
403412
"""The sum of all slot time used by bigquery jobs in this session."""
404413
return self._metrics.slot_millis
405414

406-
def execution_history(self) -> pandas.DataFrame:
407-
"""Returns a list of underlying BigQuery executions initiated by BigFrames in the current session."""
415+
def execution_history(self) -> _ExecutionHistory:
416+
"""Returns the history of executions initiated by BigFrames in the current session.
417+
418+
Use `.to_dataframe()` on the result to get a pandas DataFrame.
419+
"""
408420
return _ExecutionHistory([job.__dict__ for job in self._metrics.jobs])
409421

410422
@property
@@ -468,8 +480,7 @@ def read_gbq( # type: ignore[overload-overlap]
468480
col_order: Iterable[str] = ...,
469481
dry_run: Literal[False] = ...,
470482
allow_large_results: Optional[bool] = ...,
471-
) -> dataframe.DataFrame:
472-
...
483+
) -> dataframe.DataFrame: ...
473484

474485
@overload
475486
def read_gbq(
@@ -485,8 +496,7 @@ def read_gbq(
485496
col_order: Iterable[str] = ...,
486497
dry_run: Literal[True] = ...,
487498
allow_large_results: Optional[bool] = ...,
488-
) -> pandas.Series:
489-
...
499+
) -> pandas.Series: ...
490500

491501
def read_gbq(
492502
self,
@@ -558,8 +568,7 @@ def _read_gbq_colab(
558568
*,
559569
pyformat_args: Optional[Dict[str, Any]] = None,
560570
dry_run: Literal[False] = ...,
561-
) -> dataframe.DataFrame:
562-
...
571+
) -> dataframe.DataFrame: ...
563572

564573
@overload
565574
def _read_gbq_colab(
@@ -568,8 +577,7 @@ def _read_gbq_colab(
568577
*,
569578
pyformat_args: Optional[Dict[str, Any]] = None,
570579
dry_run: Literal[True] = ...,
571-
) -> pandas.Series:
572-
...
580+
) -> pandas.Series: ...
573581

574582
@log_adapter.log_name_override("read_gbq_colab")
575583
def _read_gbq_colab(
@@ -630,8 +638,7 @@ def read_gbq_query( # type: ignore[overload-overlap]
630638
filters: third_party_pandas_gbq.FiltersType = ...,
631639
dry_run: Literal[False] = ...,
632640
allow_large_results: Optional[bool] = ...,
633-
) -> dataframe.DataFrame:
634-
...
641+
) -> dataframe.DataFrame: ...
635642

636643
@overload
637644
def read_gbq_query(
@@ -647,8 +654,7 @@ def read_gbq_query(
647654
filters: third_party_pandas_gbq.FiltersType = ...,
648655
dry_run: Literal[True] = ...,
649656
allow_large_results: Optional[bool] = ...,
650-
) -> pandas.Series:
651-
...
657+
) -> pandas.Series: ...
652658

653659
def read_gbq_query(
654660
self,
@@ -795,8 +801,7 @@ def read_gbq_table( # type: ignore[overload-overlap]
795801
use_cache: bool = ...,
796802
col_order: Iterable[str] = ...,
797803
dry_run: Literal[False] = ...,
798-
) -> dataframe.DataFrame:
799-
...
804+
) -> dataframe.DataFrame: ...
800805

801806
@overload
802807
def read_gbq_table(
@@ -810,8 +815,7 @@ def read_gbq_table(
810815
use_cache: bool = ...,
811816
col_order: Iterable[str] = ...,
812817
dry_run: Literal[True] = ...,
813-
) -> pandas.Series:
814-
...
818+
) -> pandas.Series: ...
815819

816820
def read_gbq_table(
817821
self,
@@ -962,26 +966,23 @@ def read_pandas(
962966
pandas_dataframe: pandas.Index,
963967
*,
964968
write_engine: constants.WriteEngineType = "default",
965-
) -> bigframes.core.indexes.Index:
966-
...
969+
) -> bigframes.core.indexes.Index: ...
967970

968971
@typing.overload
969972
def read_pandas(
970973
self,
971974
pandas_dataframe: pandas.Series,
972975
*,
973976
write_engine: constants.WriteEngineType = "default",
974-
) -> bigframes.series.Series:
975-
...
977+
) -> bigframes.series.Series: ...
976978

977979
@typing.overload
978980
def read_pandas(
979981
self,
980982
pandas_dataframe: pandas.DataFrame,
981983
*,
982984
write_engine: constants.WriteEngineType = "default",
983-
) -> dataframe.DataFrame:
984-
...
985+
) -> dataframe.DataFrame: ...
985986

986987
def read_pandas(
987988
self,

packages/bigframes/tests/system/small/test_polars_execution.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ def test_polar_execution_sorted(session_w_polars, scalars_pandas_df_index):
3939
]
4040
bf_result = bf_df.sort_index(ascending=False)[["int64_too", "bool_col"]].to_pandas()
4141

42-
assert session_w_polars._metrics.execution_count == execution_count_before
42+
assert session_w_polars._metrics.execution_count == execution_count_before + 1
4343
assert_frame_equal(bf_result, pd_result)
4444

4545

@@ -56,7 +56,7 @@ def test_polar_execution_sorted_filtered(session_w_polars, scalars_pandas_df_ind
5656
.to_pandas()
5757
)
5858

59-
assert session_w_polars._metrics.execution_count == execution_count_before
59+
assert session_w_polars._metrics.execution_count == execution_count_before + 1
6060
assert_frame_equal(bf_result, pd_result)
6161

6262

@@ -70,7 +70,7 @@ def test_polar_execution_unsupported_sql_fallback(
7070
bf_result = bf_df.to_pandas()
7171

7272
# geo fns not supported by polar engine yet, so falls back to bq execution
73-
assert session_w_polars._metrics.execution_count == (execution_count_before + 1)
73+
assert session_w_polars._metrics.execution_count == (execution_count_before + 2)
7474
assert math.isclose(bf_result.geo_area.sum(), 70.52332050, rel_tol=0.00001)
7575

7676

@@ -87,7 +87,7 @@ def test_polars_execution_history(session_w_polars):
8787
_ = df.to_pandas()
8888

8989
# Verify the execution history captured the local job
90-
history = session_w_polars.execution_history()
90+
history = session_w_polars.execution_history().to_dataframe()
9191

9292
# Verify we have at least one job and logged as polars
9393
assert len(history) > 0

Comments (0)