Skip to content

Commit 2398a67

Browse files
committed
test: fix execution count assertions
1 parent b669473 commit 2398a67

3 files changed

Lines changed: 112 additions & 36 deletions

File tree

packages/bigframes/tests/system/small/session/test_read_gbq_colab.py

Lines changed: 45 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -89,11 +89,20 @@ def test_read_gbq_colab_fresh_session_is_hybrid():
8989

9090
assert len(result) == 100
9191
assert session._executor._enable_polars_execution is True # type: ignore
92-
assert executions_after == executions_before_python == 1
92+
assert executions_before_python == 1
93+
assert executions_after == 2
94+
history = session.execution_history().to_dataframe()
95+
assert history.iloc[-1]["job_type"] == "polars"
9396

9497

9598
def test_read_gbq_colab_peek_avoids_requery(maybe_ordered_session):
96-
executions_before_sql = maybe_ordered_session._metrics.execution_count
99+
history_before = maybe_ordered_session.execution_history().to_dataframe()
100+
queries_before = (
101+
len(history_before[history_before["job_type"] == "query"])
102+
if "job_type" in history_before.columns
103+
else 0
104+
)
105+
97106
df = maybe_ordered_session._read_gbq_colab(
98107
"""
99108
SELECT
@@ -107,20 +116,36 @@ def test_read_gbq_colab_peek_avoids_requery(maybe_ordered_session):
107116
LIMIT 300
108117
"""
109118
)
110-
executions_before_python = maybe_ordered_session._metrics.execution_count
119+
120+
history_after_read = maybe_ordered_session.execution_history().to_dataframe()
121+
queries_after_read = len(
122+
history_after_read[history_after_read["job_type"] == "query"]
123+
)
124+
111125
result = df.peek(100)
112-
executions_after = maybe_ordered_session._metrics.execution_count
126+
127+
history_after_peek = maybe_ordered_session.execution_history().to_dataframe()
128+
queries_after_peek = len(
129+
history_after_peek[history_after_peek["job_type"] == "query"]
130+
)
113131

114132
# Note: this ordering isn't guaranteed by peek, but it should hold with the Read API-based implementation.
115133
# If this assertion starts failing, peek may have stopped using the Read API.
116134
assert result["total"].is_monotonic_decreasing
117135

118136
assert len(result) == 100
119-
assert executions_after == executions_before_python == executions_before_sql + 1
137+
assert queries_after_read == queries_before + 1
138+
assert queries_after_peek == queries_after_read
120139

121140

122141
def test_read_gbq_colab_repr_avoids_requery(maybe_ordered_session):
123-
executions_before_sql = maybe_ordered_session._metrics.execution_count
142+
history_before = maybe_ordered_session.execution_history().to_dataframe()
143+
queries_before = (
144+
len(history_before[history_before["job_type"] == "query"])
145+
if "job_type" in history_before.columns
146+
else 0
147+
)
148+
124149
df = maybe_ordered_session._read_gbq_colab(
125150
"""
126151
SELECT
@@ -134,10 +159,21 @@ def test_read_gbq_colab_repr_avoids_requery(maybe_ordered_session):
134159
LIMIT 300
135160
"""
136161
)
137-
executions_before_python = maybe_ordered_session._metrics.execution_count
162+
163+
history_after_read = maybe_ordered_session.execution_history().to_dataframe()
164+
queries_after_read = len(
165+
history_after_read[history_after_read["job_type"] == "query"]
166+
)
167+
138168
_ = repr(df)
139-
executions_after = maybe_ordered_session._metrics.execution_count
140-
assert executions_after == executions_before_python == executions_before_sql + 1
169+
170+
history_after_repr = maybe_ordered_session.execution_history().to_dataframe()
171+
queries_after_repr = len(
172+
history_after_repr[history_after_repr["job_type"] == "query"]
173+
)
174+
175+
assert queries_after_read == queries_before + 1
176+
assert queries_after_repr == queries_after_read
141177

142178

143179
def test_read_gbq_colab_includes_formatted_scalars(session):

packages/bigframes/tests/system/small/test_dataframe.py

Lines changed: 55 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -945,41 +945,55 @@ def test_join_repr(scalars_dfs_maybe_ordered):
945945

946946

947947
def test_repr_w_display_options(scalars_dfs, session):
948-
metrics = session._metrics
949948
scalars_df, _ = scalars_dfs
950949
# get a pandas df of the expected format
951950
df, _ = scalars_df._block.to_pandas()
952951
pandas_df = df.set_axis(scalars_df._block.column_labels, axis=1)
953952
pandas_df.index.name = scalars_df.index.name
954953

955-
executions_pre = metrics.execution_count
954+
history_pre = session.execution_history().to_dataframe()
955+
queries_pre = (
956+
len(history_pre[history_pre["job_type"] == "query"])
957+
if "job_type" in history_pre.columns
958+
else 0
959+
)
960+
956961
with bigframes.option_context(
957962
"display.max_rows", 10, "display.max_columns", 5, "display.max_colwidth", 10
958963
):
959964
# When there are 10 or fewer rows, the outputs should be identical except for the extra note.
960965
actual = scalars_df.head(10).__repr__()
961-
executions_post = metrics.execution_count
966+
967+
history_post = session.execution_history().to_dataframe()
968+
queries_post = len(history_post[history_post["job_type"] == "query"])
962969

963970
with display_options.pandas_repr(bigframes.options.display):
964971
pandas_repr = pandas_df.head(10).__repr__()
965972

966973
assert actual == pandas_repr
967-
assert (executions_post - executions_pre) <= 3
974+
assert (queries_post - queries_pre) <= 2
968975

969976

970977
def test_mimebundle_html_repr_w_all_rows(scalars_dfs, session):
971-
metrics = session._metrics
972978
scalars_df, _ = scalars_dfs
973979
# get a pandas df of the expected format
974980
df, _ = scalars_df._block.to_pandas()
975981
pandas_df = df.set_axis(scalars_df._block.column_labels, axis=1)
976982
pandas_df.index.name = scalars_df.index.name
977983

978-
executions_pre = metrics.execution_count
984+
history_pre = session.execution_history().to_dataframe()
985+
queries_pre = (
986+
len(history_pre[history_pre["job_type"] == "query"])
987+
if "job_type" in history_pre.columns
988+
else 0
989+
)
990+
979991
# When there are 10 or fewer rows, the outputs should be identical except for the extra note.
980992
bundle = scalars_df.head(10)._repr_mimebundle_()
981993
actual = bundle["text/html"]
982-
executions_post = metrics.execution_count
994+
995+
history_post = session.execution_history().to_dataframe()
996+
queries_post = len(history_post[history_post["job_type"] == "query"])
983997

984998
with display_options.pandas_repr(bigframes.options.display):
985999
pandas_repr = pandas_df.head(10)._repr_html_()
@@ -989,7 +1003,7 @@ def test_mimebundle_html_repr_w_all_rows(scalars_dfs, session):
9891003
+ f"[{len(pandas_df.index)} rows x {len(pandas_df.columns)} columns in total]"
9901004
)
9911005
assert actual == expected
992-
assert (executions_post - executions_pre) <= 3
1006+
assert (queries_post - queries_pre) <= 2
9931007

9941008

9951009
def test_df_column_name_with_space(scalars_dfs):
@@ -3094,18 +3108,23 @@ def test_binop_with_self_aggregate(scalars_dfs_maybe_ordered):
30943108

30953109
df_columns = ["int64_col", "float64_col", "int64_too"]
30963110

3097-
# Ensure that this takes the optimized single-query path by counting executions
3098-
execution_count_before = scalars_df._session._metrics.execution_count
3111+
history_before = scalars_df._session.execution_history().to_dataframe()
3112+
queries_before = (
3113+
len(history_before[history_before["job_type"] == "query"])
3114+
if "job_type" in history_before.columns
3115+
else 0
3116+
)
3117+
30993118
bf_df = scalars_df[df_columns]
31003119
bf_result = (bf_df - bf_df.mean()).to_pandas()
3101-
execution_count_after = scalars_df._session._metrics.execution_count
3120+
3121+
history_after = scalars_df._session.execution_history().to_dataframe()
3122+
queries_after = len(history_after[history_after["job_type"] == "query"])
31023123

31033124
pd_df = scalars_pandas_df[df_columns]
31043125
pd_result = pd_df - pd_df.mean()
31053126

3106-
executions = execution_count_after - execution_count_before
3107-
3108-
assert executions == 1
3127+
assert (queries_after - queries_before) == 1
31093128
assert_frame_equal(bf_result, pd_result, check_dtype=False)
31103129

31113130

@@ -3114,18 +3133,23 @@ def test_binop_with_self_aggregate_w_index_reset(scalars_dfs_maybe_ordered):
31143133

31153134
df_columns = ["int64_col", "float64_col", "int64_too"]
31163135

3117-
# Ensure that this takes the optimized single-query path by counting executions
3118-
execution_count_before = scalars_df._session._metrics.execution_count
3136+
history_before = scalars_df._session.execution_history().to_dataframe()
3137+
queries_before = (
3138+
len(history_before[history_before["job_type"] == "query"])
3139+
if "job_type" in history_before.columns
3140+
else 0
3141+
)
3142+
31193143
bf_df = scalars_df[df_columns].reset_index(drop=True)
31203144
bf_result = (bf_df - bf_df.mean()).to_pandas()
3121-
execution_count_after = scalars_df._session._metrics.execution_count
3145+
3146+
history_after = scalars_df._session.execution_history().to_dataframe()
3147+
queries_after = len(history_after[history_after["job_type"] == "query"])
31223148

31233149
pd_df = scalars_pandas_df[df_columns].reset_index(drop=True)
31243150
pd_result = pd_df - pd_df.mean()
31253151

3126-
executions = execution_count_after - execution_count_before
3127-
3128-
assert executions == 1
3152+
assert (queries_after - queries_before) == 1
31293153
pd_result.index = pd_result.index.astype("Int64")
31303154
assert_frame_equal(bf_result, pd_result, check_dtype=False, check_index_type=False)
31313155

@@ -5948,16 +5972,22 @@ def test_dataframe_explode(col_names, ignore_index, session):
59485972
"C": [["a", "b", "c"], np.nan, ["d", "e"]],
59495973
}
59505974

5951-
metrics = session._metrics
59525975
df = bpd.DataFrame(data, session=session)
59535976
pd_df = df.to_pandas()
59545977
pd_result = pd_df.explode(col_names, ignore_index=ignore_index)
59555978
bf_result = df.explode(col_names, ignore_index=ignore_index)
59565979

5957-
# Check that to_pandas() results in at most a single query execution
5958-
execs_pre = metrics.execution_count
5980+
history_pre = session.execution_history().to_dataframe()
5981+
queries_pre = (
5982+
len(history_pre[history_pre["job_type"] == "query"])
5983+
if "job_type" in history_pre.columns
5984+
else 0
5985+
)
5986+
59595987
bf_materialized = bf_result.to_pandas()
5960-
execs_post = metrics.execution_count
5988+
5989+
history_post = session.execution_history().to_dataframe()
5990+
queries_post = len(history_post[history_post["job_type"] == "query"])
59615991

59625992
bigframes.testing.utils.assert_frame_equal(
59635993
bf_materialized,
@@ -5967,7 +5997,7 @@ def test_dataframe_explode(col_names, ignore_index, session):
59675997
)
59685998
# we test this property on this method in particular as compilation
59695999
# is non-deterministic and won't use the query cache as implemented
5970-
assert execs_post - execs_pre <= 1
6000+
assert (queries_post - queries_pre) <= 1
59716001

59726002

59736003
@pytest.mark.parametrize(

packages/bigframes/tests/system/small/test_series_io.py

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -30,13 +30,23 @@ def test_to_pandas_override_global_option(scalars_df_index):
3030
assert table_id is not None
3131

3232
session = bf_series._block.session
33-
execution_count = session._metrics.execution_count
33+
34+
history_before = session.execution_history().to_dataframe()
35+
queries_before = (
36+
len(history_before[history_before["job_type"] == "query"])
37+
if "job_type" in history_before.columns
38+
else 0
39+
)
3440

3541
# When allow_large_results=False, a query_job object should not be created.
3642
# Therefore, the table_id should remain unchanged.
3743
bf_series.to_pandas(allow_large_results=False)
3844
assert bf_series._query_job.destination.table_id == table_id
39-
assert session._metrics.execution_count - execution_count == 1
45+
46+
history_after = session.execution_history().to_dataframe()
47+
queries_after = len(history_after[history_after["job_type"] == "query"])
48+
49+
assert (queries_after - queries_before) == 1
4050

4151

4252
@pytest.mark.parametrize(

0 commit comments

Comments
 (0)