Patch notes (free text placed ahead of the first "diff --git" header; it is
not part of any hunk).

tests/integ/modin/hybrid/conftest.py
  * Imports Utils next to IS_WINDOWS from tests.utils.
  * Replaces the module-scoped fixture init_transaction_tables with two
    module-scoped fixtures:
      - module_scoped_test_table_name(session): yields
        Utils.random_table_name() with "TESTTABLENAME" appended, and calls
        Utils.drop_table(session, <name>) from a finally block.
      - revenue_transactions(module_scoped_test_table_name): uses pd.session
        to CREATE OR REPLACE a TEMP table under that name, fills it from
        GENERATOR(ROWCOUNT => 1000000) (the removed line used 10000000), and
        returns the table name.
  * NOTE(review): module_scoped_test_table_name contains a yield, so the
    "-> str" annotation describes the yielded value rather than what the
    function itself returns -- confirm whether an Iterator/Generator
    annotation is preferred in this code base.
  * NOTE(review): teardown goes through the "session" fixture while setup
    goes through pd.session; presumably both refer to the same session --
    confirm.

tests/integ/modin/hybrid/test_switch_operations.py
  * test_merge, test_filtered_data, test_apply, test_explain_switch and
    test_query_count_no_switch request revenue_transactions instead of
    init_transaction_tables and pass the fixture value to pd.read_snowflake
    in place of the literal "REVENUE_TRANSACTIONS".
  * The raw SQL string in test_filtered_data becomes an f-string that
    interpolates the fixture value as the table name.

NOTE(review): this copy of the patch had its newlines collapsed. Line breaks,
blank context lines and indentation below were reconstructed so that every
hunk matches the line counts in its @@ header; verify against the target
files before applying.

diff --git a/tests/integ/modin/hybrid/conftest.py b/tests/integ/modin/hybrid/conftest.py
index 0990bf9add..2a62ab765b 100644
--- a/tests/integ/modin/hybrid/conftest.py
+++ b/tests/integ/modin/hybrid/conftest.py
@@ -7,7 +7,7 @@
 import modin.pandas as pd
 from modin.config import context as config_context
 
-from tests.utils import IS_WINDOWS
+from tests.utils import IS_WINDOWS, Utils
 
 
 if IS_WINDOWS:
@@ -28,25 +28,35 @@ def enable_autoswitch():
 
 
 @pytest.fixture(scope="module")
-def init_transaction_tables():
+def module_scoped_test_table_name(session) -> str:
+    test_table_name = f"{Utils.random_table_name()}TESTTABLENAME"
+    try:
+        yield test_table_name
+    finally:
+        Utils.drop_table(session, test_table_name)
+
+
+@pytest.fixture(scope="module")
+def revenue_transactions(module_scoped_test_table_name):
     session = pd.session
     session.sql(
-        """
-    CREATE OR REPLACE TABLE revenue_transactions (
+        f"""
+    CREATE OR REPLACE TEMP TABLE {module_scoped_test_table_name} (
         Transaction_ID STRING,
         Date DATE,
         Revenue FLOAT
     );"""
     ).collect()
     session.sql(
-        """INSERT INTO revenue_transactions (Transaction_ID, Date, Revenue)
+        f"""INSERT INTO {module_scoped_test_table_name} (Transaction_ID, Date, Revenue)
     SELECT
         UUID_STRING() AS Transaction_ID,
         DATEADD(DAY, UNIFORM(0, 800, RANDOM(0)), '2024-01-01') AS Date,
         UNIFORM(10, 1000, RANDOM(0)) AS Revenue
-    FROM TABLE(GENERATOR(ROWCOUNT => 10000000));
+    FROM TABLE(GENERATOR(ROWCOUNT => 1000000));
     """
     ).collect()
+    return module_scoped_test_table_name
 
 
 @pytest.fixture
diff --git a/tests/integ/modin/hybrid/test_switch_operations.py b/tests/integ/modin/hybrid/test_switch_operations.py
index bb00c2ce17..461da6089c 100644
--- a/tests/integ/modin/hybrid/test_switch_operations.py
+++ b/tests/integ/modin/hybrid/test_switch_operations.py
@@ -156,8 +156,8 @@ def test_move_to_me_cost_with_incompatible_dtype(caplog):
 
 # Newer version of modin switches before the merge
 @sql_count_checker(query_count=2 if MODIN_IS_AT_LEAST_0_37_0 else 0)
-def test_merge(init_transaction_tables, us_holidays_data):
-    df_transactions = pd.read_snowflake("REVENUE_TRANSACTIONS")
+def test_merge(revenue_transactions, us_holidays_data):
+    df_transactions = pd.read_snowflake(revenue_transactions)
     df_us_holidays = pd.DataFrame(us_holidays_data, columns=["Holiday", "Date"])
     assert df_transactions.get_backend() == "Snowflake"
     assert df_us_holidays.get_backend() == "Pandas"
@@ -174,9 +174,9 @@ def test_merge(init_transaction_tables, us_holidays_data):
 
 
 @sql_count_checker(query_count=2)
-def test_filtered_data(init_transaction_tables):
+def test_filtered_data(revenue_transactions):
     # When data is filtered, the engine should change when it is sufficiently small.
-    df_transactions = pd.read_snowflake("REVENUE_TRANSACTIONS")
+    df_transactions = pd.read_snowflake(revenue_transactions)
     assert df_transactions.get_backend() == "Snowflake"
     # in-place operations that do not change the backend
     # TODO: the following will result in an align which will grow the
@@ -203,7 +203,7 @@ def test_filtered_data(init_transaction_tables):
     # The SQL here is functionatly the same as above
     # Unlike in previous iterations of hybrid this does *not* move the data immediately
     df_transactions_filter2 = pd.read_snowflake(
-        "SELECT Date, SUM(Revenue) AS REVENUE FROM revenue_transactions WHERE Date >= DATEADD( 'days', -7, '2025-06-09' ) and Date < '2025-06-09' GROUP BY DATE"
+        f"SELECT Date, SUM(Revenue) AS REVENUE FROM {revenue_transactions} WHERE Date >= DATEADD( 'days', -7, '2025-06-09' ) and Date < '2025-06-09' GROUP BY DATE"
     )
     # We do not know the size of this data yet, because the query is entirely lazy
     assert df_transactions_filter2.get_backend() == "Snowflake"
@@ -227,8 +227,8 @@ def test_filtered_data(init_transaction_tables):
 
 
 @sql_count_checker(query_count=3)
-def test_apply(init_transaction_tables, us_holidays_data):
-    df_transactions = pd.read_snowflake("REVENUE_TRANSACTIONS").head(1000)
+def test_apply(revenue_transactions, us_holidays_data):
+    df_transactions = pd.read_snowflake(revenue_transactions).head(1000)
     assert df_transactions.get_backend() == "Snowflake"
     df_us_holidays = pd.DataFrame(us_holidays_data, columns=["Holiday", "Date"])
     df_us_holidays["Date"] = pd.to_datetime(df_us_holidays["Date"])
@@ -344,9 +344,9 @@ def test_explain_switch_empty():
 
 # Newer version of modin switches before the merge
 @sql_count_checker(query_count=2 if MODIN_IS_AT_LEAST_0_37_0 else 0)
-def test_explain_switch(init_transaction_tables, us_holidays_data):
+def test_explain_switch(revenue_transactions, us_holidays_data):
     clear_hybrid_switch_log()
-    df_transactions = pd.read_snowflake("REVENUE_TRANSACTIONS")
+    df_transactions = pd.read_snowflake(revenue_transactions)
     df_us_holidays = pd.DataFrame(us_holidays_data, columns=["Holiday", "Date"])
     pd.merge(df_us_holidays, df_transactions, left_on="Date", right_on="DATE")
     assert "decision" in str(pd.explain_switch())
@@ -459,7 +459,7 @@ def test_to_datetime():
     high_count_expected=True,
     high_count_reason="tests queries across different execution modes",
 )
-def test_query_count_no_switch(init_transaction_tables, use_session_param):
+def test_query_count_no_switch(revenue_transactions, use_session_param):
     """
     Tests that when there is no switching behavior the query count is the
     same under hybrid mode and non-hybrid mode.
@@ -471,7 +471,7 @@ def inner_test(df_in):
         df_result["COUNT"] = df_result.groupby("DATE")["REVENUE"].transform("count")
         return df_result
 
-    df_transactions = pd.read_snowflake("REVENUE_TRANSACTIONS")
+    df_transactions = pd.read_snowflake(revenue_transactions)
     inner_test(df_transactions)
     orig_len = None
     hybrid_len = None