Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 16 additions & 6 deletions tests/integ/modin/hybrid/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
import modin.pandas as pd
from modin.config import context as config_context

from tests.utils import IS_WINDOWS
from tests.utils import IS_WINDOWS, Utils


if IS_WINDOWS:
Expand All @@ -28,25 +28,35 @@ def enable_autoswitch():


@pytest.fixture(scope="module")
def init_transaction_tables():
def module_scoped_test_table_name(session) -> str:
test_table_name = f"{Utils.random_table_name()}TESTTABLENAME"
try:
yield test_table_name
finally:
Utils.drop_table(session, test_table_name)


@pytest.fixture(scope="module")
def revenue_transactions(module_scoped_test_table_name):
session = pd.session
session.sql(
"""
CREATE OR REPLACE TABLE revenue_transactions (
f"""
CREATE OR REPLACE TEMP TABLE {module_scoped_test_table_name} (

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

TEMP is not necessary now that we're using a different table name in each pytest module, but that's ok.

Transaction_ID STRING,
Date DATE,
Revenue FLOAT
);"""
).collect()
session.sql(
"""INSERT INTO revenue_transactions (Transaction_ID, Date, Revenue)
f"""INSERT INTO {module_scoped_test_table_name} (Transaction_ID, Date, Revenue)
SELECT
UUID_STRING() AS Transaction_ID,
DATEADD(DAY, UNIFORM(0, 800, RANDOM(0)), '2024-01-01') AS Date,
UNIFORM(10, 1000, RANDOM(0)) AS Revenue
FROM TABLE(GENERATOR(ROWCOUNT => 10000000));
FROM TABLE(GENERATOR(ROWCOUNT => 1000000));
"""
).collect()
return module_scoped_test_table_name


@pytest.fixture
Expand Down
22 changes: 11 additions & 11 deletions tests/integ/modin/hybrid/test_switch_operations.py
Original file line number Diff line number Diff line change
Expand Up @@ -156,8 +156,8 @@ def test_move_to_me_cost_with_incompatible_dtype(caplog):

# Newer version of modin switches before the merge
@sql_count_checker(query_count=2 if MODIN_IS_AT_LEAST_0_37_0 else 0)
def test_merge(init_transaction_tables, us_holidays_data):
df_transactions = pd.read_snowflake("REVENUE_TRANSACTIONS")
def test_merge(revenue_transactions, us_holidays_data):
df_transactions = pd.read_snowflake(revenue_transactions)
df_us_holidays = pd.DataFrame(us_holidays_data, columns=["Holiday", "Date"])
assert df_transactions.get_backend() == "Snowflake"
assert df_us_holidays.get_backend() == "Pandas"
Expand All @@ -174,9 +174,9 @@ def test_merge(init_transaction_tables, us_holidays_data):


@sql_count_checker(query_count=2)
def test_filtered_data(init_transaction_tables):
def test_filtered_data(revenue_transactions):
# When data is filtered, the engine should change once the filtered data is sufficiently small.
df_transactions = pd.read_snowflake("REVENUE_TRANSACTIONS")
df_transactions = pd.read_snowflake(revenue_transactions)
assert df_transactions.get_backend() == "Snowflake"
# in-place operations that do not change the backend
# TODO: the following will result in an align which will grow the
Expand All @@ -203,7 +203,7 @@ def test_filtered_data(init_transaction_tables):
# The SQL here is functionally the same as above
# Unlike in previous iterations of hybrid this does *not* move the data immediately
df_transactions_filter2 = pd.read_snowflake(
"SELECT Date, SUM(Revenue) AS REVENUE FROM revenue_transactions WHERE Date >= DATEADD( 'days', -7, '2025-06-09' ) and Date < '2025-06-09' GROUP BY DATE"
f"SELECT Date, SUM(Revenue) AS REVENUE FROM {revenue_transactions} WHERE Date >= DATEADD( 'days', -7, '2025-06-09' ) and Date < '2025-06-09' GROUP BY DATE"
)
# We do not know the size of this data yet, because the query is entirely lazy
assert df_transactions_filter2.get_backend() == "Snowflake"
Expand All @@ -227,8 +227,8 @@ def test_filtered_data(init_transaction_tables):


@sql_count_checker(query_count=3)
def test_apply(init_transaction_tables, us_holidays_data):
df_transactions = pd.read_snowflake("REVENUE_TRANSACTIONS").head(1000)
def test_apply(revenue_transactions, us_holidays_data):
df_transactions = pd.read_snowflake(revenue_transactions).head(1000)
assert df_transactions.get_backend() == "Snowflake"
df_us_holidays = pd.DataFrame(us_holidays_data, columns=["Holiday", "Date"])
df_us_holidays["Date"] = pd.to_datetime(df_us_holidays["Date"])
Expand Down Expand Up @@ -344,9 +344,9 @@ def test_explain_switch_empty():

# Newer version of modin switches before the merge
@sql_count_checker(query_count=2 if MODIN_IS_AT_LEAST_0_37_0 else 0)
def test_explain_switch(init_transaction_tables, us_holidays_data):
def test_explain_switch(revenue_transactions, us_holidays_data):
clear_hybrid_switch_log()
df_transactions = pd.read_snowflake("REVENUE_TRANSACTIONS")
df_transactions = pd.read_snowflake(revenue_transactions)
df_us_holidays = pd.DataFrame(us_holidays_data, columns=["Holiday", "Date"])
pd.merge(df_us_holidays, df_transactions, left_on="Date", right_on="DATE")
assert "decision" in str(pd.explain_switch())
Expand Down Expand Up @@ -459,7 +459,7 @@ def test_to_datetime():
high_count_expected=True,
high_count_reason="tests queries across different execution modes",
)
def test_query_count_no_switch(init_transaction_tables, use_session_param):
def test_query_count_no_switch(revenue_transactions, use_session_param):
"""
Tests that when there is no switching behavior the query count is the
same under hybrid mode and non-hybrid mode.
Expand All @@ -471,7 +471,7 @@ def inner_test(df_in):
df_result["COUNT"] = df_result.groupby("DATE")["REVENUE"].transform("count")
return df_result

df_transactions = pd.read_snowflake("REVENUE_TRANSACTIONS")
df_transactions = pd.read_snowflake(revenue_transactions)
inner_test(df_transactions)
orig_len = None
hybrid_len = None
Expand Down