|
2 | 2 | # Copyright (c) 2012-2025 Snowflake Computing Inc. All rights reserved. |
3 | 3 | # |
4 | 4 |
|
| 5 | +import copy |
5 | 6 | import modin.pandas as pd |
6 | 7 | import pandas as native_pd |
7 | 8 |
|
8 | 9 | from snowflake.snowpark._internal.utils import TempObjectType |
9 | 10 | import snowflake.snowpark.modin.plugin # noqa: F401 |
| 11 | +from snowflake.snowpark.session import ( |
| 12 | + _SNOWPARK_PANDAS_DUMMY_ROW_POS_OPTIMIZATION_ENABLED, |
| 13 | + Session, |
| 14 | +) |
10 | 15 | from tests.integ.modin.utils import assert_frame_equal, assert_index_equal |
11 | 16 | from tests.integ.utils.sql_counter import sql_count_checker |
12 | 17 | from tests.utils import Utils |
@@ -129,3 +134,60 @@ def test_read_filter_groupby_agg(session): |
129 | 134 |
|
130 | 135 | # compare results |
131 | 136 | assert_frame_equal(snow_result, native_result) |
| 137 | + |
| 138 | + |
@sql_count_checker(query_count=5, join_count=1)
def test_read_filter_join_flag_disabled(session):
    """Check filter + merge results when the dummy row-position optimization is off.

    Runs a chain of operations that are fully supported in faster pandas, but
    with ``session.dummy_row_pos_optimization_enabled`` set to ``False``, and
    verifies that neither the inputs nor the result carry a relaxed query
    compiler while the result still matches native pandas.

    Args:
        session: Snowpark session fixture used to create the temp tables.
    """
    # Remember the current flag so it can be restored afterwards; the fixture
    # session is presumably reused by other tests, which must not inherit the
    # disabled optimization from this one.
    previous_flag = session.dummy_row_pos_optimization_enabled
    session.dummy_row_pos_optimization_enabled = False
    try:
        # create tables
        table_name1 = Utils.random_name_for_temp_object(TempObjectType.TABLE)
        session.create_dataframe(
            native_pd.DataFrame([[1, 11], [2, 12], [3, 13]], columns=["A", "B"])
        ).write.save_as_table(table_name1, table_type="temp")
        table_name2 = Utils.random_name_for_temp_object(TempObjectType.TABLE)
        session.create_dataframe(
            native_pd.DataFrame([[1, 21], [2, 22], [3, 23]], columns=["C", "D"])
        ).write.save_as_table(table_name2, table_type="temp")

        # create snow dataframes
        df1 = pd.read_snowflake(table_name1)
        df2 = pd.read_snowflake(table_name2)
        snow_result = df1[df1["B"] > 11].merge(
            df2[df2["D"] == 22], left_on="A", right_on="C"
        )

        # verify that the input dataframes have an empty relaxed query compiler
        assert df1._query_compiler._relaxed_query_compiler is None
        assert df2._query_compiler._relaxed_query_compiler is None
        # verify that the output dataframe also has an empty relaxed query compiler
        assert snow_result._query_compiler._relaxed_query_compiler is None

        # create pandas dataframes
        native_df1 = df1.to_pandas()
        native_df2 = df2.to_pandas()
        native_result = native_df1[native_df1["B"] > 11].merge(
            native_df2[native_df2["D"] == 22], left_on="A", right_on="C"
        )

        # compare results
        assert_frame_equal(snow_result, native_result)
    finally:
        # Always put the flag back, even when an assertion above fails.
        session.dummy_row_pos_optimization_enabled = previous_flag
| 177 | + |
| 178 | + |
@sql_count_checker(query_count=0)
def test_dummy_row_pos_optimization_enabled_on_session(db_parameters):
    """Check that the dummy row-position flag can be toggled and preconfigured.

    First flips ``dummy_row_pos_optimization_enabled`` on a freshly created
    session and flips it back, asserting the property reflects each write.
    Then creates a second session whose ``session_parameters`` carry the
    negated default and asserts the property picks that value up.

    Args:
        db_parameters: Connection parameters fixture used to build sessions.
    """
    with Session.builder.configs(db_parameters).create() as first_session:
        initial = first_session.dummy_row_pos_optimization_enabled
        flipped = not initial

        # setter round trip: away from the default, then back to it
        first_session.dummy_row_pos_optimization_enabled = flipped
        assert first_session.dummy_row_pos_optimization_enabled is not initial
        first_session.dummy_row_pos_optimization_enabled = initial
        assert first_session.dummy_row_pos_optimization_enabled is initial

        # deep copy so the shared fixture dict is left unmodified
        overridden = copy.deepcopy(db_parameters)
        overridden["session_parameters"] = {
            _SNOWPARK_PANDAS_DUMMY_ROW_POS_OPTIMIZATION_ENABLED: flipped
        }
        with Session.builder.configs(overridden).create() as second_session:
            assert second_session.dummy_row_pos_optimization_enabled is not initial
0 commit comments