Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 8 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,14 @@

#### New Features

### Snowpark pandas API Updates

#### New Features

#### Improvements
- Hybrid execution mode is now enabled by default. Certain operations on smaller data will now automatically execute in native pandas in-memory. Use `from modin.config import AutoSwitchBackend; AutoSwitchBackend.disable()` to turn this off and force all execution to occur in Snowflake.
- Removed an unnecessary `SHOW OBJECTS` query issued from `read_snowflake` under certain conditions.

## 1.39.0 (YYYY-MM-DD)

### Snowpark Python API Updates
Expand Down Expand Up @@ -73,7 +81,6 @@

#### Improvements

- Hybrid execution mode is now enabled by default. Certain operations on smaller data will now automatically execute in native pandas in-memory. Use `from modin.config import AutoSwitchBackend; AutoSwitchBackend.disable()` to turn this off and force all execution to occur in Snowflake.
- Lowered the severity of the log message reporting that the Snowpark
`DataFrame` reference of an internal `DataFrameReference` object has
changed; it is now emitted at level `logging.DEBUG - 1`.
Expand Down
13 changes: 11 additions & 2 deletions src/snowflake/snowpark/modin/plugin/_internal/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -322,7 +322,9 @@ def _create_read_only_table(
def create_initial_ordered_dataframe(
table_name_or_query: Union[str, Iterable[str]],
enforce_ordering: bool,
*,
dummy_row_pos_mode: bool = False,
row_count_hint: Optional[int] = None,
) -> tuple[OrderedDataFrame, str]:
"""
Create a read-only temp table on top of the existing table or Snowflake query if required, and create an OrderedDataFrame
Expand All @@ -334,6 +336,11 @@ def create_initial_ordered_dataframe(
enforce_ordering: If True, create a read-only temp table on top of the existing table or Snowflake query,
and create the OrderedDataFrame using the read-only temp table created.
Otherwise, use the existing table directly.
dummy_row_pos_mode: If True, uses "dummy" row position columns to avoid a potentially
expensive ROW_NUMBER() query.
row_count_hint: An optional hint for the exact row count of the frame. This is used in scenarios
where we have already performed a query for the size of the underlying data, and can re-use
the value.

Returns:
OrderedDataFrame with row position column.
Expand Down Expand Up @@ -502,8 +509,10 @@ def create_initial_ordered_dataframe(
ordered_dataframe.row_position_snowflake_quoted_identifier
)

materialized_row_count = None
if not is_query:
if row_count_hint is not None:
ordered_dataframe.row_count = row_count_hint
ordered_dataframe.row_count_upper_bound = row_count_hint
elif not is_query:
materialized_row_count = get_object_metadata_row_count(table_name_or_query)
ordered_dataframe.row_count = materialized_row_count
ordered_dataframe.row_count_upper_bound = materialized_row_count
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1479,6 +1479,11 @@ def from_snowflake(
table_name_or_query=name_or_query,
enforce_ordering=enforce_ordering,
dummy_row_pos_mode=dummy_row_pos_mode,
row_count_hint=(
relaxed_query_compiler._modin_frame.ordered_dataframe.row_count
if relaxed_query_compiler is not None
else None
),
)
pandas_labels_to_snowflake_quoted_identifiers_map = {
# pandas labels of resulting Snowpark pandas dataframe will be snowflake identifier
Expand Down
Loading