Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,7 @@

#### Improvements

- Removed unnecessary warnings about local package version mismatch when using `session.read.option('rowTag', <tag_name>).xml(<stage_file_path>)` or `xpath` functions.
- Improved `DataFrameReader.dbapi` (PuPr) reading performance by setting the default `fetch_size` parameter value to 100000.

### Snowpark pandas API Updates
Expand Down
7 changes: 7 additions & 0 deletions src/snowflake/snowpark/_internal/udf_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -1134,6 +1134,7 @@ def resolve_imports_and_packages(
skip_upload_on_content_match: bool = False,
is_permanent: bool = False,
force_inline_code: bool = False,
**kwargs,
) -> Tuple[
Optional[str],
Optional[str],
Expand Down Expand Up @@ -1167,6 +1168,9 @@ def resolve_imports_and_packages(
packages,
include_pandas=is_pandas_udf,
statement_params=statement_params,
_suppress_local_package_warnings=kwargs.get(
"_suppress_local_package_warnings", False
),
)
if packages is not None
else session._resolve_packages(
Expand All @@ -1175,6 +1179,9 @@ def resolve_imports_and_packages(
validate_package=False,
include_pandas=is_pandas_udf,
statement_params=statement_params,
_suppress_local_package_warnings=kwargs.get(
"_suppress_local_package_warnings", False
),
)
)

Expand Down
6 changes: 1 addition & 5 deletions src/snowflake/snowpark/dataframe_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -1407,11 +1407,6 @@ def _read_semi_structured_file(self, path: str, format: str) -> DataFrame:
metadata_project, metadata_schema = self._get_metadata_project_and_schema()

if format == "XML" and XML_ROW_TAG_STRING in self._cur_options:
warning(
"rowTag",
"rowTag for reading XML file is in private preview since 1.31.0. Do not use it in production.",
)

if is_in_stored_procedure(): # pragma: no cover
# create a temp stage for udtf import files
# we have to use "temp" object instead of "scoped temp" object in stored procedure
Expand Down Expand Up @@ -1447,6 +1442,7 @@ def _read_semi_structured_file(self, path: str, format: str) -> DataFrame:
input_types=input_types,
packages=["snowflake-snowpark-python", "lxml<6"],
replace=True,
_suppress_local_package_warnings=True,
)
else:
xml_reader_udtf = None
Expand Down
39 changes: 24 additions & 15 deletions src/snowflake/snowpark/session.py
Original file line number Diff line number Diff line change
Expand Up @@ -1884,6 +1884,7 @@ def _get_dependency_packages(
package_table: str,
current_packages: Dict[str, str],
statement_params: Optional[Dict[str, str]] = None,
suppress_local_package_warnings: bool = False,
) -> List[Requirement]:
# Keep track of any package errors
errors = []
Expand Down Expand Up @@ -1979,24 +1980,27 @@ def is_valid_version(
if not is_valid_version(
package_name, package_client_version, valid_packages
):
if not suppress_local_package_warnings:
_logger.warning(
f"The version of package '{package_name}' in the local environment is "
f"{package_client_version}, which does not fit the criteria for the "
f"requirement '{package}'. Your UDF might not work when the package version "
f"is different between the server and your local environment."
)
except importlib.metadata.PackageNotFoundError:
if not suppress_local_package_warnings:
_logger.warning(
f"The version of package '{package_name}' in the local environment is "
f"{package_client_version}, which does not fit the criteria for the "
f"requirement '{package}'. Your UDF might not work when the package version "
f"is different between the server and your local environment."
f"Package '{package_name}' is not installed in the local environment. "
f"Your UDF might not work when the package is installed on the server "
f"but not on your local environment."
)
except importlib.metadata.PackageNotFoundError:
_logger.warning(
f"Package '{package_name}' is not installed in the local environment. "
f"Your UDF might not work when the package is installed on the server "
f"but not on your local environment."
)
except Exception as ex: # pragma: no cover
_logger.warning(
"Failed to get the local distribution of package %s: %s",
package_name,
ex,
)
if not suppress_local_package_warnings:
_logger.warning(
"Failed to get the local distribution of package %s: %s",
package_name,
ex,
)

if package_name in current_packages:
if current_packages[package_name] != package:
Expand Down Expand Up @@ -2071,6 +2075,7 @@ def _resolve_packages(
include_pandas: bool = False,
statement_params: Optional[Dict[str, str]] = None,
artifact_repository: Optional[str] = None,
**kwargs,
) -> List[str]:
"""
Given a list of packages to add, this method will
Expand Down Expand Up @@ -2154,6 +2159,9 @@ def _resolve_packages(
package_table,
result_dict,
statement_params=statement_params,
suppress_local_package_warnings=kwargs.get(
"_suppress_local_package_warnings", False
),
)

# Add dependency packages
Expand Down Expand Up @@ -4802,6 +4810,7 @@ def _get_or_register_xpath_udf(
packages=["snowflake-snowpark-python", "lxml<6"],
replace=True,
_emit_ast=False,
_suppress_local_package_warnings=True,
)

self._xpath_udf_cache[cache_key] = xpath_udf
Expand Down
1 change: 1 addition & 0 deletions src/snowflake/snowpark/stored_procedure.py
Original file line number Diff line number Diff line change
Expand Up @@ -955,6 +955,7 @@ def _do_register_sp(
is_permanent=is_permanent,
force_inline_code=force_inline_code,
artifact_repository=artifact_repository,
**kwargs,
)

runtime_version_from_requirement = None
Expand Down
1 change: 1 addition & 0 deletions src/snowflake/snowpark/udaf.py
Original file line number Diff line number Diff line change
Expand Up @@ -799,6 +799,7 @@ def _do_register_udaf(
skip_upload_on_content_match=skip_upload_on_content_match,
is_permanent=is_permanent,
artifact_repository=artifact_repository,
**kwargs,
)

runtime_version_from_requirement = None
Expand Down
1 change: 1 addition & 0 deletions src/snowflake/snowpark/udf.py
Original file line number Diff line number Diff line change
Expand Up @@ -994,6 +994,7 @@ def _do_register_udf(
skip_upload_on_content_match=skip_upload_on_content_match,
is_permanent=is_permanent,
artifact_repository=artifact_repository,
**kwargs,
)

runtime_version_from_requirement = None
Expand Down
1 change: 1 addition & 0 deletions src/snowflake/snowpark/udtf.py
Original file line number Diff line number Diff line change
Expand Up @@ -1056,6 +1056,7 @@ def _do_register_udtf(
skip_upload_on_content_match=skip_upload_on_content_match,
is_permanent=is_permanent,
artifact_repository=artifact_repository,
**kwargs,
)

runtime_version_from_requirement = None
Expand Down
14 changes: 14 additions & 0 deletions tests/integ/test_xml_reader_row_tag.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
# Copyright (c) 2012-2025 Snowflake Computing Inc. All rights reserved.
#

import logging
import json
import pytest

Expand Down Expand Up @@ -398,6 +399,19 @@ def test_read_xml_ignore_surrounding_whitespace(
Utils.check_answer(df, [expected_row])


def test_read_xml_warning_local_package(session, caplog):
    """Reading a staged XML file with ``rowTag`` set must not log the
    local-vs-server package version mismatch warning."""
    unexpected_message = "Your UDF might not work when the package version is different between the server and your local environment"
    stage_file_path = f"@{tmp_stage_name}/{test_file_books_xml}"

    # Start from an empty log so only this read contributes records.
    caplog.clear()
    with caplog.at_level(logging.WARNING):
        reader = session.read.option("rowTag", "book")
        reader.xml(stage_file_path)

    assert unexpected_message not in caplog.text


def test_read_xml_row_validation_xsd_path(session):
row_tag = "book"
df = (
Expand Down
14 changes: 14 additions & 0 deletions tests/integ/test_xpath.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
# Copyright (c) 2012-2025 Snowflake Computing Inc. All rights reserved.
#

import logging
import json
import pytest

Expand Down Expand Up @@ -329,3 +330,16 @@ def test_xpath_return_types(session):
assert isinstance(schema["BOOL_COL"].datatype, BooleanType)
assert isinstance(schema["FLOAT_COL"].datatype, DoubleType)
assert isinstance(schema["INT_COL"].datatype, LongType)


def test_xpath_warning_local_package(session, caplog):
    """Evaluating an ``xpath`` expression must not log the local-vs-server
    package version mismatch warning."""
    unexpected_message = "Your UDF might not work when the package version is different between the server and your local environment"
    xml_rows = [["<root><a>1</a><a>2</a><a>3</a></root>"]]

    # Start from an empty log so only this query contributes records.
    caplog.clear()
    with caplog.at_level(logging.WARNING):
        df = session.create_dataframe(xml_rows, schema=["xml"])
        values_column = xpath("xml", "//a/text()").alias("values")
        df.select(values_column).collect()

    assert unexpected_message not in caplog.text
29 changes: 29 additions & 0 deletions tests/unit/test_session.py
Original file line number Diff line number Diff line change
Expand Up @@ -275,6 +275,35 @@ def mock_get_information_schema_packages(table_name: str, _emit_ast: bool = True
assert len(existing_packages) == 1, existing_packages


def test_resolve_packages_suppresses_internal_warning(mock_server_connection, caplog):
    """``_resolve_packages`` called with ``_suppress_local_package_warnings=True``
    must not log any warning, even though the patched local version ("0.0.1")
    differs from the only version the mocked package table reports ("1.0.0")."""
    session = Session(mock_server_connection)

    def mock_get_information_schema_packages(table_name: str, _emit_ast: bool = True):
        # Stand-in for ``session.table``: the filter/group_by/agg chain ends in
        # a single available server-side version of the package.
        table_mock = MagicMock()
        aggregated = table_mock.filter().group_by().agg()
        aggregated._internal_collect_with_tag.return_value = [
            ("snowflake-snowpark-python", json.dumps(["1.0.0"]))
        ]
        return table_mock

    caplog.clear()
    with mock.patch.object(
        session, "table", side_effect=mock_get_information_schema_packages
    ):
        with mock.patch("importlib.metadata.version", return_value="0.0.1"):
            with mock.patch("snowflake.snowpark.session._logger") as mock_logger:
                with caplog.at_level(logging.WARNING):
                    session._resolve_packages(
                        ["snowflake-snowpark-python"],
                        validate_package=True,
                        include_pandas=False,
                        _suppress_local_package_warnings=True,
                    )

    # Check both the patched module logger and the captured log text.
    mock_logger.warning.assert_not_called()
    assert caplog.text == ""


@pytest.mark.skipif(not is_pandas_available, reason="requires pandas for write_pandas")
def test_write_pandas_wrong_table_type(mock_server_connection):
session = Session(mock_server_connection)
Expand Down