Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
53 changes: 26 additions & 27 deletions .github/workflows/precommit.yml
Original file line number Diff line number Diff line change
Expand Up @@ -187,31 +187,31 @@ jobs:
# Specify SNOWFLAKE_IS_PYTHON_RUNTIME_TEST: 1 when adding >= python3.13 with no server-side support
# For example, see https://github.com/snowflakedb/snowpark-python/pull/681
shell: bash
# do not run other tests for macos
- if: ${{ matrix.os != 'macos-latest' && matrix.python-version != '3.14' }}
name: Run tests (excluding doctests)
run: python -m tox -e "py${PYTHON_VERSION/\./}-notdoctest-ci"
env:
PYTHON_VERSION: ${{ matrix.python-version }}
cloud_provider: ${{ matrix.cloud-provider }}
PYTEST_ADDOPTS: --color=yes --tb=short
TOX_PARALLEL_NO_SPINNER: 1
SNOWPARK_PYTHON_API_TEST_BUCKET_PATH: ${{ secrets.SNOWPARK_PYTHON_API_TEST_BUCKET_PATH }}
SNOWPARK_PYTHON_API_S3_STORAGE_INTEGRATION: ${{ vars.SNOWPARK_PYTHON_API_S3_STORAGE_INTEGRATION }}
shell: bash
# TODO: Remove the test below and run udf tests for 3.14
# for 3.14, skip udf, doctest
- if: ${{ matrix.os != 'macos-latest' && matrix.python-version == '3.14' }}
name: Run tests (excluding udf, doctests)
run: python -m tox -e "py${PYTHON_VERSION/\./}-notudfdoctest-ci"
env:
PYTHON_VERSION: ${{ matrix.python-version }}
cloud_provider: ${{ matrix.cloud-provider }}
PYTEST_ADDOPTS: --color=yes --tb=short
TOX_PARALLEL_NO_SPINNER: 1
SNOWPARK_PYTHON_API_TEST_BUCKET_PATH: ${{ secrets.SNOWPARK_PYTHON_API_TEST_BUCKET_PATH }}
SNOWPARK_PYTHON_API_S3_STORAGE_INTEGRATION: ${{ vars.SNOWPARK_PYTHON_API_S3_STORAGE_INTEGRATION }}
shell: bash
# # do not run other tests for macos
# - if: ${{ matrix.os != 'macos-latest' && matrix.python-version != '3.14' }}
# name: Run tests (excluding doctests)
# run: python -m tox -e "py${PYTHON_VERSION/\./}-notdoctest-ci"
# env:
# PYTHON_VERSION: ${{ matrix.python-version }}
# cloud_provider: ${{ matrix.cloud-provider }}
# PYTEST_ADDOPTS: --color=yes --tb=short
# TOX_PARALLEL_NO_SPINNER: 1
# SNOWPARK_PYTHON_API_TEST_BUCKET_PATH: ${{ secrets.SNOWPARK_PYTHON_API_TEST_BUCKET_PATH }}
# SNOWPARK_PYTHON_API_S3_STORAGE_INTEGRATION: ${{ vars.SNOWPARK_PYTHON_API_S3_STORAGE_INTEGRATION }}
# shell: bash
# # TODO: Remove the test below and run udf tests for 3.14
# # for 3.14, skip udf, doctest
# - if: ${{ matrix.os != 'macos-latest' && matrix.python-version == '3.14' }}
# name: Run tests (excluding udf, doctests)
# run: python -m tox -e "py${PYTHON_VERSION/\./}-notudfdoctest-ci"
# env:
# PYTHON_VERSION: ${{ matrix.python-version }}
# cloud_provider: ${{ matrix.cloud-provider }}
# PYTEST_ADDOPTS: --color=yes --tb=short
# TOX_PARALLEL_NO_SPINNER: 1
# SNOWPARK_PYTHON_API_TEST_BUCKET_PATH: ${{ secrets.SNOWPARK_PYTHON_API_TEST_BUCKET_PATH }}
# SNOWPARK_PYTHON_API_S3_STORAGE_INTEGRATION: ${{ vars.SNOWPARK_PYTHON_API_S3_STORAGE_INTEGRATION }}
# shell: bash
- name: Install MS ODBC Driver (Ubuntu only)
if: ${{ contains(matrix.os, 'ubuntu') }}
run: |
Expand All @@ -222,8 +222,7 @@ jobs:
shell: bash
- name: Run data source tests
# psycopg2 is not supported on macos 3.9
# TODO: enable datasource tests for 3.14
if: ${{ !(matrix.os == 'macos-latest' && matrix.python-version == '3.9') && !(matrix.python-version == '3.14') }}
if: ${{ !(matrix.os == 'macos-latest' && matrix.python-version == '3.9') }}
run: python -m tox -e datasource
env:
PYTHON_VERSION: ${{ matrix.python-version }}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -167,7 +167,14 @@ def udtf_ingestion(
statement_params: Optional[Dict[str, str]] = None,
_emit_ast: bool = True,
) -> "snowflake.snowpark.DataFrame":
from snowflake.snowpark._internal.data_source.utils import UDTF_PACKAGE_MAP
from snowflake.snowpark._internal.data_source.utils import (
resolve_udtf_packages,
)

resolved_packages = packages or resolve_udtf_packages(
self.dbms_type,
artifact_repository or session._get_default_artifact_repository(),
)

udtf_name = random_name_for_temp_object(TempObjectType.FUNCTION)
with measure_time() as udtf_register_time:
Expand All @@ -186,7 +193,7 @@ def udtf_ingestion(
]
),
external_access_integrations=[external_access_integrations],
packages=packages or UDTF_PACKAGE_MAP.get(self.dbms_type),
packages=resolved_packages,
imports=imports,
artifact_repository=artifact_repository,
statement_params=statement_params,
Expand Down
73 changes: 71 additions & 2 deletions src/snowflake/snowpark/_internal/data_source/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,11 @@
from snowflake.snowpark._internal.data_source import DataSourceReader
from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
from snowflake.snowpark._internal.utils import get_temp_type_for_object
from snowflake.snowpark.exceptions import SnowparkDataframeReaderException
from snowflake.snowpark.context import _PYPI_SHARED_REPOSITORY
from snowflake.snowpark.exceptions import (
SnowparkClientException,
SnowparkDataframeReaderException,
)
from snowflake.snowpark.types import StructType

from typing import TYPE_CHECKING
Expand Down Expand Up @@ -98,7 +102,11 @@ class DRIVER_TYPE(str, Enum):
DRIVER_TYPE.PYMYSQL: PymysqlDriver,
}

UDTF_PACKAGE_MAP = {
# Default UDTF package list, suitable for Snowflake's Anaconda shared
# repository. The Snowflake Anaconda channel ships conda builds of these
# packages with the necessary native libraries (e.g., libpq for psycopg2,
# msodbcsql for pyodbc) bundled, so the source distribution names work.
_ANACONDA_UDTF_PACKAGE_MAP = {
DBMS_TYPE.ORACLE_DB: ["oracledb>=2.0.0,<4.0.0", "snowflake-snowpark-python"],
DBMS_TYPE.SQLITE_DB: ["snowflake-snowpark-python"],
DBMS_TYPE.SQL_SERVER_DB: [
Expand All @@ -114,6 +122,67 @@ class DRIVER_TYPE(str, Enum):
DBMS_TYPE.MYSQL_DB: ["pymysql>=1.0.0,<2.0.0", "snowflake-snowpark-python"],
}

# UDTF package list when using the PyPI shared repository. The server-side
# UDTF install sandbox refuses to compile source distributions (sdists), so
# every package here must be wheel-installable from PyPI. Differences from
# the Anaconda map:
# - Postgres uses ``psycopg2-binary`` because ``psycopg2`` on PyPI is
# sdist-only; ``psycopg2-binary`` is the wheel-packaged equivalent.
# - SQL Server has no PyPI-installable equivalent of ``msodbcsql`` (it is
# Microsoft's ODBC driver, distributed as a system package), so the
# UDTF path cannot work on PyPI today.
# - Databricks depends on ``databricks-sql-connector``, which transitively
# requires ``thrift``; ``thrift`` on PyPI is sdist-only for every
# version, so the server cannot install it.
# These PyPI gaps are independent of the Python version; they apply to any
# session whose default artifact repository is PyPI (most commonly Python
# 3.14+, where PyPI is the global default).
_PYPI_UDTF_PACKAGE_MAP = {
DBMS_TYPE.ORACLE_DB: ["oracledb>=2.0.0,<4.0.0", "snowflake-snowpark-python"],
DBMS_TYPE.SQLITE_DB: ["snowflake-snowpark-python"],
DBMS_TYPE.POSTGRES_DB: [
"psycopg2-binary>=2.0.0,<3.0.0",
"snowflake-snowpark-python",
],
DBMS_TYPE.MYSQL_DB: ["pymysql>=1.0.0,<2.0.0", "snowflake-snowpark-python"],
# SQL_SERVER_DB and DATABRICKS_DB intentionally omitted - see
# resolve_udtf_packages for the user-facing error.
}

# Backwards-compatible alias for callers (and external code) that imported
# the old map. New code should use :func:`resolve_udtf_packages`.
UDTF_PACKAGE_MAP = _ANACONDA_UDTF_PACKAGE_MAP


def resolve_udtf_packages(
    dbms_type: "DBMS_TYPE", artifact_repository: Optional[str]
) -> Optional[List[str]]:
    """Return the default UDTF package list for ``dbms_type``.

    Picks the package list appropriate for ``artifact_repository``: the PyPI
    map is used only when the repository equals the PyPI shared repository;
    any other value (including ``None``) falls back to the Anaconda map.

    Args:
        dbms_type: The DBMS the UDTF will connect to.
        artifact_repository: Fully qualified name of the artifact repository
            the UDTF will be registered against, or ``None``.

    Returns:
        A fresh copy of the default package requirement strings, so callers
        may modify the result without altering the module-level defaults.
        ``None`` is returned when the Anaconda map has no entry for
        ``dbms_type``.

    Raises:
        SnowparkClientException: If the repository is the PyPI shared
            repository and ``dbms_type`` has no working package set there
            (its dependencies are not wheel-installable from PyPI, and the
            server-side UDTF sandbox refuses to compile sdists).
    """
    if artifact_repository != _PYPI_SHARED_REPOSITORY:
        # Anaconda (or any non-PyPI repository): a missing entry is not an
        # error here, the lookup result is simply passed through as None.
        anaconda_packages = _ANACONDA_UDTF_PACKAGE_MAP.get(dbms_type)
        return None if anaconda_packages is None else list(anaconda_packages)

    pypi_packages = _PYPI_UDTF_PACKAGE_MAP.get(dbms_type)
    if pypi_packages is None:
        # DBMSes deliberately left out of the PyPI map cannot be installed
        # server-side; fail early with guidance instead of a late install error.
        raise SnowparkClientException(
            f"DataFrameReader.dbapi server-side UDTF ingestion for "
            f"{dbms_type.value} is not supported when the session's "
            f"default artifact repository is PyPI: the required "
            f"packages are not wheel-installable from PyPI, and the "
            f"server-side UDTF install sandbox refuses to compile "
            f"source distributions. Switch to the Anaconda artifact "
            f"repository (on Python 3.14+, PyPI is the client-side "
            f"default; on older Python versions, Anaconda is the "
            f"default but may have been overridden at the account, "
            f"database, or schema level)."
        )
    return list(pypi_packages)


def get_jdbc_dbms(jdbc_url: str) -> str:
"""
Expand Down
4 changes: 2 additions & 2 deletions src/snowflake/snowpark/_internal/udf_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -1247,7 +1247,7 @@ def resolve_imports_and_packages(
if artifact_repository != _ANACONDA_SHARED_REPOSITORY:
# Non-conda artifact repository - skip conda-based package resolution
resolved_packages = []
if not packages and session:
if packages is None and session:
resolved_packages = list(
session._resolve_packages(
[],
Expand All @@ -1256,7 +1256,7 @@ def resolve_imports_and_packages(
include_pandas=is_pandas_udf,
)
)
elif packages:
elif packages is not None:
if not all(isinstance(package, str) for package in packages):
raise TypeError(
"Non-conda artifact repository requires that all packages be passed as str."
Expand Down
11 changes: 9 additions & 2 deletions src/snowflake/snowpark/context.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@

"""Context module for Snowpark."""
import logging
import sys
from typing import Callable, Optional

import snowflake.snowpark
Expand Down Expand Up @@ -168,8 +169,14 @@

# The fully qualified name of the Anaconda shared repository (conda channel).
_ANACONDA_SHARED_REPOSITORY = "snowflake.snowpark.anaconda_shared_repository"
# In case of failures or the current default artifact repository is unset, we fallback to this
_DEFAULT_ARTIFACT_REPOSITORY = _ANACONDA_SHARED_REPOSITORY
# The fully qualified name of the PyPI shared repository (pypi channel).
_PYPI_SHARED_REPOSITORY = "snowflake.snowpark.pypi_shared_repository"
# Fallback artifact repository, used on failures and to route to the right session package store: Anaconda before Python 3.14, PyPI on 3.14 and later.
_DEFAULT_ARTIFACT_REPOSITORY = (
_ANACONDA_SHARED_REPOSITORY
if sys.version_info < (3, 14)
else _PYPI_SHARED_REPOSITORY
)


def configure_development_features(
Expand Down
8 changes: 8 additions & 0 deletions tests/integ/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -286,6 +286,14 @@ def test_schema(connection, local_testing_mode) -> None:
cursor.execute(
f"GRANT ALL PRIVILEGES ON SCHEMA {TEST_SCHEMA} TO ROLE PUBLIC"
)
# TODO: delete once pypi becomes default for 3.14
if sys.version_info.major == 3 and sys.version_info.minor == 14:
cursor.execute(
"alter session set ENABLE_DEFAULT_PYTHON_ARTIFACT_REPOSITORY=true"
)
cursor.execute(
"alter schema set DEFAULT_PYTHON_ARTIFACT_REPOSITORY=snowflake.snowpark.pypi_shared_repository"
)
yield
cursor.execute(f"DROP SCHEMA IF EXISTS {TEST_SCHEMA}")

Expand Down
10 changes: 9 additions & 1 deletion tests/integ/datasource/test_databricks.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@
databricks_unicode_schema,
databricks_double_quoted_schema,
)
from tests.utils import IS_IN_STORED_PROC, IS_MACOS, Utils
from tests.utils import IS_IN_STORED_PROC, IS_MACOS, Utils, IS_PY314

DEPENDENCIES_PACKAGE_UNAVAILABLE = True
try:
Expand Down Expand Up @@ -177,6 +177,10 @@ def test_double_quoted_column_databricks(session, custom_schema):
[("table", TEST_TABLE_NAME), ("query", f"(SELECT * FROM {TEST_TABLE_NAME})")],
)
@pytest.mark.udf
@pytest.mark.skipif(
IS_PY314,
reason="databricks-sql-connector's thrift dependency has no Python 3.14 wheel on PyPI; server-side UDTF install fails on the default repo.",
)
def test_udtf_ingestion_databricks(session, input_type, input_value, caplog):
# we define here to avoid test_databricks.py to be pickled and unpickled in UDTF
def local_create_databricks_connection():
Expand Down Expand Up @@ -286,6 +290,10 @@ def test_session_init(session):
)


@pytest.mark.skipif(
IS_PY314,
reason="databricks-sql-connector's thrift dependency has no Python 3.14 wheel on PyPI; server-side UDTF install fails on the default repo.",
)
def test_session_init_udtf(session):
udtf_configs = {
"external_access_integration": DATABRICKS_TEST_EXTERNAL_ACCESS_INTEGRATION
Expand Down
45 changes: 39 additions & 6 deletions tests/integ/datasource/test_sql_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,14 @@
from snowflake.snowpark._internal.data_source.utils import DBMS_TYPE

from tests.parameters import SQL_SERVER_CONNECTION_PARAMETERS
from tests.utils import IS_IN_STORED_PROC, Utils, IS_WINDOWS, IS_MACOS, RUNNING_ON_GH
from tests.utils import (
IS_IN_STORED_PROC,
Utils,
IS_WINDOWS,
IS_MACOS,
RUNNING_ON_GH,
IS_PY314,
)
from tests.resources.test_data_source_dir.test_sql_server_data import (
SQL_SERVER_TABLE_NAME,
EXPECTED_TEST_DATA,
Expand Down Expand Up @@ -145,6 +152,10 @@ def test_sql_server_ingestion(
),
],
)
@pytest.mark.skipif(
IS_PY314,
reason="msodbcsql is not available on PyPI, so the server-side UDTF install fails on Python 3.14's default PyPI artifact repository.",
)
def test_sql_server_udtf_ingestion(
session, input_type, table_name, expected_data, expected_schema, apply_order
):
Expand Down Expand Up @@ -182,20 +193,24 @@ def local_create_connection_sql_server():
[
("table", "NONEXISTTABLE", "Invalid object name", None),
("query", "SELEC ** FORM TABLE", "Incorrect syntax near", None),
(
pytest.param(
"table",
"NONEXISTTABLE",
"Invalid object name",
SQL_SERVER_TEST_EXTERNAL_ACCESS_INTEGRATION,
),
(
pytest.param(
"query",
"SELEC ** FORM TABLE",
"Incorrect syntax near",
SQL_SERVER_TEST_EXTERNAL_ACCESS_INTEGRATION,
),
],
)
@pytest.mark.skipif(
IS_PY314,
reason="msodbcsql is not available on PyPI, so the server-side UDTF install fails on Python 3.14's default PyPI artifact repository.",
)
def test_error_case(session, input_type, input_value, error_message, udtf_configs):
# Use local connection function when udtf_configs is provided
if udtf_configs:
Expand Down Expand Up @@ -229,9 +244,15 @@ def connection_func():
"udtf_configs",
[
None,
SQL_SERVER_TEST_EXTERNAL_ACCESS_INTEGRATION,
pytest.param(
SQL_SERVER_TEST_EXTERNAL_ACCESS_INTEGRATION,
),
],
)
@pytest.mark.skipif(
IS_PY314,
reason="msodbcsql is not available on PyPI, so the server-side UDTF install fails on Python 3.14's default PyPI artifact repository.",
)
def test_partitions_and_predicates(session, udtf_configs):
# Use local connection function when udtf_configs is provided
if udtf_configs:
Expand Down Expand Up @@ -298,9 +319,15 @@ def connection_func():
"udtf_configs",
[
None,
SQL_SERVER_TEST_EXTERNAL_ACCESS_INTEGRATION,
pytest.param(
SQL_SERVER_TEST_EXTERNAL_ACCESS_INTEGRATION,
),
],
)
@pytest.mark.skipif(
IS_PY314,
reason="msodbcsql is not available on PyPI, so the server-side UDTF install fails on Python 3.14's default PyPI artifact repository.",
)
def test_session_init_statement(session, udtf_configs):
# Use local connection function when udtf_configs is provided
if udtf_configs:
Expand Down Expand Up @@ -360,9 +387,15 @@ def test_pyodbc_driver_class_builder():
"udtf_configs",
[
None,
SQL_SERVER_TEST_EXTERNAL_ACCESS_INTEGRATION,
pytest.param(
SQL_SERVER_TEST_EXTERNAL_ACCESS_INTEGRATION,
),
],
)
@pytest.mark.skipif(
IS_PY314,
reason="msodbcsql is not available on PyPI, so the server-side UDTF install fails on Python 3.14's default PyPI artifact repository.",
)
def test_sql_server_with_connection_parameters(session, udtf_configs):
"""Test connection_parameters with local/default ingestion and UDTF ingestion."""

Expand Down
Loading
Loading