Skip to content

Commit 02e594e

Browse files
authored
SNOW-2356405: dbapi pyodbc and udtf fixes (#3805)
1 parent 921bc70 commit 02e594e

12 files changed

Lines changed: 750 additions & 36 deletions

File tree

127 Bytes
Binary file not shown.

.github/workflows/precommit.yml

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -207,6 +207,14 @@ jobs:
207207
SNOWPARK_PYTHON_API_S3_STORAGE_INTEGRATION: ${{ vars.SNOWPARK_PYTHON_API_S3_STORAGE_INTEGRATION }}
208208
TOX_PARALLEL_NO_SPINNER: 1
209209
shell: bash
210+
- name: Install MS ODBC Driver (Ubuntu only)
211+
if: ${{ contains(matrix.os, 'ubuntu') }}
212+
run: |
213+
curl https://packages.microsoft.com/keys/microsoft.asc | sudo apt-key add -
214+
curl https://packages.microsoft.com/config/ubuntu/$(lsb_release -rs)/prod.list | sudo tee /etc/apt/sources.list.d/mssql-release.list
215+
sudo apt-get update
216+
sudo ACCEPT_EULA=Y apt-get install -y msodbcsql18 unixodbc-dev
217+
shell: bash
210218
- name: Run data source tests
211219
# psycopg2 is not supported on macos 3.9
212220
# SNOW-2213578: Re-enable the test for 3.13

CHANGELOG.md

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,16 @@
7777
- `st_asgeojson`
7878
- `st_aswkb`
7979

80+
#### Bug Fixes
81+
82+
- Fixed multiple bugs in `DataFrameReader.dbapi` (PuPr):
83+
- Fixed UDTF ingestion failure with `pyodbc` driver caused by unprocessed row data.
84+
- Fixed SQL Server query input failure due to incorrect select query generation.
85+
- Fixed UDTF ingestion not preserving column nullability in the output schema.
86+
87+
#### Improvements
88+
89+
- Improved `DataFrameReader.dbapi` (PuPr) reading performance by setting the default `fetch_size` parameter value to 100000.
8090

8191
### Snowpark pandas API Updates
8292

Lines changed: 30 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,37 @@
11
#
22
# Copyright (c) 2012-2025 Snowflake Computing Inc. All rights reserved.
33
#
4+
from typing import List
5+
46
from snowflake.snowpark._internal.data_source.dbms_dialects import BaseDialect
7+
from snowflake.snowpark._internal.data_source.dbms_dialects.base_dialect import (
8+
QUERY_TEMPLATE,
9+
)
10+
from snowflake.snowpark._internal.utils import quote_name
11+
from snowflake.snowpark.types import StructType
512

613

714
class SqlServerDialect(BaseDialect):
8-
pass
15+
def generate_select_query(
16+
self,
17+
table_or_query: str,
18+
schema: StructType,
19+
raw_schema: List[tuple],
20+
is_query: bool,
21+
query_input_alias: str,
22+
) -> str:
23+
cols = []
24+
for _field, raw_field in zip(schema.fields, raw_schema):
25+
field_name = (
26+
f"{query_input_alias}.{quote_name(raw_field[0], keep_case=True)}"
27+
if is_query
28+
else f"{quote_name(raw_field[0], keep_case=True)}"
29+
)
30+
cols.append(f"{field_name} AS {raw_field[0]}") if is_query else cols.append(
31+
field_name
32+
)
33+
return QUERY_TEMPLATE.format(
34+
cols=", ".join(cols),
35+
table_or_query=f"({table_or_query})" if is_query else table_or_query,
36+
query_input_alias=query_input_alias if is_query else "",
37+
)

src/snowflake/snowpark/_internal/data_source/drivers/base_driver.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -274,7 +274,10 @@ def to_result_snowpark_df_udtf(
274274
res_df[field.name].cast(field.datatype).alias(field.name)
275275
for field in schema.fields
276276
]
277-
return res_df.select(cols, _emit_ast=_emit_ast)
277+
selected_df = res_df.select(cols, _emit_ast=_emit_ast)
278+
for attr, source_field in zip(selected_df._plan.attributes, schema.fields):
279+
attr.nullable = source_field.nullable
280+
return selected_df
278281

279282
def get_server_cursor_if_supported(self, conn: "Connection") -> "Cursor":
280283
"""

src/snowflake/snowpark/_internal/data_source/drivers/pyodbc_driver.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -114,7 +114,7 @@ def process(self, query: str):
114114
rows = cursor.fetchmany(fetch_size)
115115
if not rows:
116116
break
117-
yield from rows
117+
yield from map(tuple, rows)
118118

119119
return UDTFIngestion
120120

src/snowflake/snowpark/dataframe_reader.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1695,7 +1695,7 @@ def dbapi(
16951695
num_partitions: Optional[int] = None,
16961696
max_workers: Optional[int] = None,
16971697
query_timeout: Optional[int] = 0,
1698-
fetch_size: Optional[int] = 1000,
1698+
fetch_size: Optional[int] = 100000,
16991699
custom_schema: Optional[Union[str, StructType]] = None,
17001700
predicates: Optional[List[str]] = None,
17011701
session_init_statement: Optional[Union[str, List[str]]] = None,

0 commit comments

Comments
 (0)