From 0eabd8d213c41744857aa922ff79786b6ec3a0c1 Mon Sep 17 00:00:00 2001 From: Yuyang Wang Date: Wed, 3 Sep 2025 15:07:02 -0700 Subject: [PATCH 1/6] add test --- tests/integ/datasource/test_data_source_sp.py | 361 ++++++++++++++++++ 1 file changed, 361 insertions(+) create mode 100644 tests/integ/datasource/test_data_source_sp.py diff --git a/tests/integ/datasource/test_data_source_sp.py b/tests/integ/datasource/test_data_source_sp.py new file mode 100644 index 0000000000..5c80f35e16 --- /dev/null +++ b/tests/integ/datasource/test_data_source_sp.py @@ -0,0 +1,361 @@ +# +# Copyright (c) 2012-2025 Snowflake Computing Inc. All rights reserved. +# + +import datetime +import functools +import logging +import os +import tempfile + +import pytest + +from tests.integ.conftest import RUNNING_ON_GH +from tests.resources.test_data_source_dir.test_data_source_data import ( + sqlite3_db, + create_connection_to_sqlite3_db, + SQLITE3_DB_CUSTOM_SCHEMA_STRING, +) + + +pytestmark = [ + pytest.mark.skipif( + "config.getoption('local_testing_mode', default=False)", + reason="feature not available in local testing", + ), + pytest.mark.skipif( + RUNNING_ON_GH, + reason="tests only suppose to run on snowfort", + ), +] + + +@pytest.mark.parametrize("fetch_with_process", [True, False]) +def test_dbapi_local(session, caplog, fetch_with_process): + with tempfile.TemporaryDirectory() as temp_dir: + dbpath = os.path.join(temp_dir, "testsqlite3.db") + table_name, _, _, assert_data = sqlite3_db(dbpath) + with caplog.at_level(logging.DEBUG): + df = session.read.dbapi( + functools.partial(create_connection_to_sqlite3_db, dbpath), + table=table_name, + custom_schema=SQLITE3_DB_CUSTOM_SCHEMA_STRING, + fetch_size=2, + fetch_merge_count=2, + fetch_with_process=fetch_with_process, + ) + assert df.order_by("ID").collect() == assert_data + # 2 batch + 2 fetch size = 2 parquet file + assert caplog.text.count("Retrieved BytesIO parquet from queue") == 2 + + +def test_dbapi_udtf(session, caplog): + udtf_configs = { + "external_access_integration": "snowpark_dbapi_oracledb_test_integration" + } + test_datetime = datetime.datetime(2021, 1, 2, 12, 34, 56) + test_date = test_datetime.date() + test_time = test_datetime.time() + table_name = "PrimitiveTypes" + example_data = [ + ( + 1, + 42, + 3.14, + "Hello, world!", + b"\x00\x01\x02\x03".hex(), + None, + test_datetime.isoformat(), + test_date.isoformat(), + test_time.isoformat(), + 1, + 2, + 3.0, + 4.0, + '{"a": 1, "b": 2}', + "[1, 2, 3]", + "1", + ), + ( + 2, + -10, + 2.718, + "SQLite", + b"\x04\x05\x06\x07".hex(), + None, + test_datetime.isoformat(), + test_date.isoformat(), + test_time.isoformat(), + 1, + 2, + 3.0, + 4.0, + '{"a": 1, "b": 2}', + "[1, 2, 3]", + "2", + ), + ( + 3, + 9999, + -0.99, + "Python", + b"\x08\x09\x0A\x0B".hex(), + None, + test_datetime.isoformat(), + test_date.isoformat(), + test_time.isoformat(), + 1, + 2, + 3.0, + 4.0, + '{"a": 1, "b": 2}', + "[1, 2, 3]", + "3", + ), + ( + 4, + 0, + 123.456, + "Data", + b"\x0C\x0D\x0E\x0F".hex(), + None, + test_datetime.isoformat(), + test_date.isoformat(), + test_time.isoformat(), + 1, + 2, + 3.0, + 4.0, + '{"a": 1, "b": 2}', + "[1, 2, 3]", + "4", + ), + ( + 5, + 0, + 123.456, + "Data", + b"\x0C\x0D\x0E\x0F".hex(), + None, + test_datetime.isoformat(), + test_date.isoformat(), + test_time.isoformat(), + 1, + 2, + 3.0, + 4.0, + '{"a": 1, "b": 2}', + "[1, 2, 3]", + "5", + ), + ( + 6, + 0, + 123.456, + "Data", + b"\x0C\x0D\x0E\x0F".hex(), + None, + test_datetime.isoformat(), + test_date.isoformat(), + test_time.isoformat(), + 1, + 2, + 3.0, + 4.0, + '{"a": 1, "b": 2}', + "[1, 2, 3]", + "6", + ), + ( + 7, + 0, + 123.456, + "Data", + b"\x0C\x0D\x0E\x0F".hex(), + None, + test_datetime.isoformat(), + test_date.isoformat(), + test_time.isoformat(), + 1, + 2, + 3.0, + 4.0, + '{"a": 1, "b": 2}', + "[1, 2, 3]", + "7", + ), + ] + expected_data = [ + ( + 1, + 42, + 3.14, + "Hello, world!", + b"\x00\x01\x02\x03", + None, + test_datetime, + test_date, + test_time, + 1, + 2, + 3.0, + 4.0, + '{\n "a": 1,\n "b": 2\n}', + '[\n "[1, 2, 3]"\n]', + '"1"', + ), + ( + 2, + -10, + 2.718, + "SQLite", + b"\x04\x05\x06\x07", + None, + test_datetime, + test_date, + test_time, + 1, + 2, + 3.0, + 4.0, + '{\n "a": 1,\n "b": 2\n}', + '[\n "[1, 2, 3]"\n]', + '"2"', + ), + ( + 3, + 9999, + -0.99, + "Python", + b"\x08\x09\x0A\x0B", + None, + test_datetime, + test_date, + test_time, + 1, + 2, + 3.0, + 4.0, + '{\n "a": 1,\n "b": 2\n}', + '[\n "[1, 2, 3]"\n]', + '"3"', + ), + ( + 4, + 0, + 123.456, + "Data", + b"\x0C\x0D\x0E\x0F", + None, + test_datetime, + test_date, + test_time, + 1, + 2, + 3.0, + 4.0, + '{\n "a": 1,\n "b": 2\n}', + '[\n "[1, 2, 3]"\n]', + '"4"', + ), + ( + 5, + 0, + 123.456, + "Data", + b"\x0C\x0D\x0E\x0F", + None, + test_datetime, + test_date, + test_time, + 1, + 2, + 3.0, + 4.0, + '{\n "a": 1,\n "b": 2\n}', + '[\n "[1, 2, 3]"\n]', + '"5"', + ), + ( + 6, + 0, + 123.456, + "Data", + b"\x0C\x0D\x0E\x0F", + None, + test_datetime, + test_date, + test_time, + 1, + 2, + 3.0, + 4.0, + '{\n "a": 1,\n "b": 2\n}', + '[\n "[1, 2, 3]"\n]', + '"6"', + ), + ( + 7, + 0, + 123.456, + "Data", + b"\x0C\x0D\x0E\x0F", + None, + test_datetime, + test_date, + test_time, + 1, + 2, + 3.0, + 4.0, + '{\n "a": 1,\n "b": 2\n}', + '[\n "[1, 2, 3]"\n]', + '"7"', + ), + ] + + def create_connection_sqlite3(): + import sqlite3 + + conn = sqlite3.connect(":memory:") + cursor = conn.cursor() + # Create a table with different primitive types + # sqlite3 only supports 5 types: NULL, INTEGER, REAL, TEXT, BLOB + cursor.execute( + f""" + CREATE TABLE IF NOT EXISTS {table_name} ( + id INTEGER PRIMARY KEY, -- Auto-incrementing primary key + int_col INTEGER, -- Integer column + real_col REAL, -- Floating point column + text_col TEXT, -- String column + blob_col BLOB, -- Binary data column + null_col NULL, -- Explicit NULL type (for testing purposes) + ts_col TEXT, -- Timestamp column in TEXT format + date_col TEXT, -- Date column in TEXT format + time_col TEXT, -- Time column in TEXT format + short_col INTEGER, -- Short integer column + long_col INTEGER, -- Long integer column + double_col REAL, -- Double column + decimal_col REAL, -- Decimal column + map_col TEXT, -- Map column in TEXT format + array_col TEXT, -- Array column in TEXT format + var_col TEXT -- Variant column in TEXT format + ) + """ + ) + + cursor.executemany( + f"INSERT INTO {table_name} VALUES ({','.join('?' * 16)})", example_data + ) + conn.commit() + return conn + + with caplog.at_level(logging.DEBUG): + df = session.read.dbapi( + create_connection_sqlite3, + table=table_name, + custom_schema=SQLITE3_DB_CUSTOM_SCHEMA_STRING, + fetch_size=2, + udtf_configs=udtf_configs, + ) + assert df.order_by("ID").collect() == expected_data From 22537090918f4dcb4fdac11c03f42732a44cfcd4 Mon Sep 17 00:00:00 2001 From: Yuyang Wang Date: Thu, 4 Sep 2025 15:50:02 -0700 Subject: [PATCH 2/6] fix test --- tests/integ/datasource/test_data_source_sp.py | 327 +++++++++++++++++- 1 file changed, 321 insertions(+), 6 deletions(-) diff --git a/tests/integ/datasource/test_data_source_sp.py b/tests/integ/datasource/test_data_source_sp.py index 5c80f35e16..2b77ccd475 100644 --- a/tests/integ/datasource/test_data_source_sp.py +++ b/tests/integ/datasource/test_data_source_sp.py @@ -9,13 +9,328 @@ import tempfile import pytest +import sqlite3 -from tests.integ.conftest import RUNNING_ON_GH -from tests.resources.test_data_source_dir.test_data_source_data import ( - sqlite3_db, - create_connection_to_sqlite3_db, - SQLITE3_DB_CUSTOM_SCHEMA_STRING, -) +from tests.utils import RUNNING_ON_GH + +SQLITE3_DB_CUSTOM_SCHEMA_STRING = "id INTEGER, int_col INTEGER, real_col FLOAT, text_col STRING, blob_col BINARY, null_col STRING, ts_col TIMESTAMP, date_col DATE, time_col TIME, short_col SHORT, long_col LONG, double_col DOUBLE, decimal_col DECIMAL, map_col MAP, array_col ARRAY, var_col VARIANT" + + +def create_connection_to_sqlite3_db(db_path): + return sqlite3.connect(db_path) + + +def sqlite3_db(db_path): + conn = create_connection_to_sqlite3_db(db_path) + cursor = conn.cursor() + table_name = "PrimitiveTypes" + columns = [ + "id", + "int_col", + "real_col", + "text_col", + "blob_col", + "null_col", + "ts_col", + "date_col", + "time_col", + "short_col", + "long_col", + "double_col", + "decimal_col", + "map_col", + "array_col", + "var_col", + ] + # Create a table with different primitive types + # sqlite3 only supports 5 types: NULL, INTEGER, REAL, TEXT, BLOB + cursor.execute( + f""" + CREATE TABLE IF NOT EXISTS {table_name} ( + id INTEGER PRIMARY KEY, -- Auto-incrementing primary key + int_col INTEGER, -- Integer column + real_col REAL, -- Floating point column + text_col TEXT, -- String column + blob_col BLOB, -- Binary data column + null_col NULL, -- Explicit NULL type (for testing purposes) + ts_col TEXT, -- Timestamp column in TEXT format + date_col TEXT, -- Date column in TEXT format + time_col TEXT, -- Time column in TEXT format + short_col INTEGER, -- Short integer column + long_col INTEGER, -- Long integer column + double_col REAL, -- Double column + decimal_col REAL, -- Decimal column + map_col TEXT, -- Map column in TEXT format + array_col TEXT, -- Array column in TEXT format + var_col TEXT -- Variant column in TEXT format + ) + """ + ) + test_datetime = datetime.datetime(2021, 1, 2, 12, 34, 56) + test_date = test_datetime.date() + test_time = test_datetime.time() + example_data = [ + ( + 1, + 42, + 3.14, + "Hello, world!", + b"\x00\x01\x02\x03", + None, + test_datetime.isoformat(), + test_date.isoformat(), + test_time.isoformat(), + 1, + 2, + 3.0, + 4.0, + '{"a": 1, "b": 2}', + "[1, 2, 3]", + "1", + ), + ( + 2, + -10, + 2.718, + "SQLite", + b"\x04\x05\x06\x07", + None, + test_datetime.isoformat(), + test_date.isoformat(), + test_time.isoformat(), + 1, + 2, + 3.0, + 4.0, + '{"a": 1, "b": 2}', + "[1, 2, 3]", + "2", + ), + ( + 3, + 9999, + -0.99, + "Python", + b"\x08\x09\x0A\x0B", + None, + test_datetime.isoformat(), + test_date.isoformat(), + test_time.isoformat(), + 1, + 2, + 3.0, + 4.0, + '{"a": 1, "b": 2}', + "[1, 2, 3]", + "3", + ), + ( + 4, + 0, + 123.456, + "Data", + b"\x0C\x0D\x0E\x0F", + None, + test_datetime.isoformat(), + test_date.isoformat(), + test_time.isoformat(), + 1, + 2, + 3.0, + 4.0, + '{"a": 1, "b": 2}', + "[1, 2, 3]", + "4", + ), + ( + 5, + 0, + 123.456, + "Data", + b"\x0C\x0D\x0E\x0F", + None, + test_datetime.isoformat(), + test_date.isoformat(), + test_time.isoformat(), + 1, + 2, + 3.0, + 4.0, + '{"a": 1, "b": 2}', + "[1, 2, 3]", + "5", + ), + ( + 6, + 0, + 123.456, + "Data", + b"\x0C\x0D\x0E\x0F", + None, + test_datetime.isoformat(), + test_date.isoformat(), + test_time.isoformat(), + 1, + 2, + 3.0, + 4.0, + '{"a": 1, "b": 2}', + "[1, 2, 3]", + "6", + ), + ( + 7, + 0, + 123.456, + "Data", + b"\x0C\x0D\x0E\x0F", + None, + test_datetime.isoformat(), + test_date.isoformat(), + test_time.isoformat(), + 1, + 2, + 3.0, + 4.0, + '{"a": 1, "b": 2}', + "[1, 2, 3]", + "7", + ), + ] + assert_data = [ + ( + 1, + 42, + 3.14, + "Hello, world!", + b"\x00\x01\x02\x03", + None, + test_datetime, + test_date, + test_time, + 1, + 2, + 3.0, + 4.0, + '{\n "a": 1,\n "b": 2\n}', + '[\n "[1, 2, 3]"\n]', + '"1"', + ), + ( + 2, + -10, + 2.718, + "SQLite", + b"\x04\x05\x06\x07", + None, + test_datetime, + test_date, + test_time, + 1, + 2, + 3.0, + 4.0, + '{\n "a": 1,\n "b": 2\n}', + '[\n "[1, 2, 3]"\n]', + '"2"', + ), + ( + 3, + 9999, + -0.99, + "Python", + b"\x08\x09\x0A\x0B", + None, + test_datetime, + test_date, + test_time, + 1, + 2, + 3.0, + 4.0, + '{\n "a": 1,\n "b": 2\n}', + '[\n "[1, 2, 3]"\n]', + '"3"', + ), + ( + 4, + 0, + 123.456, + "Data", + b"\x0C\x0D\x0E\x0F", + None, + test_datetime, + test_date, + test_time, + 1, + 2, + 3.0, + 4.0, + '{\n "a": 1,\n "b": 2\n}', + '[\n "[1, 2, 3]"\n]', + '"4"', + ), + ( + 5, + 0, + 123.456, + "Data", + b"\x0C\x0D\x0E\x0F", + None, + test_datetime, + test_date, + test_time, + 1, + 2, + 3.0, + 4.0, + '{\n "a": 1,\n "b": 2\n}', + '[\n "[1, 2, 3]"\n]', + '"5"', + ), + ( + 6, + 0, + 123.456, + "Data", + b"\x0C\x0D\x0E\x0F", + None, + test_datetime, + test_date, + test_time, + 1, + 2, + 3.0, + 4.0, + '{\n "a": 1,\n "b": 2\n}', + '[\n "[1, 2, 3]"\n]', + '"6"', + ), + ( + 7, + 0, + 123.456, + "Data", + b"\x0C\x0D\x0E\x0F", + None, + test_datetime, + test_date, + test_time, + 1, + 2, + 3.0, + 4.0, + '{\n "a": 1,\n "b": 2\n}', + '[\n "[1, 2, 3]"\n]', + '"7"', + ), + ] + cursor.executemany( + f"INSERT INTO {table_name} VALUES ({','.join('?' * 16)})", example_data + ) + conn.commit() + conn.close() + return table_name, columns, example_data, assert_data pytestmark = [ From b3dd4c469c1b4087a70935bcb89a81a3f2e9b922 Mon Sep 17 00:00:00 2001 From: Yuyang Wang Date: Tue, 9 Sep 2025 14:23:46 -0700 Subject: [PATCH 3/6] remove integration --- tests/integ/datasource/test_data_source_sp.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/tests/integ/datasource/test_data_source_sp.py b/tests/integ/datasource/test_data_source_sp.py index 2b77ccd475..57394ccb1e 100644 --- a/tests/integ/datasource/test_data_source_sp.py +++ b/tests/integ/datasource/test_data_source_sp.py @@ -365,9 +365,7 @@ def test_dbapi_local(session, caplog, fetch_with_process): def test_dbapi_udtf(session, caplog): - udtf_configs = { - "external_access_integration": "snowpark_dbapi_oracledb_test_integration" - } + udtf_configs = {"external_access_integration": ""} test_datetime = datetime.datetime(2021, 1, 2, 12, 34, 56) test_date = test_datetime.date() test_time = test_datetime.time() From e482ca8c42a8cc22debe57e97edc57f27b68927d Mon Sep 17 00:00:00 2001 From: Yuyang Wang Date: Wed, 10 Sep 2025 09:54:46 -0700 Subject: [PATCH 4/6] fix test --- tests/integ/datasource/test_data_source_sp.py | 536 ++++-------------- 1 file changed, 122 insertions(+), 414 deletions(-) diff --git a/tests/integ/datasource/test_data_source_sp.py b/tests/integ/datasource/test_data_source_sp.py index 57394ccb1e..e649803144 100644 --- a/tests/integ/datasource/test_data_source_sp.py +++ b/tests/integ/datasource/test_data_source_sp.py @@ -4,13 +4,13 @@ import datetime import functools -import logging import os import tempfile import pytest import sqlite3 +from snowflake.snowpark import Row from tests.utils import RUNNING_ON_GH SQLITE3_DB_CUSTOM_SCHEMA_STRING = "id INTEGER, int_col INTEGER, real_col FLOAT, text_col STRING, blob_col BINARY, null_col STRING, ts_col TIMESTAMP, date_col DATE, time_col TIME, short_col SHORT, long_col LONG, double_col DOUBLE, decimal_col DECIMAL, map_col MAP, array_col ARRAY, var_col VARIANT" @@ -124,205 +124,61 @@ def sqlite3_db(db_path): "[1, 2, 3]", "3", ), - ( - 4, - 0, - 123.456, - "Data", - b"\x0C\x0D\x0E\x0F", - None, - test_datetime.isoformat(), - test_date.isoformat(), - test_time.isoformat(), - 1, - 2, - 3.0, - 4.0, - '{"a": 1, "b": 2}', - "[1, 2, 3]", - "4", - ), - ( - 5, - 0, - 123.456, - "Data", - b"\x0C\x0D\x0E\x0F", - None, - test_datetime.isoformat(), - test_date.isoformat(), - test_time.isoformat(), - 1, - 2, - 3.0, - 4.0, - '{"a": 1, "b": 2}', - "[1, 2, 3]", - "5", - ), - ( - 6, - 0, - 123.456, - "Data", - b"\x0C\x0D\x0E\x0F", - None, - test_datetime.isoformat(), - test_date.isoformat(), - test_time.isoformat(), - 1, - 2, - 3.0, - 4.0, - '{"a": 1, "b": 2}', - "[1, 2, 3]", - "6", - ), - ( - 7, - 0, - 123.456, - "Data", - b"\x0C\x0D\x0E\x0F", - None, - test_datetime.isoformat(), - test_date.isoformat(), - test_time.isoformat(), - 1, - 2, - 3.0, - 4.0, - '{"a": 1, "b": 2}', - "[1, 2, 3]", - "7", - ), ] assert_data = [ - ( - 1, - 42, - 3.14, - "Hello, world!", - b"\x00\x01\x02\x03", - None, - test_datetime, - test_date, - test_time, - 1, - 2, - 3.0, - 4.0, - '{\n "a": 1,\n "b": 2\n}', - '[\n "[1, 2, 3]"\n]', - '"1"', + Row( + ID=1, + INT_COL=42, + REAL_COL=3.14, + TEXT_COL="Hello, world!", + BLOB_COL=bytearray(b"\x00\x01\x02\x03"), + NULL_COL=None, + TS_COL=datetime.datetime(2021, 1, 2, 12, 34, 56), + DATE_COL=datetime.date(2021, 1, 2), + TIME_COL=datetime.time(12, 34, 56), + SHORT_COL=1, + LONG_COL=2, + DOUBLE_COL=3.0, + DECIMAL_COL=4, + MAP_COL='{\n "a": 1,\n "b": 2\n}', + ARRAY_COL='[\n "[1, 2, 3]"\n]', + VAR_COL='"1"', ), - ( - 2, - -10, - 2.718, - "SQLite", - b"\x04\x05\x06\x07", - None, - test_datetime, - test_date, - test_time, - 1, - 2, - 3.0, - 4.0, - '{\n "a": 1,\n "b": 2\n}', - '[\n "[1, 2, 3]"\n]', - '"2"', + Row( + ID=2, + INT_COL=-10, + REAL_COL=2.718, + TEXT_COL="SQLite", + BLOB_COL=bytearray(b"\x04\x05\x06\x07"), + NULL_COL=None, + TS_COL=datetime.datetime(2021, 1, 2, 12, 34, 56), + DATE_COL=datetime.date(2021, 1, 2), + TIME_COL=datetime.time(12, 34, 56), + SHORT_COL=1, + LONG_COL=2, + DOUBLE_COL=3.0, + DECIMAL_COL=4, + MAP_COL='{\n "a": 1,\n "b": 2\n}', + ARRAY_COL='[\n "[1, 2, 3]"\n]', + VAR_COL='"2"', ), - ( - 3, - 9999, - -0.99, - "Python", - b"\x08\x09\x0A\x0B", - None, - test_datetime, - test_date, - test_time, - 1, - 2, - 3.0, - 4.0, - '{\n "a": 1,\n "b": 2\n}', - '[\n "[1, 2, 3]"\n]', - '"3"', - ), - ( - 4, - 0, - 123.456, - "Data", - b"\x0C\x0D\x0E\x0F", - None, - test_datetime, - test_date, - test_time, - 1, - 2, - 3.0, - 4.0, - '{\n "a": 1,\n "b": 2\n}', - '[\n "[1, 2, 3]"\n]', - '"4"', - ), - ( - 5, - 0, - 123.456, - "Data", - b"\x0C\x0D\x0E\x0F", - None, - test_datetime, - test_date, - test_time, - 1, - 2, - 3.0, - 4.0, - '{\n "a": 1,\n "b": 2\n}', - '[\n "[1, 2, 3]"\n]', - '"5"', - ), - ( - 6, - 0, - 123.456, - "Data", - b"\x0C\x0D\x0E\x0F", - None, - test_datetime, - test_date, - test_time, - 1, - 2, - 3.0, - 4.0, - '{\n "a": 1,\n "b": 2\n}', - '[\n "[1, 2, 3]"\n]', - '"6"', - ), - ( - 7, - 0, - 123.456, - "Data", - b"\x0C\x0D\x0E\x0F", - None, - test_datetime, - test_date, - test_time, - 1, - 2, - 3.0, - 4.0, - '{\n "a": 1,\n "b": 2\n}', - '[\n "[1, 2, 3]"\n]', - '"7"', + Row( + ID=3, + INT_COL=9999, + REAL_COL=-0.99, + TEXT_COL="Python", + BLOB_COL=bytearray(b"\x08\t\n\x0b"), + NULL_COL=None, + TS_COL=datetime.datetime(2021, 1, 2, 12, 34, 56), + DATE_COL=datetime.date(2021, 1, 2), + TIME_COL=datetime.time(12, 34, 56), + SHORT_COL=1, + LONG_COL=2, + DOUBLE_COL=3.0, + DECIMAL_COL=4, + MAP_COL='{\n "a": 1,\n "b": 2\n}', + ARRAY_COL='[\n "[1, 2, 3]"\n]', + VAR_COL='"3"', ), ] cursor.executemany( @@ -346,25 +202,22 @@ def sqlite3_db(db_path): @pytest.mark.parametrize("fetch_with_process", [True, False]) -def test_dbapi_local(session, caplog, fetch_with_process): +def test_dbapi_local(session, fetch_with_process): with tempfile.TemporaryDirectory() as temp_dir: dbpath = os.path.join(temp_dir, "testsqlite3.db") table_name, _, _, assert_data = sqlite3_db(dbpath) - with caplog.at_level(logging.DEBUG): - df = session.read.dbapi( - functools.partial(create_connection_to_sqlite3_db, dbpath), - table=table_name, - custom_schema=SQLITE3_DB_CUSTOM_SCHEMA_STRING, - fetch_size=2, - fetch_merge_count=2, - fetch_with_process=fetch_with_process, - ) - assert df.order_by("ID").collect() == assert_data - # 2 batch + 2 fetch size = 2 parquet file - assert caplog.text.count("Retrieved BytesIO parquet from queue") == 2 + df = session.read.dbapi( + functools.partial(create_connection_to_sqlite3_db, dbpath), + table=table_name, + custom_schema=SQLITE3_DB_CUSTOM_SCHEMA_STRING, + fetch_size=2, + fetch_merge_count=2, + fetch_with_process=fetch_with_process, + ) + assert df.order_by("ID").collect() == assert_data -def test_dbapi_udtf(session, caplog): +def test_dbapi_udtf(session): udtf_configs = {"external_access_integration": ""} test_datetime = datetime.datetime(2021, 1, 2, 12, 34, 56) test_date = test_datetime.date() @@ -425,205 +278,61 @@ def test_dbapi_udtf(session, caplog): "[1, 2, 3]", "3", ), - ( - 4, - 0, - 123.456, - "Data", - b"\x0C\x0D\x0E\x0F".hex(), - None, - test_datetime.isoformat(), - test_date.isoformat(), - test_time.isoformat(), - 1, - 2, - 3.0, - 4.0, - '{"a": 1, "b": 2}', - "[1, 2, 3]", - "4", - ), - ( - 5, - 0, - 123.456, - "Data", - b"\x0C\x0D\x0E\x0F".hex(), - None, - test_datetime.isoformat(), - test_date.isoformat(), - test_time.isoformat(), - 1, - 2, - 3.0, - 4.0, - '{"a": 1, "b": 2}', - "[1, 2, 3]", - "5", - ), - ( - 6, - 0, - 123.456, - "Data", - b"\x0C\x0D\x0E\x0F".hex(), - None, - test_datetime.isoformat(), - test_date.isoformat(), - test_time.isoformat(), - 1, - 2, - 3.0, - 4.0, - '{"a": 1, "b": 2}', - "[1, 2, 3]", - "6", - ), - ( - 7, - 0, - 123.456, - "Data", - b"\x0C\x0D\x0E\x0F".hex(), - None, - test_datetime.isoformat(), - test_date.isoformat(), - test_time.isoformat(), - 1, - 2, - 3.0, - 4.0, - '{"a": 1, "b": 2}', - "[1, 2, 3]", - "7", - ), ] expected_data = [ - ( - 1, - 42, - 3.14, - "Hello, world!", - b"\x00\x01\x02\x03", - None, - test_datetime, - test_date, - test_time, - 1, - 2, - 3.0, - 4.0, - '{\n "a": 1,\n "b": 2\n}', - '[\n "[1, 2, 3]"\n]', - '"1"', + Row( + ID=1, + INT_COL=42, + REAL_COL=3.14, + TEXT_COL="Hello, world!", + BLOB_COL=bytearray(b"\x00\x01\x02\x03"), + NULL_COL=None, + TS_COL=datetime.datetime(2021, 1, 2, 12, 34, 56), + DATE_COL=datetime.date(2021, 1, 2), + TIME_COL=datetime.time(12, 34, 56), + SHORT_COL=1, + LONG_COL=2, + DOUBLE_COL=3.0, + DECIMAL_COL=4, + MAP_COL='{\n "a": 1,\n "b": 2\n}', + ARRAY_COL='[\n "[1, 2, 3]"\n]', + VAR_COL='"1"', ), - ( - 2, - -10, - 2.718, - "SQLite", - b"\x04\x05\x06\x07", - None, - test_datetime, - test_date, - test_time, - 1, - 2, - 3.0, - 4.0, - '{\n "a": 1,\n "b": 2\n}', - '[\n "[1, 2, 3]"\n]', - '"2"', + Row( + ID=2, + INT_COL=-10, + REAL_COL=2.718, + TEXT_COL="SQLite", + BLOB_COL=bytearray(b"\x04\x05\x06\x07"), + NULL_COL=None, + TS_COL=datetime.datetime(2021, 1, 2, 12, 34, 56), + DATE_COL=datetime.date(2021, 1, 2), + TIME_COL=datetime.time(12, 34, 56), + SHORT_COL=1, + LONG_COL=2, + DOUBLE_COL=3.0, + DECIMAL_COL=4, + MAP_COL='{\n "a": 1,\n "b": 2\n}', + ARRAY_COL='[\n "[1, 2, 3]"\n]', + VAR_COL='"2"', ), - ( - 3, - 9999, - -0.99, - "Python", - b"\x08\x09\x0A\x0B", - None, - test_datetime, - test_date, - test_time, - 1, - 2, - 3.0, - 4.0, - '{\n "a": 1,\n "b": 2\n}', - '[\n "[1, 2, 3]"\n]', - '"3"', - ), - ( - 4, - 0, - 123.456, - "Data", - b"\x0C\x0D\x0E\x0F", - None, - test_datetime, - test_date, - test_time, - 1, - 2, - 3.0, - 4.0, - '{\n "a": 1,\n "b": 2\n}', - '[\n "[1, 2, 3]"\n]', - '"4"', - ), - ( - 5, - 0, - 123.456, - "Data", - b"\x0C\x0D\x0E\x0F", - None, - test_datetime, - test_date, - test_time, - 1, - 2, - 3.0, - 4.0, - '{\n "a": 1,\n "b": 2\n}', - '[\n "[1, 2, 3]"\n]', - '"5"', - ), - ( - 6, - 0, - 123.456, - "Data", - b"\x0C\x0D\x0E\x0F", - None, - test_datetime, - test_date, - test_time, - 1, - 2, - 3.0, - 4.0, - '{\n "a": 1,\n "b": 2\n}', - '[\n "[1, 2, 3]"\n]', - '"6"', - ), - ( - 7, - 0, - 123.456, - "Data", - b"\x0C\x0D\x0E\x0F", - None, - test_datetime, - test_date, - test_time, - 1, - 2, - 3.0, - 4.0, - '{\n "a": 1,\n "b": 2\n}', - '[\n "[1, 2, 3]"\n]', - '"7"', + Row( + ID=3, + INT_COL=9999, + REAL_COL=-0.99, + TEXT_COL="Python", + BLOB_COL=bytearray(b"\x08\t\n\x0b"), + NULL_COL=None, + TS_COL=datetime.datetime(2021, 1, 2, 12, 34, 56), + DATE_COL=datetime.date(2021, 1, 2), + TIME_COL=datetime.time(12, 34, 56), + SHORT_COL=1, + LONG_COL=2, + DOUBLE_COL=3.0, + DECIMAL_COL=4, + MAP_COL='{\n "a": 1,\n "b": 2\n}', + ARRAY_COL='[\n "[1, 2, 3]"\n]', + VAR_COL='"3"', ), ] @@ -663,12 +372,11 @@ def create_connection_sqlite3(): conn.commit() return conn - with caplog.at_level(logging.DEBUG): - df = session.read.dbapi( - create_connection_sqlite3, - table=table_name, - custom_schema=SQLITE3_DB_CUSTOM_SCHEMA_STRING, - fetch_size=2, - udtf_configs=udtf_configs, - ) - assert df.order_by("ID").collect() == expected_data + df = session.read.dbapi( + create_connection_sqlite3, + table=table_name, + custom_schema=SQLITE3_DB_CUSTOM_SCHEMA_STRING, + fetch_size=2, + udtf_configs=udtf_configs, + ) + assert df.order_by("ID").collect() == expected_data From 4eaa33dd6cdbb89e48b50acd29b07ade3a4208e4 Mon Sep 17 00:00:00 2001 From: Yuyang Wang Date: Fri, 12 Sep 2025 10:46:33 -0700 Subject: [PATCH 5/6] fix test --- tests/integ/datasource/test_data_source_sp.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integ/datasource/test_data_source_sp.py b/tests/integ/datasource/test_data_source_sp.py index e649803144..29cc65c83a 100644 --- a/tests/integ/datasource/test_data_source_sp.py +++ b/tests/integ/datasource/test_data_source_sp.py @@ -13,7 +13,7 @@ from snowflake.snowpark import Row from tests.utils import RUNNING_ON_GH -SQLITE3_DB_CUSTOM_SCHEMA_STRING = "id INTEGER, int_col INTEGER, real_col FLOAT, text_col STRING, blob_col BINARY, null_col STRING, ts_col TIMESTAMP, date_col DATE, time_col TIME, short_col SHORT, long_col LONG, double_col DOUBLE, decimal_col DECIMAL, map_col MAP, array_col ARRAY, var_col VARIANT" +SQLITE3_DB_CUSTOM_SCHEMA_STRING = "id INTEGER, int_col INTEGER, real_col FLOAT, text_col STRING, blob_col BINARY, null_col STRING, ts_col TIMESTAMP_NTZ, date_col DATE, time_col TIME, short_col SHORT, long_col LONG, double_col DOUBLE, decimal_col DECIMAL, map_col MAP, array_col ARRAY, var_col VARIANT" def create_connection_to_sqlite3_db(db_path): From bd5c3f31a672e8cc10e021ff9471ec6662881705 Mon Sep 17 00:00:00 2001 From: Yuyang Wang Date: Mon, 15 Sep 2025 10:26:20 -0700 Subject: [PATCH 6/6] reformat schema string --- tests/integ/datasource/test_data_source_sp.py | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/tests/integ/datasource/test_data_source_sp.py b/tests/integ/datasource/test_data_source_sp.py index 29cc65c83a..6ab6954172 100644 --- a/tests/integ/datasource/test_data_source_sp.py +++ b/tests/integ/datasource/test_data_source_sp.py @@ -13,7 +13,24 @@ from snowflake.snowpark import Row from tests.utils import RUNNING_ON_GH -SQLITE3_DB_CUSTOM_SCHEMA_STRING = "id INTEGER, int_col INTEGER, real_col FLOAT, text_col STRING, blob_col BINARY, null_col STRING, ts_col TIMESTAMP_NTZ, date_col DATE, time_col TIME, short_col SHORT, long_col LONG, double_col DOUBLE, decimal_col DECIMAL, map_col MAP, array_col ARRAY, var_col VARIANT" +SQLITE3_DB_CUSTOM_SCHEMA_STRING = """ +id INTEGER, +int_col INTEGER, +real_col FLOAT, +text_col STRING, +blob_col BINARY, +null_col STRING, +ts_col TIMESTAMP_NTZ, +date_col DATE, +time_col TIME, +short_col SHORT, +long_col LONG, +double_col DOUBLE, +decimal_col DECIMAL, +map_col MAP, +array_col ARRAY, +var_col VARIANT +""" def create_connection_to_sqlite3_db(db_path):