Skip to content

Commit 01c4d59

Browse files
committed
refactor: upgrade to python 3.13
1 parent 767bd16 commit 01c4d59

29 files changed

Lines changed: 205 additions & 133 deletions

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -101,7 +101,7 @@ source venv/bin/activate
101101

102102
_On Windows_
103103
```powershell
104-
py -3.12 -m venv venv
104+
py -3.13 -m venv venv
105105
venv\Scripts\activate
106106
```
107107

deploy/install_linuxodbc.sh

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -31,19 +31,16 @@ fi
3131
openssl x509 -inform DER -in cert.crt -out /usr/local/share/ca-certificates/microsoft_tls_g2_ecc_ocsp_02.pem
3232
update-ca-certificates
3333

34-
# Download the desired packages
34+
# Download the ODBC driver (msodbcsql18) only — mssql-tools18 (sqlcmd, bcp)
35+
# is not needed at runtime and triggers false-positive secret findings in security scans
3536
curl -O https://download.microsoft.com/download/9dcab408-e0d4-4571-a81a-5a0951e3445f/msodbcsql18_18.6.1.1-1_$architecture.apk
36-
curl -O https://download.microsoft.com/download/b60bb8b6-d398-4819-9950-2e30cf725fb0/mssql-tools18_18.6.1.1-1_$architecture.apk
3737

3838
# Verify signature; if 'gpg' is missing, install it using 'apk add gnupg':
3939
curl -O https://download.microsoft.com/download/9dcab408-e0d4-4571-a81a-5a0951e3445f/msodbcsql18_18.6.1.1-1_$architecture.sig
40-
curl -O https://download.microsoft.com/download/b60bb8b6-d398-4819-9950-2e30cf725fb0/mssql-tools18_18.6.1.1-1_$architecture.sig
4140

4241
curl https://packages.microsoft.com/keys/microsoft.asc | gpg --dearmor > microsoft.gpg
4342
gpgv --keyring ./microsoft.gpg msodbcsql18_*.sig msodbcsql18_*.apk
44-
gpgv --keyring ./microsoft.gpg mssql-tools18_*.sig mssql-tools18_*.apk
4543

46-
# Install the packages
44+
# Install the ODBC driver
4745
apk add --no-cache --allow-untrusted msodbcsql18_18.6.1.1-1_$architecture.apk
48-
apk add --no-cache --allow-untrusted mssql-tools18_18.6.1.1-1_$architecture.apk
4946
)

deploy/testgen-base.dockerfile

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
FROM python:3.12-alpine3.23
1+
FROM python:3.13-alpine3.23
22

33
ENV LANG=C.UTF-8
44
ENV LC_ALL=C.UTF-8
@@ -47,12 +47,12 @@ RUN python3 -m pip install --no-cache-dir --upgrade pip==26.0
4747
# We download the wheel for the correct arch, then extract it directly into site-packages
4848
# (wheels are zip files). gcompat provides the glibc shim needed at runtime.
4949
RUN ARCH=$(uname -m) && \
50-
pip download --platform manylinux2014_${ARCH} --python-version 3.12 --only-binary :all: \
50+
pip download --platform manylinux2014_${ARCH} --python-version 3.13 --only-binary :all: \
5151
--no-deps -d /tmp/wheels hdbcli==2.25.31 && \
52-
python3 -m zipfile -e /tmp/wheels/hdbcli-*.whl /dk/lib/python3.12/site-packages/ && \
52+
python3 -m zipfile -e /tmp/wheels/hdbcli-*.whl /dk/lib/python3.13/site-packages/ && \
5353
# Copy dist-info to system site-packages so pip sees hdbcli as installed during
5454
# dependency resolution (sqlalchemy-hana transitively depends on hdbcli~=2.10)
55-
cp -r /dk/lib/python3.12/site-packages/hdbcli-*.dist-info \
55+
cp -r /dk/lib/python3.13/site-packages/hdbcli-*.dist-info \
5656
"$(python3 -c 'import sysconfig; print(sysconfig.get_path("purelib"))')"/ && \
5757
rm -rf /tmp/wheels
5858

@@ -78,4 +78,8 @@ RUN apk del \
7878
unixodbc-dev \
7979
apache-arrow-dev
8080

81+
# Remove interactive ODBC tools — not needed at runtime, and iusql triggers
82+
# false-positive secret detection in security scanners (SECRET-3010)
83+
RUN rm -f /usr/bin/iusql /usr/bin/isql
84+
8185
RUN rm -rf /root/.cache/pip /tmp/dk/install_linuxodbc.sh

deploy/testgen.dockerfile

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,11 +7,15 @@ ARG TESTGEN_VERSION
77
ARG TESTGEN_DOCKER_HUB_REPO
88
ARG TESTGEN_SUPPORT_EMAIL
99

10-
ENV PYTHONPATH=/dk/lib/python3.12/site-packages
10+
ENV PYTHONPATH=/dk/lib/python3.13/site-packages
1111
ENV PATH=$PATH:/dk/bin
1212

1313
RUN apk upgrade
1414

15+
# Remove interactive ODBC tools — not needed at runtime, and iusql triggers
16+
# false-positive secret detection in security scanners (SECRET-3010)
17+
RUN rm -f /usr/bin/iusql /usr/bin/isql
18+
1519
# Now install everything (hdbcli is pre-installed in the base image via manual wheel extraction)
1620
COPY . /tmp/dk/
1721
RUN sed -i '/hdbcli/d' /tmp/dk/pyproject.toml /tmp/dk/testgen/pyproject.toml 2>/dev/null; \
@@ -20,7 +24,7 @@ RUN sed -i '/hdbcli/d' /tmp/dk/pyproject.toml /tmp/dk/testgen/pyproject.toml 2>/
2024
# Generate third-party license notices from installed packages
2125
RUN pip install --no-cache-dir pip-licenses \
2226
&& SCRIPT=$(find /tmp/dk -name generate_third_party_notices.py | head -1) \
23-
&& PYTHONPATH=/dk/lib/python3.12/site-packages python3 "$SCRIPT" --output /dk/THIRD-PARTY-NOTICES \
27+
&& PYTHONPATH=/dk/lib/python3.13/site-packages python3 "$SCRIPT" --output /dk/THIRD-PARTY-NOTICES \
2428
&& pip uninstall -y pip-licenses
2529

2630
RUN rm -Rf /tmp/dk /root/.cache/pip
@@ -31,7 +35,7 @@ RUN addgroup -S testgen && adduser -S testgen -G testgen
3135

3236
# Streamlit has to be able to write to these dirs
3337
RUN mkdir /var/lib/testgen
34-
RUN chown -R testgen:testgen /var/lib/testgen /dk/lib/python3.12/site-packages/streamlit/static /dk/lib/python3.12/site-packages/testgen/ui/components/frontend
38+
RUN chown -R testgen:testgen /var/lib/testgen /dk/lib/python3.13/site-packages/streamlit/static /dk/lib/python3.13/site-packages/testgen/ui/components/frontend
3539

3640
ENV TESTGEN_VERSION=${TESTGEN_VERSION}
3741
ENV TESTGEN_DOCKER_HUB_REPO=${TESTGEN_DOCKER_HUB_REPO}

docs/local_development.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -27,13 +27,13 @@ From the root of your local repository, create and activate a virtual environmen
2727

2828
_On Linux/Mac_
2929
```shell
30-
python3.12 -m venv venv
30+
python3.13 -m venv venv
3131
source venv/bin/activate
3232
```
3333

3434
_On Windows_
3535
```powershell
36-
py -3.12 -m venv venv
36+
py -3.13 -m venv venv
3737
venv\Scripts\activate
3838
```
3939

pyproject.toml

Lines changed: 13 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -21,30 +21,31 @@ classifiers = [
2121
"License :: OSI Approved :: Apache Software License",
2222
"Development Status :: 5 - Production/Stable",
2323
"Operating System :: OS Independent",
24-
"Programming Language :: Python :: 3.12",
24+
"Programming Language :: Python :: 3.13",
2525
"Topic :: System :: Monitoring",
2626
]
2727
keywords = [ "dataops", "data", "quality", "testing", "database", "profiling" ]
2828
requires-python = ">=3.12"
2929

3030
dependencies = [
31-
"PyYAML==6.0.1",
32-
"click==8.1.3",
33-
"sqlalchemy==1.4.46",
34-
"databricks-sql-connector==2.9.3",
31+
"PyYAML==6.0.3",
32+
"click==8.3.1",
33+
"sqlalchemy==2.0.48",
34+
"databricks-sql-connector==4.2.5",
35+
"databricks-sqlalchemy==2.0.9",
3536
"databricks-sdk>=0.20.0",
3637
"snowflake-sqlalchemy==1.9.0",
37-
"sqlalchemy-bigquery==1.14.1",
38+
"sqlalchemy-bigquery==1.16.0",
3839
"oracledb==3.4.0",
3940
"hdbcli==2.25.31",
40-
"sqlalchemy-hana==2.1.0",
41-
"pyodbc==5.0.0",
42-
"psycopg2-binary==2.9.9",
41+
"sqlalchemy-hana==4.4.0",
42+
"pyodbc==5.2.0",
43+
"psycopg2-binary==2.9.11",
4344
"pycryptodome==3.21",
4445
"prettytable==3.7.0",
4546
"requests_extensions==1.1.3",
46-
"numpy==1.26.4",
47-
"pandas==2.1.4",
47+
"numpy==2.1.3",
48+
"pandas==2.2.3",
4849
"streamlit==1.55.0",
4950
"streamlit-extras==0.3.0",
5051
"streamlit-aggrid==0.3.4.post3",
@@ -169,7 +170,7 @@ filterwarnings = [
169170
# for an explanation of their functionality.
170171
# WARNING: When changing mypy configurations, be sure to test them after removing your .mypy_cache
171172
[tool.mypy]
172-
python_version = "3.12"
173+
python_version = "3.13"
173174
check_untyped_defs = true
174175
disallow_untyped_decorators = true
175176
disallow_untyped_defs = true

testgen/commands/queries/refresh_data_chars_query.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -113,7 +113,7 @@ def get_row_counts(self, table_names: Iterable[str]) -> list[tuple[str, None]]:
113113
schema = self.table_group.table_group_schema
114114
quote = self.flavor_service.quote_character
115115
count_queries = [
116-
f"SELECT '{table}', COUNT(*) FROM {quote}{schema}{quote}.{quote}{table}{quote}"
116+
f"SELECT '{table}' AS table_name, COUNT(*) AS row_count FROM {quote}{schema}{quote}.{quote}{table}{quote}"
117117
for table in table_names
118118
]
119119
chunked_queries = chunk_queries(count_queries, " UNION ALL ", self.connection.max_query_chars)

testgen/commands/run_refresh_data_chars.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ def run_data_chars_refresh(connection: Connection, table_group: TableGroup, run_
3535
count_queries, use_target_db=True, max_threads=connection.max_threads,
3636
)
3737

38-
count_map = dict(count_results)
38+
count_map = {row["table_name"]: row["row_count"] for row in count_results}
3939
for column in data_chars:
4040
column.record_ct = count_map.get(column.table_name)
4141

testgen/common/database/database_service.py

Lines changed: 36 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
import csv
33
import importlib
44
import logging
5+
import math
56
import re
67
from collections.abc import Callable, Iterable
78
from contextlib import suppress
@@ -10,11 +11,10 @@
1011
from urllib.parse import quote_plus
1112

1213
import psycopg2.sql
13-
from sqlalchemy import create_engine, text
14-
from sqlalchemy.engine import LegacyRow, RowMapping
15-
from sqlalchemy.engine.base import Connection, Engine
14+
from sqlalchemy import Connection, Engine, Row, create_engine, text
15+
from sqlalchemy.engine import RowMapping
1616
from sqlalchemy.exc import ProgrammingError, SQLAlchemyError
17-
from sqlalchemy.pool.base import _ConnectionFairy
17+
from sqlalchemy.pool import PoolProxiedConnection
1818

1919
from testgen import settings
2020
from testgen.common.credentials import (
@@ -32,8 +32,9 @@
3232
SQLFlavor,
3333
resolve_connection_params,
3434
)
35-
from testgen.common.standalone_postgres import get_connection_string as get_standalone_connection_string, is_standalone_mode
3635
from testgen.common.read_file import get_template_files
36+
from testgen.common.standalone_postgres import get_connection_string as get_standalone_connection_string
37+
from testgen.common.standalone_postgres import is_standalone_mode
3738
from testgen.utils import get_exception_message
3839

3940
LOG = logging.getLogger("testgen")
@@ -103,12 +104,14 @@ def create_database(
103104
) -> None:
104105
LOG.debug("DB operation: create_database on App database (User type = database_admin)")
105106

107+
# DDL like CREATE/DROP DATABASE cannot run inside a transaction.
108+
# Use AUTOCOMMIT isolation so each statement commits immediately.
106109
connection = _init_db_connection(
107110
user_override=params["TESTGEN_ADMIN_USER"],
108111
password_override=params["TESTGEN_ADMIN_PASSWORD"],
109112
user_type="database_admin",
110113
)
111-
connection.execute("commit")
114+
connection = connection.execution_options(isolation_level="AUTOCOMMIT")
112115

113116
with connection:
114117
if drop_existing:
@@ -118,20 +121,16 @@ def create_database(
118121
),
119122
{"database_name": database_name},
120123
)
121-
connection.execute("commit")
122-
connection.execute(f"DROP DATABASE IF EXISTS {database_name}")
123-
connection.execute("commit")
124+
connection.execute(text(f"DROP DATABASE IF EXISTS {database_name}"))
124125
if drop_users_and_roles:
125126
if user := params.get("TESTGEN_USER"):
126-
connection.execute(f"DROP USER IF EXISTS {user}")
127+
connection.execute(text(f"DROP USER IF EXISTS {user}"))
127128
if report_user := params.get("TESTGEN_REPORT_USER"):
128-
connection.execute(f"DROP USER IF EXISTS {report_user}")
129-
connection.execute("DROP ROLE IF EXISTS testgen_execute_role")
130-
connection.execute("DROP ROLE IF EXISTS testgen_report_role")
131-
connection.execute("commit")
129+
connection.execute(text(f"DROP USER IF EXISTS {report_user}"))
130+
connection.execute(text("DROP ROLE IF EXISTS testgen_execute_role"))
131+
connection.execute(text("DROP ROLE IF EXISTS testgen_report_role"))
132132
with suppress(ProgrammingError):
133-
connection.execute(f"CREATE DATABASE {database_name}")
134-
connection.close()
133+
connection.execute(text(f"CREATE DATABASE {database_name}"))
135134

136135

137136
def execute_db_queries(
@@ -150,7 +149,6 @@ def execute_db_queries(
150149
LOG.debug("No queries to process")
151150
for index, (query, params) in enumerate(queries):
152151
LOG.debug(f"Query {index + 1} of {len(queries)}: {query}")
153-
transaction = connection.begin()
154152
result = connection.execute(text(query), params)
155153
row_counts.append(result.rowcount)
156154
if result.rowcount == -1:
@@ -163,7 +161,7 @@ def execute_db_queries(
163161
except Exception:
164162
return_values.append(None)
165163

166-
transaction.commit()
164+
connection.commit()
167165
LOG.debug(message)
168166

169167
return return_values, row_counts
@@ -180,28 +178,28 @@ def fetch_from_db_threaded(
180178
use_target_db: bool = False,
181179
max_threads: int = 4,
182180
progress_callback: Callable[[ThreadedProgress], None] | None = None,
183-
) -> tuple[list[LegacyRow], list[str], dict[int, str]]:
181+
) -> tuple[list[RowMapping], list[str], dict[int, str]]:
184182
LOG.debug(f"DB operation: fetch_from_db_threaded ({len(queries)}) on {'Target' if use_target_db else 'App'} database (User type = normal)")
185183

186-
def fetch_data(query: str, params: dict | None, index: int) -> tuple[list[LegacyRow], list[str], int, str | None]:
184+
def fetch_data(query: str, params: dict | None, index: int) -> tuple[list[RowMapping], list[str], int, str | None]:
187185
LOG.debug(f"Query: {query}")
188-
row_data: list[LegacyRow] = []
186+
row_data: list[RowMapping] = []
189187
column_names: list[str] = []
190188
error = None
191189

192190
try:
193191
with _init_db_connection(use_target_db) as connection:
194192
result = connection.execute(text(query), params)
195193
LOG.debug(f"{result.rowcount} records retrieved")
196-
row_data = result.fetchall()
194+
row_data = result.mappings().fetchall()
197195
column_names = list(result.keys())
198196
except Exception as e:
199197
error = get_exception_message(e)
200198
LOG.exception(f"Failed to execute threaded query: {query}")
201199

202200
return row_data, column_names, index, error
203201

204-
result_data: list[LegacyRow] = []
202+
result_data: list[RowMapping] = []
205203
result_columns: list[str] = []
206204
error_data: dict[int, str] = {}
207205

@@ -241,7 +239,7 @@ def fetch_data(query: str, params: dict | None, index: int) -> tuple[list[Legacy
241239

242240
def fetch_list_from_db(
243241
query: str, params: dict | None = None, use_target_db: bool = False
244-
) -> tuple[list[LegacyRow], list[str]]:
242+
) -> tuple[list[Row], list[str]]:
245243
LOG.debug(f"DB operation: fetch_list_from_db on {'Target' if use_target_db else 'App'} database (User type = normal)")
246244

247245
with _init_db_connection(use_target_db) as connection:
@@ -263,21 +261,29 @@ def fetch_dict_from_db(
263261
LOG.debug(f"Query: {query}")
264262
result = connection.execute(text(query), params)
265263
LOG.debug(f"{result.rowcount} records retrieved")
266-
# Creates list of dictionaries so records are addressible by column name
267-
return [row._mapping for row in result]
264+
return result.mappings().all()
268265

269266

270-
def write_to_app_db(data: list[LegacyRow], column_names: Iterable[str], table_name: str) -> None:
267+
def write_to_app_db(data: list[Row], column_names: Iterable[str], table_name: str) -> None:
271268
LOG.debug("DB operation: write_to_app_db on App database (User type = normal)")
272269

273270
# use_raw is required to make use of the copy_expert method for fast batch ingestion
274271
connection = _init_db_connection(use_raw=True)
275272
cursor = connection.cursor()
276273

277274
# Write List to CSV in memory
275+
# Sanitize NaN → None: some DB connectors (e.g. Databricks via Arrow) return
276+
# float('nan') for NULL integers. CSV would serialize these as "nan" which
277+
# PostgreSQL rejects for integer columns.
278+
# RowMapping objects iterate over keys, not values — extract values explicitly.
279+
def _row_values(row):
280+
values = row.values() if isinstance(row, RowMapping) else row
281+
return tuple(None if isinstance(v, float) and math.isnan(v) else v for v in values)
282+
283+
sanitized = [_row_values(row) for row in data]
278284
buffer = FilteredStringIO(["\x00"])
279285
writer = csv.writer(buffer, quoting=csv.QUOTE_MINIMAL)
280-
writer.writerows(data)
286+
writer.writerows(sanitized)
281287
buffer.seek(0)
282288

283289
# List should have same column names as destination table, though not all columns in table are required
@@ -362,7 +368,7 @@ def _init_app_db_connection(
362368
password_override: str | None = None,
363369
user_type: UserType = "normal",
364370
use_raw: bool = False,
365-
) -> Connection | _ConnectionFairy:
371+
) -> Connection | PoolProxiedConnection:
366372
database_name = "postgres" if user_type == "database_admin" else get_tg_db()
367373
is_admin = user_type == "database_admin" or user_type == "schema_admin"
368374

@@ -399,7 +405,7 @@ def _init_app_db_connection(
399405
try:
400406
schema_name = "public" if is_admin else get_tg_schema()
401407
if use_raw:
402-
connection: _ConnectionFairy = engine.raw_connection()
408+
connection: PoolProxiedConnection = engine.raw_connection()
403409
with connection.cursor() as cursor:
404410
cursor.execute(
405411
"SET SEARCH_PATH = %(schema_name)s",

testgen/common/database/flavor/mssql_flavor_service.py

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,3 @@
1-
from urllib.parse import quote_plus
2-
31
from sqlalchemy.engine import URL
42

53
from testgen import settings
@@ -17,7 +15,7 @@ def get_connection_string_from_fields(self, params: ResolvedConnectionParams) ->
1715
connection_url = URL.create(
1816
self.url_scheme,
1917
username=params.username,
20-
password=quote_plus(params.password or ""),
18+
password=params.password or "",
2119
host=params.host,
2220
port=int(params.port or 1443),
2321
database=params.dbname,

0 commit comments

Comments
 (0)