Skip to content

Commit 1d21ca5

Browse files
authored
Merge pull request #63 from DataKitchen/standalone
Standalone install + Python 3.13 upgrade
2 parents c2294a6 + 01c4d59 commit 1d21ca5

38 files changed

Lines changed: 660 additions & 237 deletions

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -101,7 +101,7 @@ source venv/bin/activate
101101

102102
_On Windows_
103103
```powershell
104-
py -3.12 -m venv venv
104+
py -3.13 -m venv venv
105105
venv\Scripts\activate
106106
```
107107

deploy/install_linuxodbc.sh

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -31,19 +31,16 @@ fi
3131
openssl x509 -inform DER -in cert.crt -out /usr/local/share/ca-certificates/microsoft_tls_g2_ecc_ocsp_02.pem
3232
update-ca-certificates
3333

34-
# Download the desired packages
34+
# Download the ODBC driver (msodbcsql18) only — mssql-tools18 (sqlcmd, bcp, iusql)
35+
# is not needed at runtime and triggers false-positive secret findings in security scans
3536
curl -O https://download.microsoft.com/download/9dcab408-e0d4-4571-a81a-5a0951e3445f/msodbcsql18_18.6.1.1-1_$architecture.apk
36-
curl -O https://download.microsoft.com/download/b60bb8b6-d398-4819-9950-2e30cf725fb0/mssql-tools18_18.6.1.1-1_$architecture.apk
3737

3838
# Verify signature, if 'gpg' is missing install it using 'apk add gnupg':
3939
curl -O https://download.microsoft.com/download/9dcab408-e0d4-4571-a81a-5a0951e3445f/msodbcsql18_18.6.1.1-1_$architecture.sig
40-
curl -O https://download.microsoft.com/download/b60bb8b6-d398-4819-9950-2e30cf725fb0/mssql-tools18_18.6.1.1-1_$architecture.sig
4140

4241
curl https://packages.microsoft.com/keys/microsoft.asc | gpg --dearmor > microsoft.gpg
4342
gpgv --keyring ./microsoft.gpg msodbcsql18_*.sig msodbcsql18_*.apk
44-
gpgv --keyring ./microsoft.gpg mssql-tools18_*.sig mssql-tools18_*.apk
4543

46-
# Install the packages
44+
# Install the ODBC driver
4745
apk add --no-cache --allow-untrusted msodbcsql18_18.6.1.1-1_$architecture.apk
48-
apk add --no-cache --allow-untrusted mssql-tools18_18.6.1.1-1_$architecture.apk
4946
)

deploy/testgen-base.dockerfile

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
FROM python:3.12-alpine3.23
1+
FROM python:3.13-alpine3.23
22

33
ENV LANG=C.UTF-8
44
ENV LC_ALL=C.UTF-8
@@ -47,12 +47,12 @@ RUN python3 -m pip install --no-cache-dir --upgrade pip==26.0
4747
# We download the wheel for the correct arch, then extract it directly into site-packages
4848
# (wheels are zip files). gcompat provides the glibc shim needed at runtime.
4949
RUN ARCH=$(uname -m) && \
50-
pip download --platform manylinux2014_${ARCH} --python-version 3.12 --only-binary :all: \
50+
pip download --platform manylinux2014_${ARCH} --python-version 3.13 --only-binary :all: \
5151
--no-deps -d /tmp/wheels hdbcli==2.25.31 && \
52-
python3 -m zipfile -e /tmp/wheels/hdbcli-*.whl /dk/lib/python3.12/site-packages/ && \
52+
python3 -m zipfile -e /tmp/wheels/hdbcli-*.whl /dk/lib/python3.13/site-packages/ && \
5353
# Copy dist-info to system site-packages so pip sees hdbcli as installed during
5454
# dependency resolution (sqlalchemy-hana transitively depends on hdbcli~=2.10)
55-
cp -r /dk/lib/python3.12/site-packages/hdbcli-*.dist-info \
55+
cp -r /dk/lib/python3.13/site-packages/hdbcli-*.dist-info \
5656
"$(python3 -c 'import sysconfig; print(sysconfig.get_path("purelib"))')"/ && \
5757
rm -rf /tmp/wheels
5858

@@ -78,4 +78,8 @@ RUN apk del \
7878
unixodbc-dev \
7979
apache-arrow-dev
8080

81+
# Remove interactive ODBC tools — not needed at runtime, and iusql triggers
82+
# false-positive secret detection in security scanners (SECRET-3010)
83+
RUN rm -f /usr/bin/iusql /usr/bin/isql
84+
8185
RUN rm -rf /root/.cache/pip /tmp/dk/install_linuxodbc.sh

deploy/testgen.dockerfile

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,11 +7,15 @@ ARG TESTGEN_VERSION
77
ARG TESTGEN_DOCKER_HUB_REPO
88
ARG TESTGEN_SUPPORT_EMAIL
99

10-
ENV PYTHONPATH=/dk/lib/python3.12/site-packages
10+
ENV PYTHONPATH=/dk/lib/python3.13/site-packages
1111
ENV PATH=$PATH:/dk/bin
1212

1313
RUN apk upgrade
1414

15+
# Remove interactive ODBC tools — not needed at runtime, and iusql triggers
16+
# false-positive secret detection in security scanners (SECRET-3010)
17+
RUN rm -f /usr/bin/iusql /usr/bin/isql
18+
1519
# Now install everything (hdbcli is pre-installed in the base image via manual wheel extraction)
1620
COPY . /tmp/dk/
1721
RUN sed -i '/hdbcli/d' /tmp/dk/pyproject.toml /tmp/dk/testgen/pyproject.toml 2>/dev/null; \
@@ -20,7 +24,7 @@ RUN sed -i '/hdbcli/d' /tmp/dk/pyproject.toml /tmp/dk/testgen/pyproject.toml 2>/
2024
# Generate third-party license notices from installed packages
2125
RUN pip install --no-cache-dir pip-licenses \
2226
&& SCRIPT=$(find /tmp/dk -name generate_third_party_notices.py | head -1) \
23-
&& PYTHONPATH=/dk/lib/python3.12/site-packages python3 "$SCRIPT" --output /dk/THIRD-PARTY-NOTICES \
27+
&& PYTHONPATH=/dk/lib/python3.13/site-packages python3 "$SCRIPT" --output /dk/THIRD-PARTY-NOTICES \
2428
&& pip uninstall -y pip-licenses
2529

2630
RUN rm -Rf /tmp/dk /root/.cache/pip
@@ -31,7 +35,7 @@ RUN addgroup -S testgen && adduser -S testgen -G testgen
3135

3236
# Streamlit has to be able to write to these dirs
3337
RUN mkdir /var/lib/testgen
34-
RUN chown -R testgen:testgen /var/lib/testgen /dk/lib/python3.12/site-packages/streamlit/static /dk/lib/python3.12/site-packages/testgen/ui/components/frontend
38+
RUN chown -R testgen:testgen /var/lib/testgen /dk/lib/python3.13/site-packages/streamlit/static /dk/lib/python3.13/site-packages/testgen/ui/components/frontend
3539

3640
ENV TESTGEN_VERSION=${TESTGEN_VERSION}
3741
ENV TESTGEN_DOCKER_HUB_REPO=${TESTGEN_DOCKER_HUB_REPO}

docs/local_development.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -27,13 +27,13 @@ From the root of your local repository, create and activate a virtual environmen
2727

2828
_On Linux/Mac_
2929
```shell
30-
python3.12 -m venv venv
30+
python3.13 -m venv venv
3131
source venv/bin/activate
3232
```
3333

3434
_On Windows_
3535
```powershell
36-
py -3.12 -m venv venv
36+
py -3.13 -m venv venv
3737
venv\Scripts\activate
3838
```
3939

pyproject.toml

Lines changed: 17 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -21,30 +21,31 @@ classifiers = [
2121
"License :: OSI Approved :: Apache Software License",
2222
"Development Status :: 5 - Production/Stable",
2323
"Operating System :: OS Independent",
24-
"Programming Language :: Python :: 3.12",
24+
"Programming Language :: Python :: 3.13",
2525
"Topic :: System :: Monitoring",
2626
]
2727
keywords = [ "dataops", "data", "quality", "testing", "database", "profiling" ]
2828
requires-python = ">=3.12"
2929

3030
dependencies = [
31-
"PyYAML==6.0.1",
32-
"click==8.1.3",
33-
"sqlalchemy==1.4.46",
34-
"databricks-sql-connector==2.9.3",
31+
"PyYAML==6.0.3",
32+
"click==8.3.1",
33+
"sqlalchemy==2.0.48",
34+
"databricks-sql-connector==4.2.5",
35+
"databricks-sqlalchemy==2.0.9",
3536
"databricks-sdk>=0.20.0",
3637
"snowflake-sqlalchemy==1.9.0",
37-
"sqlalchemy-bigquery==1.14.1",
38+
"sqlalchemy-bigquery==1.16.0",
3839
"oracledb==3.4.0",
3940
"hdbcli==2.25.31",
40-
"sqlalchemy-hana==2.1.0",
41-
"pyodbc==5.0.0",
42-
"psycopg2-binary==2.9.9",
41+
"sqlalchemy-hana==4.4.0",
42+
"pyodbc==5.2.0",
43+
"psycopg2-binary==2.9.11",
4344
"pycryptodome==3.21",
4445
"prettytable==3.7.0",
4546
"requests_extensions==1.1.3",
46-
"numpy==1.26.4",
47-
"pandas==2.1.4",
47+
"numpy==2.1.3",
48+
"pandas==2.2.3",
4849
"streamlit==1.55.0",
4950
"streamlit-extras==0.3.0",
5051
"streamlit-aggrid==0.3.4.post3",
@@ -87,6 +88,10 @@ dependencies = [
8788
]
8889

8990
[project.optional-dependencies]
91+
standalone = [
92+
"pixeltable-pgserver>=0.5.1",
93+
]
94+
9095
dev = [
9196
"invoke==2.2.0",
9297
"ruff==0.4.1",
@@ -165,7 +170,7 @@ filterwarnings = [
165170
# for an explanation of their functionality.
166171
# WARNING: When changing mypy configurations, be sure to test them after removing your .mypy_cache
167172
[tool.mypy]
168-
python_version = "3.12"
173+
python_version = "3.13"
169174
check_untyped_defs = true
170175
disallow_untyped_decorators = true
171176
disallow_untyped_defs = true

testgen/__main__.py

Lines changed: 136 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,16 @@
1+
import base64
2+
import importlib
13
import logging
24
import os
5+
import platform
6+
import secrets
37
import signal
48
import subprocess
59
import sys
610
from dataclasses import dataclass, field
711
from datetime import UTC, datetime, timedelta
12+
from importlib.metadata import version as pkg_version
13+
from pathlib import Path
814

915
import click
1016
from click.core import Context
@@ -42,6 +48,13 @@
4248
get_tg_schema,
4349
version_service,
4450
)
51+
from testgen.common.standalone_postgres import (
52+
STANDALONE_URI_ENV_VAR,
53+
get_home_dir as get_testgen_home,
54+
get_server_uri,
55+
is_standalone_mode,
56+
start_server as start_standalone_postgres,
57+
)
4558
from testgen.common.models import with_database_session
4659
from testgen.common.models.profiling_run import ProfilingRun
4760
from testgen.common.models.settings import PersistedSetting
@@ -99,19 +112,23 @@ def invoke(self, ctx: Context):
99112
)
100113
@click.pass_context
101114
def cli(ctx: Context, verbose: bool):
115+
if is_standalone_mode():
116+
start_standalone_postgres()
117+
102118
if verbose:
103119
configure_logging(level=logging.DEBUG)
104120
else:
105121
configure_logging(level=logging.INFO)
106122

107123
ctx.obj = Configuration(verbose=verbose)
108-
status_ok, message = docker_service.check_basic_configuration()
109-
if not status_ok:
110-
click.secho(message, fg="red")
111-
sys.exit(1)
124+
if not is_standalone_mode() and ctx.invoked_subcommand != "standalone-setup":
125+
status_ok, message = docker_service.check_basic_configuration()
126+
if not status_ok:
127+
click.secho(message, fg="red")
128+
sys.exit(1)
112129

113130
if (
114-
ctx.invoked_subcommand not in ["run-app", "ui", "setup-system-db", "upgrade-system-version", "quick-start"]
131+
ctx.invoked_subcommand not in ["run-app", "ui", "setup-system-db", "upgrade-system-version", "quick-start", "standalone-setup"]
115132
and not is_db_revision_up_to_date()
116133
):
117134
click.secho("The system database schema is outdated. Automatically running the following command:", fg="red")
@@ -472,6 +489,110 @@ def quick_start(
472489
click.echo("Quick start has successfully finished.")
473490

474491

492+
@cli.command("standalone-setup", help="Set up TestGen for standalone use with embedded PostgreSQL (no Docker required).")
@click.option("--username", prompt="Admin username", default="admin", help="Username for the TestGen web UI.")
@click.option(
    "--password", prompt="Admin password", hide_input=True, confirmation_prompt=True,
    default="testgen", help="Password for the TestGen web UI.",
)
def setup_standalone(username: str, password: str):
    """Write the standalone config file, start embedded PostgreSQL and initialize the database.

    Steps, in order:
      1. Write ``config.env`` in the TestGen home directory (asking before
         overwriting an existing one) with the UI credentials and freshly
         generated encryption/JWT secrets.
      2. Reload ``settings`` so the new file is picked up by this process.
      3. Patch Streamlit, start the embedded PostgreSQL server and run the
         database initialization.
      4. Send a best-effort analytics event and print next-step instructions.

    Args:
        username: Username for the TestGen web UI.
        password: Password for the TestGen web UI.

    Raises:
        click.BadParameter: If the username or password contains a line break.
    """
    # The config file is a list of KEY=value lines. A line break inside a value
    # would truncate it and inject arbitrary extra settings, so reject it early.
    for option_name, value in (("--username", username), ("--password", password)):
        if "\n" in value or "\r" in value:
            raise click.BadParameter("must not contain line breaks.", param_hint=option_name)

    config_dir = get_testgen_home()
    config_path = config_dir / "config.env"

    if config_path.exists():
        if not click.confirm(f"Config already exists at {config_path}. Overwrite?"):
            click.echo("Aborted.")
            return

    # Generate secrets (same approach as dk-installer)
    def generate_secret(length: int = 12) -> str:
        alphabet = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"
        return "".join(secrets.choice(alphabet) for _ in range(length))

    jwt_key = base64.b64encode(secrets.token_bytes(32)).decode()
    decrypt_salt = generate_secret()
    decrypt_password = generate_secret()
    log_dir = str(config_dir / "log")

    config_dir.mkdir(parents=True, exist_ok=True)

    config_lines = [
        "# TestGen standalone configuration",
        "# Generated by: testgen standalone-setup",
        "",
        "# Standalone mode (embedded PostgreSQL)",
        "TG_STANDALONE_MODE=yes",
        "",
        "# UI credentials",
        f"TESTGEN_USERNAME={username}",
        f"TESTGEN_PASSWORD={password}",
        "",
        "# Encryption keys",
        f"TG_DECRYPT_SALT={decrypt_salt}",
        f"TG_DECRYPT_PASSWORD={decrypt_password}",
        f"TG_JWT_HASHING_KEY={jwt_key}",
        "",
        "# Logging",
        f"TESTGEN_LOG_FILE_PATH={log_dir}",
        "",
        "# Analytics",
        "TG_ANALYTICS=yes",
        "",
        "# Trust target database certificates (for SQL Server, etc.)",
        "TG_TARGET_DB_TRUST_SERVER_CERTIFICATE=yes",
        "TG_EXPORT_TO_OBSERVABILITY_VERIFY_SSL=no",
    ]

    # The file holds the UI password and encryption keys in plain text, so make
    # it owner-only *before* writing the secrets. touch() covers a fresh file,
    # chmod() tightens a pre-existing one that is being overwritten.
    config_path.touch(mode=0o600, exist_ok=True)
    config_path.chmod(0o600)
    config_path.write_text("\n".join(config_lines) + "\n", encoding="utf-8")
    click.echo(f"Config written to {config_path}")

    # Reload settings — the module was already evaluated at import time
    # before the config file existed. Reloading re-reads the new file
    # and re-evaluates all module-level variables.
    importlib.reload(settings)

    # Patch Streamlit to support editable-install component resolution
    click.echo("Patching Streamlit...")
    from testgen.ui.scripts.patch_streamlit import patch as patch_streamlit
    patch_streamlit(dev=True)

    # Start embedded PostgreSQL (standalone mode is now active via config)
    start_standalone_postgres()

    # Initialize the database
    click.echo("Initializing database...")
    run_launch_db_config(delete_db=False)

    # Send analytics event for pip install tracking. This is deliberately
    # best-effort: a failure here must never break the setup, but it is logged
    # at debug level so it can still be diagnosed.
    try:
        from testgen.common.mixpanel_service import MixpanelService

        mp = MixpanelService()
        mp.send_event(
            "standalone_setup",
            username=username,
            install_type="standalone",
            version=pkg_version("dataops-testgen"),
            python_info=f"{platform.python_implementation()} {platform.python_version()}",
            **{"$os": platform.system()},
            os_version=platform.release(),
            os_arch=platform.machine(),
        )
    except Exception:
        logging.getLogger(__name__).debug("Could not send standalone setup analytics event", exc_info=True)

    click.echo("")
    click.echo(click.style("TestGen is ready!", fg="green", bold=True))
    click.echo("")
    click.echo(" To load demo data (optional):")
    click.echo(" testgen quick-start")
    click.echo("")
    click.echo(" Start the application:")
    click.echo(" testgen run-app")
    click.echo("")
    click.echo(" Then open http://localhost:8501 in your browser.")
    click.echo(f" Log in with username: {username}")
594+
595+
475596
@cli.command("setup-system-db", help="Use to initialize the TestGen system database.")
476597
@click.option(
477598
"--delete-db",
@@ -728,6 +849,15 @@ def init_ui():
728849
init_ui()
729850

730851
app_file = os.path.join(os.path.dirname(os.path.abspath(__file__)), "ui/app.py")
852+
853+
# In standalone mode, pass the pgserver URI to the Streamlit subprocess
854+
# so it can connect without acquiring the pgserver file lock.
855+
child_env = {**os.environ, "TG_JOB_SOURCE": "UI"}
856+
if is_standalone_mode():
857+
server_uri = get_server_uri()
858+
if server_uri:
859+
child_env = {**os.environ, "TG_JOB_SOURCE": "UI", STANDALONE_URI_ENV_VAR: server_uri}
860+
731861
process= subprocess.Popen(
732862
[ # noqa: S607
733863
"streamlit",
@@ -742,7 +872,7 @@ def init_ui():
742872
"--",
743873
f"{'--debug' if settings.IS_DEBUG else ''}",
744874
],
745-
env={**os.environ, "TG_JOB_SOURCE": "UI"}
875+
env=child_env,
746876
)
747877
def term_ui(signum, _):
748878
LOG.info(f"Sending termination signal {signum} to Testgen UI")

testgen/commands/queries/refresh_data_chars_query.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -113,7 +113,7 @@ def get_row_counts(self, table_names: Iterable[str]) -> list[tuple[str, None]]:
113113
schema = self.table_group.table_group_schema
114114
quote = self.flavor_service.quote_character
115115
count_queries = [
116-
f"SELECT '{table}', COUNT(*) FROM {quote}{schema}{quote}.{quote}{table}{quote}"
116+
f"SELECT '{table}' AS table_name, COUNT(*) AS row_count FROM {quote}{schema}{quote}.{quote}{table}{quote}"
117117
for table in table_names
118118
]
119119
chunked_queries = chunk_queries(count_queries, " UNION ALL ", self.connection.max_query_chars)

0 commit comments

Comments
 (0)