Merge branch 'fix/TG-1101-sqlserver-profiling-and-freshness' into 'enterprise'

ci bot · ci bot · commit b2e9ca606c74 · 2026-06-02T17:27:27.000Z
fix: SQL Server profiling crash and Freshness_Trend generation fixes

See merge request dkinternal/testgen/dataops-testgen!532
diff --git a/pyproject.toml b/pyproject.toml
@@ -8,7 +8,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "dataops-testgen"
-version = "5.32.2"
+version = "5.33.3"
 description = "DataKitchen's Data Quality DataOps TestGen"
 authors = [
     { "name" = "DataKitchen, Inc.", "email" = "info@datakitchen.io" },
diff --git a/testgen/common/date_service.py b/testgen/common/date_service.py
@@ -62,7 +62,7 @@ def parse_since(since: str, *, today: date | None = None) -> date:
 
 def parse_fuzzy_date(value: str | int) -> datetime | None:
     if type(value) == str:
-        return datetime.strptime(value, "%Y-%m-%d %H:%M:%S")
+        return datetime.fromisoformat(value)
     elif type(value) == int or type(value) == float:
         ts = int(value)
         if ts >= 1e11:
diff --git a/testgen/common/source_data_service.py b/testgen/common/source_data_service.py
@@ -127,7 +127,11 @@ def build_hygiene_query(issue_data: dict, limit: int = DEFAULT_LIMIT) -> str | N
         "TABLE_NAME": issue_data["table_name"],
         "COLUMN_NAME": issue_data["column_name"],
         "DETAIL_EXPRESSION": issue_data["detail"],
-        "PROFILE_RUN_DATE": issue_data["profiling_starttime"],
+        # Date-only string: Oracle/HANA templates use TO_DATE(..., 'YYYY-MM-DD'), which rejects a time
+        # component, and the anomaly criteria boundary is date-based (CURRENT_DATE + INTERVAL '30 year').
+        "PROFILE_RUN_DATE": parsed_run_date.strftime("%Y-%m-%d")
+        if (parsed_run_date := parse_fuzzy_date(issue_data["profiling_starttime"]))
+        else None,
         "LIMIT": limit,
         "LIMIT_2": int(limit / 2),
         "LIMIT_4": int(limit / 4),
diff --git a/testgen/mcp/tools/test_definitions.py b/testgen/mcp/tools/test_definitions.py
@@ -35,7 +35,7 @@
 )
 from testgen.mcp.tools.markdown import MdDoc
 
-_DOC_GROUP = DocGroup.DISCOVER
+_DOC_GROUP = DocGroup.INVESTIGATE
 
 _VALID_SCOPES = {"column", "table", "referential", "custom"}
 
diff --git a/testgen/template/dbsetup_anomaly_types/profile_anomaly_types_Column_Pattern_Mismatch.yaml b/testgen/template/dbsetup_anomaly_types/profile_anomaly_types_Column_Pattern_Mismatch.yaml
@@ -15,17 +15,17 @@ profile_anomaly_types:
        AND STRPOS(p.top_patterns, 'N') > 0
        AND (
              ( (STRPOS(p.top_patterns, 'A') > 0 OR STRPOS(p.top_patterns, 'a') > 0)
-               AND SPLIT_PART(p.top_patterns, '|', 3)::NUMERIC / SPLIT_PART(p.top_patterns, '|', 1)::NUMERIC < 0.05)
+               AND NULLIF(SPLIT_PART(p.top_patterns, '|', 3), '')::NUMERIC / NULLIF(SPLIT_PART(p.top_patterns, '|', 1), '')::NUMERIC < 0.05)
             OR
-             SPLIT_PART(p.top_patterns, '|', 3)::NUMERIC / SPLIT_PART(p.top_patterns, '|', 1)::NUMERIC < 0.1
+             NULLIF(SPLIT_PART(p.top_patterns, '|', 3), '')::NUMERIC / NULLIF(SPLIT_PART(p.top_patterns, '|', 1), '')::NUMERIC < 0.1
         )
   detail_expression: |-
     'Patterns: ' || p.top_patterns
   issue_likelihood: Likely
   suggested_action: |-
     Review the values for any data that doesn't conform to the most common pattern and correct any data errors.
   dq_score_prevalence_formula: |-
-    (p.record_ct - SPLIT_PART(p.top_patterns, '|', 1)::BIGINT)::FLOAT/NULLIF(p.record_ct, 0)::FLOAT
+    (p.record_ct - NULLIF(SPLIT_PART(p.top_patterns, '|', 1), '')::BIGINT)::FLOAT/NULLIF(p.record_ct, 0)::FLOAT
   dq_score_risk_factor: '0.66'
   dq_dimension: Validity
   impact_dimension: Usability
diff --git a/testgen/template/dbsetup_anomaly_types/profile_anomaly_types_Invalid_Zip3_USA.yaml b/testgen/template/dbsetup_anomaly_types/profile_anomaly_types_Invalid_Zip3_USA.yaml
@@ -9,14 +9,14 @@ profile_anomaly_types:
     p.distinct_pattern_ct > 1
        AND (p.column_name ilike '%zip%' OR p.column_name ILIKE '%postal%')
        AND SPLIT_PART(p.top_patterns, ' | ', 2) = 'NNN'
-       AND SPLIT_PART(p.top_patterns, ' | ', 1)::FLOAT/NULLIF(value_ct, 0)::FLOAT > 0.50
+       AND NULLIF(SPLIT_PART(p.top_patterns, ' | ', 1), '')::FLOAT/NULLIF(value_ct, 0)::FLOAT > 0.50
   detail_expression: |-
     'Pattern: ' || p.top_patterns
   issue_likelihood: Definite
   suggested_action: |-
     Review your source data, ingestion process, and any processing steps that update this column.
   dq_score_prevalence_formula: |-
-    (NULLIF(p.record_ct, 0)::INT - SPLIT_PART(p.top_patterns, ' | ', 1)::BIGINT)::FLOAT/NULLIF(p.record_ct, 0)::FLOAT
+    (NULLIF(p.record_ct, 0)::INT - NULLIF(SPLIT_PART(p.top_patterns, ' | ', 1), '')::BIGINT)::FLOAT/NULLIF(p.record_ct, 0)::FLOAT
   dq_score_risk_factor: '1'
   dq_dimension: Validity
   impact_dimension: Conformance
diff --git a/testgen/template/dbsetup_anomaly_types/profile_anomaly_types_Table_Pattern_Mismatch.yaml b/testgen/template/dbsetup_anomaly_types/profile_anomaly_types_Table_Pattern_Mismatch.yaml
@@ -13,7 +13,7 @@ profile_anomaly_types:
                    AND m.max_pattern_ct = 1
                    AND m.column_ct > 1
                    AND SPLIT_PART(p.top_patterns, '|', 2) <> SPLIT_PART(m.very_top_pattern, '|', 2)
-                   AND SPLIT_PART(p.top_patterns, '|', 1)::NUMERIC / SPLIT_PART(m.very_top_pattern, '|', 1)::NUMERIC < 0.1
+                   AND NULLIF(SPLIT_PART(p.top_patterns, '|', 1), '')::NUMERIC / NULLIF(SPLIT_PART(m.very_top_pattern, '|', 1), '')::NUMERIC < 0.1
   detail_expression: |-
     'Patterns: ' || SPLIT_PART(p.top_patterns, '|', 2) || ', ' || SPLIT_PART(ltrim(m.very_top_pattern, '0'), '|', 2)
   issue_likelihood: Likely
diff --git a/testgen/template/flavors/bigquery/gen_query_tests/gen_Freshness_Trend.sql b/testgen/template/flavors/bigquery/gen_query_tests/gen_Freshness_Trend.sql
@@ -75,9 +75,11 @@ tran_date_cols AS (
     ) AS rank
   FROM latest_results
   WHERE general_type IN ('A', 'D', 'N')
-    AND functional_data_type ILIKE 'transactional date%'
-    OR functional_data_type ILIKE 'period%'
-    OR functional_data_type = 'timestamp'
+    AND (
+      functional_data_type ILIKE 'transactional date%'
+      OR functional_data_type ILIKE 'period%'
+      OR functional_data_type = 'timestamp'
+    )
 ),
 -- Numeric Measures
 numeric_cols AS (
diff --git a/testgen/template/flavors/databricks/gen_query_tests/gen_Freshness_Trend.sql b/testgen/template/flavors/databricks/gen_query_tests/gen_Freshness_Trend.sql
@@ -75,9 +75,11 @@ tran_date_cols AS (
     ) AS rank
   FROM latest_results
   WHERE general_type IN ('A', 'D', 'N')
-    AND functional_data_type ILIKE 'transactional date%'
-    OR functional_data_type ILIKE 'period%'
-    OR functional_data_type = 'timestamp'
+    AND (
+      functional_data_type ILIKE 'transactional date%'
+      OR functional_data_type ILIKE 'period%'
+      OR functional_data_type = 'timestamp'
+    )
 ),
 -- Numeric Measures
 numeric_cols AS (
diff --git a/testgen/template/flavors/mssql/gen_query_tests/gen_Freshness_Trend.sql b/testgen/template/flavors/mssql/gen_query_tests/gen_Freshness_Trend.sql
@@ -75,9 +75,11 @@ tran_date_cols AS (
     ) AS rank
   FROM latest_results
   WHERE general_type IN ('A', 'D', 'N')
-    AND functional_data_type ILIKE 'transactional date%'
-    OR functional_data_type ILIKE 'period%'
-    OR functional_data_type = 'timestamp'
+    AND (
+      functional_data_type ILIKE 'transactional date%'
+      OR functional_data_type ILIKE 'period%'
+      OR functional_data_type = 'timestamp'
+    )
 ),
 -- Numeric Measures
 numeric_cols AS (
diff --git a/testgen/template/flavors/oracle/gen_query_tests/gen_Freshness_Trend.sql b/testgen/template/flavors/oracle/gen_query_tests/gen_Freshness_Trend.sql
@@ -75,9 +75,11 @@ tran_date_cols AS (
     ) AS rank
   FROM latest_results
   WHERE general_type IN ('A', 'D', 'N')
-    AND functional_data_type ILIKE 'transactional date%'
-    OR functional_data_type ILIKE 'period%'
-    OR functional_data_type = 'timestamp'
+    AND (
+      functional_data_type ILIKE 'transactional date%'
+      OR functional_data_type ILIKE 'period%'
+      OR functional_data_type = 'timestamp'
+    )
 ),
 -- Numeric Measures
 numeric_cols AS (
diff --git a/testgen/template/flavors/salesforce_data360/gen_query_tests/gen_Freshness_Trend.sql b/testgen/template/flavors/salesforce_data360/gen_query_tests/gen_Freshness_Trend.sql
@@ -75,9 +75,11 @@ tran_date_cols AS (
     ) AS rank
   FROM latest_results
   WHERE general_type IN ('A', 'D', 'N')
-    AND functional_data_type ILIKE 'transactional date%'
-    OR functional_data_type ILIKE 'period%'
-    OR functional_data_type = 'timestamp'
+    AND (
+      functional_data_type ILIKE 'transactional date%'
+      OR functional_data_type ILIKE 'period%'
+      OR functional_data_type = 'timestamp'
+    )
 ),
 -- Numeric Measures
 numeric_cols AS (
diff --git a/testgen/template/flavors/sap_hana/gen_query_tests/gen_Freshness_Trend.sql b/testgen/template/flavors/sap_hana/gen_query_tests/gen_Freshness_Trend.sql
@@ -75,9 +75,11 @@ tran_date_cols AS (
     ) AS rank
   FROM latest_results
   WHERE general_type IN ('A', 'D', 'N')
-    AND functional_data_type ILIKE 'transactional date%'
-    OR functional_data_type ILIKE 'period%'
-    OR functional_data_type = 'timestamp'
+    AND (
+      functional_data_type ILIKE 'transactional date%'
+      OR functional_data_type ILIKE 'period%'
+      OR functional_data_type = 'timestamp'
+    )
 ),
 -- Numeric Measures
 numeric_cols AS (
diff --git a/testgen/template/gen_query_tests/gen_Freshness_Trend.sql b/testgen/template/gen_query_tests/gen_Freshness_Trend.sql
@@ -75,9 +75,11 @@ tran_date_cols AS (
     ) AS rank
   FROM latest_results
   WHERE general_type IN ('A', 'D', 'N')
-    AND functional_data_type ILIKE 'transactional date%'
-    OR functional_data_type ILIKE 'period%'
-    OR functional_data_type = 'timestamp'
+    AND (
+      functional_data_type ILIKE 'transactional date%'
+      OR functional_data_type ILIKE 'period%'
+      OR functional_data_type = 'timestamp'
+    )
 ),
 -- Numeric Measures
 numeric_cols AS (
diff --git a/testgen/ui/app.py b/testgen/ui/app.py
@@ -1,6 +1,7 @@
 import logging
 import os
 from urllib.parse import urlparse
+from uuid import uuid4
 
 import streamlit as st
 
@@ -16,6 +17,8 @@
 from testgen.ui.services.query_cache import select_projects_where
 from testgen.ui.session import session
 
+LOG = logging.getLogger("testgen")
+
 if is_standalone_mode() and (standalone_uri := os.environ.get(STANDALONE_URI_ENV_VAR)):
     ensure_standalone_setup(standalone_uri)
 
@@ -84,6 +87,20 @@ def render(log_level: int = logging.INFO):
                     )
 
         application.router.run()
+    except Exception:
+        # Log the full traceback (tagged with a reference the user can quote) so it lands in app.log,
+        # which the in-app Application Logs dialog reads -- letting users download and share UI errors
+        # instead of needing container logs. Streamlit's rerun/stop signals are BaseException
+        # subclasses, so they pass through uncaught.
+        error_reference = uuid4().hex[:8].upper()
+        LOG.exception(
+            "Unhandled error rendering page '%s' [ref=%s]", session.current_page or "unknown", error_reference
+        )
+        try:
+            _render_error_message(error_reference)
+        except Exception:
+            # Never let the error message itself break the run -- fall back to a bare message.
+            st.error("Something went wrong. Use the menu on the left to navigate to another page.")
     finally:
         # Safety net: commit any flushed-but-uncommitted work (e.g., PersistedSetting writes)
         # before RerunException propagates and bypasses database_session()'s normal commit.
@@ -97,6 +114,18 @@ def render(log_level: int = logging.INFO):
                 db_session.rollback()
 
 
+def _render_error_message(reference: str) -> None:
+    support_email = settings.SUPPORT_EMAIL
+    st.error(
+        "**Something went wrong.**\n\n"
+        "An unexpected error occurred while loading this page. Use the menu on the left to navigate to "
+        "another page.\n\n"
+        "If this keeps happening, download the logs from **Help → Application Logs** and send them to "
+        f"[{support_email}](mailto:{support_email}) with this reference: **{reference}**.",
+        icon=":material/error:",
+    )
+
+
 @st.cache_resource(validate=lambda _: not settings.IS_DEBUG, show_spinner=False)
 def get_application(log_level: int = logging.INFO):
     return bootstrap.run(log_level=log_level)
diff --git a/testgen/ui/components/frontend/js/pages/score_details.js b/testgen/ui/components/frontend/js/pages/score_details.js
@@ -70,7 +70,7 @@ const ScoreDetails = (/** @type {Properties} */ props) => {
                 () => {
                     const score = getValue(props.score);
                     return getValue(props.permissions)?.can_edit ?? false ? div(
-                        { class: 'flex-row tg-test-suites--card-actions' },
+                        { class: 'flex-row tg-score-details--card-actions' },
                         Button({ type: 'icon', icon: 'notifications', tooltip: 'Configure Notifications', onclick: () => emit('EditNotifications', {}) }),
                         Button({ type: 'icon', icon: 'edit', tooltip: 'Edit Scorecard', onclick: () => emit('LinkClicked', { href: 'quality-dashboard:explorer', params: { definition_id: score.id, project_code: score.project_code } }) }),
                         Button({ type: 'icon', icon: 'delete', tooltip: 'Delete Scorecard', onclick: () => { deleteDialogOpen.val = true; } }),
@@ -171,6 +171,10 @@ stylesheet.replace(`
 .tg-score-details {
     min-height: 900px;
 }
+ 
+.tg-score-details--card-actions {
+    margin-top: -10px;
+}
 `);
 
 export { ScoreDetails };
diff --git a/testgen/ui/components/frontend/standalone/project_settings/index.js b/testgen/ui/components/frontend/standalone/project_settings/index.js
@@ -201,16 +201,16 @@ const ProjectSettings = (props) => {
                 content: div(
                     { class: 'flex-column fx-gap-3' },
                     Checkbox({
-                        label: 'Enable data retention',
+                        label: 'Automatically delete old profiling and test history',
                         checked: form.data_retention_enabled,
-                        help: 'Automatically delete old profiling and test run data to keep your database lean. The most recent run in each suite or table group is always preserved.',
+                        help: 'Old profiling and test runs are permanently deleted to keep the database from growing without bound. The most recent run in each test suite and table group is always kept.',
                         onChange: (checked) => { form.data_retention_enabled.val = checked; },
                     }),
                     () => form.data_retention_enabled.val
                         ? div(
                             { class: 'flex-column fx-gap-3' },
                             Input({
-                                label: 'Retention period (days)',
+                                label: 'Delete history older than (days)',
                                 value: form.data_retention_days,
                                 type: 'number',
                                 step: 1,
diff --git a/testgen/ui/pdf/test_result_report.py b/testgen/ui/pdf/test_result_report.py
@@ -59,7 +59,7 @@ def build_summary_table(document, tr_data):
             *[
                 (cmd[0], *coords, *cmd[1:])
                 for coords in (
-                    ((3, 3), (3, -3)),
+                    ((3, 3), (3, -4)),
                     ((0, 0), (0, -2))
                 )
                 for cmd in (
@@ -83,10 +83,11 @@ def build_summary_table(document, tr_data):
             ("SPAN", (1, 6), (2, 6)),
             ("SPAN", (4, 6), (5, 6)),
             ("SPAN", (1, 7), (5, 7)),
-            ("SPAN", (0, 8), (5, 8)),
+            ("SPAN", (1, 8), (5, 8)),
+            ("SPAN", (0, 9), (5, 9)),
 
             # Link cell
-            ("BACKGROUND", (0, 8), (5, 8), colors.white),
+            ("BACKGROUND", (0, 9), (5, 9), colors.white),
 
             # Measure cell
             ("FONT", (1, 1), (1, 1), "Helvetica-Bold"),
diff --git a/testgen/ui/static/js/components/score_card.js b/testgen/ui/static/js/components/score_card.js
@@ -90,7 +90,7 @@ const ScoreCard = (score, actions, options) => {
                     : '',
                 (score_.cde_score && categories.length > 0) ? i({ class: 'mr-4 ml-4' }) : '',
                 categories.length > 0 ? div(
-                    { class: 'flex-column' },
+                    { class: 'flex-column tg-score-card--breakdown' },
                     span({ class: 'mb-2 text-caption' }, categoriesLabel),
                     div(
                         { class: 'tg-score-card--categories' },
@@ -164,13 +164,17 @@ stylesheet.replace(`
     margin-bottom: unset !important;
 }
 
+.tg-score-card--breakdown {
+    margin-top: -12px;
+}
+
 .tg-score-card--categories {
     display: flex;
     flex-direction: column;
     flex-wrap: wrap;
-    row-gap: 8px;
+    row-gap: 4px;
     column-gap: 16px;
-    max-height: 100px;
+    max-height: 140px;
     overflow-y: auto;
 }
 .tg-score-card--categories > div {
diff --git a/testgen/ui/views/hygiene_issues.py b/testgen/ui/views/hygiene_issues.py
@@ -260,7 +260,7 @@ def on_view_source_data(row_id: str) -> None:
             anomaly_df = profiling_queries.get_profiling_anomalies_by_ids([row_id])
             if anomaly_df.empty:
                 return
-            row = make_json_safe(anomaly_df.where(anomaly_df.notna(), None).to_dict(orient="records")[0])
+            row = anomaly_df.where(anomaly_df.notna(), None).to_dict(orient="records")[0]
 
             MixpanelService().send_event(
                 "view-source-data",
@@ -335,10 +335,7 @@ def on_download_report(payload: dict) -> None:
             anomaly_df = profiling_queries.get_profiling_anomalies_by_ids(ids)
             if anomaly_df.empty:
                 return
-            selected_items = [
-                make_json_safe(record)
-                for record in anomaly_df.where(anomaly_df.notna(), None).to_dict(orient="records")
-            ]
+            selected_items = anomaly_df.where(anomaly_df.notna(), None).to_dict(orient="records")
 
             MixpanelService().send_event(
                 "download-issue-report",
diff --git a/testgen/ui/views/profiling_results.py b/testgen/ui/views/profiling_results.py
@@ -28,7 +28,7 @@
 from testgen.ui.services.query_cache import get_profiling_run_minimal
 from testgen.ui.session import session
 from testgen.ui.views.data_catalog import get_preview_data
-from testgen.utils import make_json_safe
+from testgen.utils import dataframe_to_json_records, make_json_safe
 
 PAGE_SIZE = 500
 
@@ -163,15 +163,14 @@ def render(
             pii_columns = get_pii_columns(str(run.table_groups_id))
             mask_profiling_pii(df, pii_columns)
 
-        # Use pandas JSON serialization to safely handle NaN/NaT -> null, timestamps -> epoch seconds
-        items = json.loads(df.to_json(orient="records", date_unit="s"))
+        items = dataframe_to_json_records(df)
 
         selected_item = st.session_state.get(SELECTED_ITEM_KEY)
         # Load selected item if URL has a selection but session cache is missing or stale
         if selected and (selected_item is None or selected_item.get("id") != selected):
             row_df = df[df["id"] == selected]
             if not row_df.empty:
-                row = json.loads(row_df.to_json(orient="records", date_unit="s"))[0]
+                row = dataframe_to_json_records(row_df)[0]
                 row["hygiene_issues"] = profiling_queries.get_hygiene_issues(
                     run_id, row["table_name"], row.get("column_name")
                 )
@@ -189,7 +188,7 @@ def on_row_selected(item_id: str) -> None:
             row_df = df[df["id"] == item_id]
             if row_df.empty:
                 return
-            row = json.loads(row_df.to_json(orient="records", date_unit="s"))[0]
+            row = dataframe_to_json_records(row_df)[0]
             row["hygiene_issues"] = profiling_queries.get_hygiene_issues(
                 run_id, row["table_name"], row.get("column_name")
             )
diff --git a/testgen/ui/views/test_definitions.py b/testgen/ui/views/test_definitions.py
diff --git a/testgen/ui/views/test_results.py b/testgen/ui/views/test_results.py
diff --git a/testgen/utils/__init__.py b/testgen/utils/__init__.py
diff --git a/tests/unit/common/test_date_service.py b/tests/unit/common/test_date_service.py
diff --git a/tests/unit/test_utils.py b/tests/unit/test_utils.py

Original file line number	Diff line number	Diff line change
`@@ -35,7 +35,7 @@`
`35`	`35`	`)`
`36`	`36`	`from testgen.mcp.tools.markdown import MdDoc`
`37`	`37`
`38`		`-_DOC_GROUP = DocGroup.DISCOVER`
	`38`	`+_DOC_GROUP = DocGroup.INVESTIGATE`
`39`	`39`
`40`	`40`	`_VALID_SCOPES = {"column", "table", "referential", "custom"}`
`41`	`41`