Skip to content

Commit b2e9ca6

Browse files
author
ci bot
committed
Merge branch 'fix/TG-1101-sqlserver-profiling-and-freshness' into 'enterprise'
fix: SQL Server profiling crash and Freshness_Trend generation fixes. See merge request dkinternal/testgen/dataops-testgen!532.
2 parents b2a0a0e + 6c95948 commit b2e9ca6

26 files changed

Lines changed: 168 additions & 63 deletions

File tree

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ build-backend = "setuptools.build_meta"
88

99
[project]
1010
name = "dataops-testgen"
11-
version = "5.32.2"
11+
version = "5.33.3"
1212
description = "DataKitchen's Data Quality DataOps TestGen"
1313
authors = [
1414
{ "name" = "DataKitchen, Inc.", "email" = "info@datakitchen.io" },

testgen/common/date_service.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,7 @@ def parse_since(since: str, *, today: date | None = None) -> date:
6262

6363
def parse_fuzzy_date(value: str | int) -> datetime | None:
6464
if type(value) == str:
65-
return datetime.strptime(value, "%Y-%m-%d %H:%M:%S")
65+
return datetime.fromisoformat(value)
6666
elif type(value) == int or type(value) == float:
6767
ts = int(value)
6868
if ts >= 1e11:

testgen/common/source_data_service.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -127,7 +127,11 @@ def build_hygiene_query(issue_data: dict, limit: int = DEFAULT_LIMIT) -> str | N
127127
"TABLE_NAME": issue_data["table_name"],
128128
"COLUMN_NAME": issue_data["column_name"],
129129
"DETAIL_EXPRESSION": issue_data["detail"],
130-
"PROFILE_RUN_DATE": issue_data["profiling_starttime"],
130+
# Date-only string: Oracle/HANA templates use TO_DATE(..., 'YYYY-MM-DD'), which rejects a time
131+
# component, and the anomaly criteria boundary is date-based (CURRENT_DATE + INTERVAL '30 year').
132+
"PROFILE_RUN_DATE": parsed_run_date.strftime("%Y-%m-%d")
133+
if (parsed_run_date := parse_fuzzy_date(issue_data["profiling_starttime"]))
134+
else None,
131135
"LIMIT": limit,
132136
"LIMIT_2": int(limit / 2),
133137
"LIMIT_4": int(limit / 4),

testgen/mcp/tools/test_definitions.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@
3535
)
3636
from testgen.mcp.tools.markdown import MdDoc
3737

38-
_DOC_GROUP = DocGroup.DISCOVER
38+
_DOC_GROUP = DocGroup.INVESTIGATE
3939

4040
_VALID_SCOPES = {"column", "table", "referential", "custom"}
4141

testgen/template/dbsetup_anomaly_types/profile_anomaly_types_Column_Pattern_Mismatch.yaml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -15,17 +15,17 @@ profile_anomaly_types:
1515
AND STRPOS(p.top_patterns, 'N') > 0
1616
AND (
1717
( (STRPOS(p.top_patterns, 'A') > 0 OR STRPOS(p.top_patterns, 'a') > 0)
18-
AND SPLIT_PART(p.top_patterns, '|', 3)::NUMERIC / SPLIT_PART(p.top_patterns, '|', 1)::NUMERIC < 0.05)
18+
AND NULLIF(SPLIT_PART(p.top_patterns, '|', 3), '')::NUMERIC / NULLIF(SPLIT_PART(p.top_patterns, '|', 1), '')::NUMERIC < 0.05)
1919
OR
20-
SPLIT_PART(p.top_patterns, '|', 3)::NUMERIC / SPLIT_PART(p.top_patterns, '|', 1)::NUMERIC < 0.1
20+
NULLIF(SPLIT_PART(p.top_patterns, '|', 3), '')::NUMERIC / NULLIF(SPLIT_PART(p.top_patterns, '|', 1), '')::NUMERIC < 0.1
2121
)
2222
detail_expression: |-
2323
'Patterns: ' || p.top_patterns
2424
issue_likelihood: Likely
2525
suggested_action: |-
2626
Review the values for any data that doesn't conform to the most common pattern and correct any data errors.
2727
dq_score_prevalence_formula: |-
28-
(p.record_ct - SPLIT_PART(p.top_patterns, '|', 1)::BIGINT)::FLOAT/NULLIF(p.record_ct, 0)::FLOAT
28+
(p.record_ct - NULLIF(SPLIT_PART(p.top_patterns, '|', 1), '')::BIGINT)::FLOAT/NULLIF(p.record_ct, 0)::FLOAT
2929
dq_score_risk_factor: '0.66'
3030
dq_dimension: Validity
3131
impact_dimension: Usability

testgen/template/dbsetup_anomaly_types/profile_anomaly_types_Invalid_Zip3_USA.yaml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,14 +9,14 @@ profile_anomaly_types:
99
p.distinct_pattern_ct > 1
1010
AND (p.column_name ilike '%zip%' OR p.column_name ILIKE '%postal%')
1111
AND SPLIT_PART(p.top_patterns, ' | ', 2) = 'NNN'
12-
AND SPLIT_PART(p.top_patterns, ' | ', 1)::FLOAT/NULLIF(value_ct, 0)::FLOAT > 0.50
12+
AND NULLIF(SPLIT_PART(p.top_patterns, ' | ', 1), '')::FLOAT/NULLIF(value_ct, 0)::FLOAT > 0.50
1313
detail_expression: |-
1414
'Pattern: ' || p.top_patterns
1515
issue_likelihood: Definite
1616
suggested_action: |-
1717
Review your source data, ingestion process, and any processing steps that update this column.
1818
dq_score_prevalence_formula: |-
19-
(NULLIF(p.record_ct, 0)::INT - SPLIT_PART(p.top_patterns, ' | ', 1)::BIGINT)::FLOAT/NULLIF(p.record_ct, 0)::FLOAT
19+
(NULLIF(p.record_ct, 0)::INT - NULLIF(SPLIT_PART(p.top_patterns, ' | ', 1), '')::BIGINT)::FLOAT/NULLIF(p.record_ct, 0)::FLOAT
2020
dq_score_risk_factor: '1'
2121
dq_dimension: Validity
2222
impact_dimension: Conformance

testgen/template/dbsetup_anomaly_types/profile_anomaly_types_Table_Pattern_Mismatch.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ profile_anomaly_types:
1313
AND m.max_pattern_ct = 1
1414
AND m.column_ct > 1
1515
AND SPLIT_PART(p.top_patterns, '|', 2) <> SPLIT_PART(m.very_top_pattern, '|', 2)
16-
AND SPLIT_PART(p.top_patterns, '|', 1)::NUMERIC / SPLIT_PART(m.very_top_pattern, '|', 1)::NUMERIC < 0.1
16+
AND NULLIF(SPLIT_PART(p.top_patterns, '|', 1), '')::NUMERIC / NULLIF(SPLIT_PART(m.very_top_pattern, '|', 1), '')::NUMERIC < 0.1
1717
detail_expression: |-
1818
'Patterns: ' || SPLIT_PART(p.top_patterns, '|', 2) || ', ' || SPLIT_PART(ltrim(m.very_top_pattern, '0'), '|', 2)
1919
issue_likelihood: Likely

testgen/template/flavors/bigquery/gen_query_tests/gen_Freshness_Trend.sql

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -75,9 +75,11 @@ tran_date_cols AS (
7575
) AS rank
7676
FROM latest_results
7777
WHERE general_type IN ('A', 'D', 'N')
78-
AND functional_data_type ILIKE 'transactional date%'
79-
OR functional_data_type ILIKE 'period%'
80-
OR functional_data_type = 'timestamp'
78+
AND (
79+
functional_data_type ILIKE 'transactional date%'
80+
OR functional_data_type ILIKE 'period%'
81+
OR functional_data_type = 'timestamp'
82+
)
8183
),
8284
-- Numeric Measures
8385
numeric_cols AS (

testgen/template/flavors/databricks/gen_query_tests/gen_Freshness_Trend.sql

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -75,9 +75,11 @@ tran_date_cols AS (
7575
) AS rank
7676
FROM latest_results
7777
WHERE general_type IN ('A', 'D', 'N')
78-
AND functional_data_type ILIKE 'transactional date%'
79-
OR functional_data_type ILIKE 'period%'
80-
OR functional_data_type = 'timestamp'
78+
AND (
79+
functional_data_type ILIKE 'transactional date%'
80+
OR functional_data_type ILIKE 'period%'
81+
OR functional_data_type = 'timestamp'
82+
)
8183
),
8284
-- Numeric Measures
8385
numeric_cols AS (

testgen/template/flavors/mssql/gen_query_tests/gen_Freshness_Trend.sql

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -75,9 +75,11 @@ tran_date_cols AS (
7575
) AS rank
7676
FROM latest_results
7777
WHERE general_type IN ('A', 'D', 'N')
78-
AND functional_data_type ILIKE 'transactional date%'
79-
OR functional_data_type ILIKE 'period%'
80-
OR functional_data_type = 'timestamp'
78+
AND (
79+
functional_data_type ILIKE 'transactional date%'
80+
OR functional_data_type ILIKE 'period%'
81+
OR functional_data_type = 'timestamp'
82+
)
8183
),
8284
-- Numeric Measures
8385
numeric_cols AS (

0 commit comments

Comments
 (0)