You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
Copy file name to clipboard. Expand all lines: testgen/template/dbsetup_anomaly_types/profile_anomaly_types_Standardized_Value_Matches.yaml
+10 -10. Lines changed: 10 additions & 10 deletions
Original file line number
Diff line number
Diff line change
@@ -31,12 +31,12 @@ profile_anomaly_types:
31
31
GROUP BY possible_standard_value
32
32
HAVING COUNT(DISTINCT `{COLUMN_NAME}`) > 1
33
33
)
34
-
SELECT DISTINCT a.`{COLUMN_NAME}`, COUNT(*) AS count
34
+
SELECT a.`{COLUMN_NAME}`, COUNT(*) AS count
35
35
FROM `{TARGET_SCHEMA}`.`{TABLE_NAME}` a
36
36
JOIN cte b
37
37
ON UPPER(REGEXP_REPLACE(CAST(a.`{COLUMN_NAME}` AS STRING), r"[ '\.\-\,]", '')) = b.possible_standard_value
38
38
GROUP BY a.`{COLUMN_NAME}`
39
-
ORDER BY possible_standard_value ASC, count DESC
39
+
ORDER BY UPPER(REGEXP_REPLACE(CAST(a.`{COLUMN_NAME}` AS STRING), r"[ '\.\-\,]", '')) ASC, count DESC
40
40
LIMIT {LIMIT};
41
41
error_type: Profile Anomaly
42
42
- id: '1289'
@@ -45,61 +45,61 @@ profile_anomaly_types:
45
45
sql_flavor: databricks
46
46
lookup_type: null
47
47
lookup_query: |-
48
-
WITH CTE AS ( SELECT DISTINCT UPPER(TRANSLATE(`{COLUMN_NAME}`, ' '',.-', '')) as possible_standard_value, COUNT(DISTINCT `{COLUMN_NAME}`) FROM `{TARGET_SCHEMA}`.`{TABLE_NAME}` GROUP BY UPPER(TRANSLATE(`{COLUMN_NAME}`, ' '',.-', '')) HAVING COUNT(DISTINCT `{COLUMN_NAME}`) > 1 ) SELECT DISTINCT a.`{COLUMN_NAME}`, COUNT(*) AS count FROM `{TARGET_SCHEMA}`.`{TABLE_NAME}` a, cte b WHERE UPPER(TRANSLATE(a.`{COLUMN_NAME}`, ' '',.-', '')) = b.possible_standard_value GROUP BY a.`{COLUMN_NAME}` ORDER BY possible_standard_value ASC, count DESC LIMIT {LIMIT};
48
+
WITH CTE AS ( SELECT DISTINCT UPPER(TRANSLATE(`{COLUMN_NAME}`, ' '',.-', '')) as possible_standard_value, COUNT(DISTINCT `{COLUMN_NAME}`) FROM `{TARGET_SCHEMA}`.`{TABLE_NAME}` GROUP BY UPPER(TRANSLATE(`{COLUMN_NAME}`, ' '',.-', '')) HAVING COUNT(DISTINCT `{COLUMN_NAME}`) > 1 ) SELECT a.`{COLUMN_NAME}`, COUNT(*) AS count FROM `{TARGET_SCHEMA}`.`{TABLE_NAME}` a, cte b WHERE UPPER(TRANSLATE(a.`{COLUMN_NAME}`, ' '',.-', '')) = b.possible_standard_value GROUP BY a.`{COLUMN_NAME}` ORDER BY UPPER(TRANSLATE(a.`{COLUMN_NAME}`, ' '',.-', '')) ASC, count DESC LIMIT {LIMIT};
49
49
error_type: Profile Anomaly
50
50
- id: '1131'
51
51
test_id: '1017'
52
52
test_type: Standardized_Value_Matches
53
53
sql_flavor: mssql
54
54
lookup_type: null
55
55
lookup_query: |-
56
-
WITH CTE AS ( SELECT DISTINCT TOP {LIMIT} UPPER(REPLACE(TRANSLATE("{COLUMN_NAME}",' '''',.-',REPLICATE(' ', LEN(' '''',.-'))),' ','')) as possible_standard_value, COUNT(DISTINCT "{COLUMN_NAME}") as distinct_ct FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" GROUP BY UPPER(REPLACE(TRANSLATE("{COLUMN_NAME}",' '''',.-',REPLICATE(' ', LEN(' '''',.-'))),' ','')) HAVING COUNT(DISTINCT "{COLUMN_NAME}") > 1 ) SELECT DISTINCT a."{COLUMN_NAME}", COUNT(*) AS count FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" a, cte b WHERE UPPER(REPLACE(TRANSLATE("{COLUMN_NAME}",' '''',.-',REPLICATE(' ', LEN(' '''',.-'))),' ','')) = b.possible_standard_value GROUP BY a."{COLUMN_NAME}" ORDER BY possible_standard_value ASC, count DESC;
56
+
WITH CTE AS ( SELECT DISTINCT TOP {LIMIT} UPPER(REPLACE(TRANSLATE("{COLUMN_NAME}",' '''',.-',REPLICATE(' ', LEN(' '''',.-'))),' ','')) as possible_standard_value, COUNT(DISTINCT "{COLUMN_NAME}") as distinct_ct FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" GROUP BY UPPER(REPLACE(TRANSLATE("{COLUMN_NAME}",' '''',.-',REPLICATE(' ', LEN(' '''',.-'))),' ','')) HAVING COUNT(DISTINCT "{COLUMN_NAME}") > 1 ) SELECT a."{COLUMN_NAME}", COUNT(*) AS count FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" a, cte b WHERE UPPER(REPLACE(TRANSLATE("{COLUMN_NAME}",' '''',.-',REPLICATE(' ', LEN(' '''',.-'))),' ','')) = b.possible_standard_value GROUP BY a."{COLUMN_NAME}" ORDER BY UPPER(REPLACE(TRANSLATE(a."{COLUMN_NAME}",' '''',.-',REPLICATE(' ', LEN(' '''',.-'))),' ','')) ASC, count DESC;
57
57
error_type: Profile Anomaly
58
58
- id: '1074'
59
59
test_id: '1017'
60
60
test_type: Standardized_Value_Matches
61
61
sql_flavor: postgresql
62
62
lookup_type: null
63
63
lookup_query: |-
64
-
WITH CTE AS ( SELECT DISTINCT UPPER(TRANSLATE("{COLUMN_NAME}", ' '',.-', '')) as possible_standard_value, COUNT(DISTINCT "{COLUMN_NAME}") FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" GROUP BY UPPER(TRANSLATE("{COLUMN_NAME}", ' '',.-', '')) HAVING COUNT(DISTINCT "{COLUMN_NAME}") > 1 ) SELECT DISTINCT a."{COLUMN_NAME}", COUNT(*) AS count FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" a, cte b WHERE UPPER(TRANSLATE(a."{COLUMN_NAME}", ' '',.-', '')) = b.possible_standard_value GROUP BY a."{COLUMN_NAME}" ORDER BY possible_standard_value ASC, count DESC LIMIT {LIMIT};
64
+
WITH CTE AS ( SELECT DISTINCT UPPER(TRANSLATE("{COLUMN_NAME}", ' '',.-', '')) as possible_standard_value, COUNT(DISTINCT "{COLUMN_NAME}") FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" GROUP BY UPPER(TRANSLATE("{COLUMN_NAME}", ' '',.-', '')) HAVING COUNT(DISTINCT "{COLUMN_NAME}") > 1 ) SELECT a."{COLUMN_NAME}", COUNT(*) AS count FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" a, cte b WHERE UPPER(TRANSLATE(a."{COLUMN_NAME}", ' '',.-', '')) = b.possible_standard_value GROUP BY a."{COLUMN_NAME}" ORDER BY UPPER(TRANSLATE(a."{COLUMN_NAME}", ' '',.-', '')) ASC, count DESC LIMIT {LIMIT};
65
65
error_type: Profile Anomaly
66
66
- id: '1049'
67
67
test_id: '1017'
68
68
test_type: Standardized_Value_Matches
69
69
sql_flavor: redshift
70
70
lookup_type: null
71
71
lookup_query: |-
72
-
WITH CTE AS ( SELECT DISTINCT UPPER(TRANSLATE("{COLUMN_NAME}", ' '',.-', '')) as possible_standard_value, COUNT(DISTINCT "{COLUMN_NAME}") FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" GROUP BY UPPER(TRANSLATE("{COLUMN_NAME}", ' '',.-', '')) HAVING COUNT(DISTINCT "{COLUMN_NAME}") > 1 ) SELECT DISTINCT a."{COLUMN_NAME}", COUNT(*) AS count FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" a, cte b WHERE UPPER(TRANSLATE(a."{COLUMN_NAME}", ' '',.-', '')) = b.possible_standard_value GROUP BY a."{COLUMN_NAME}" ORDER BY possible_standard_value ASC, count DESC LIMIT {LIMIT};
72
+
WITH CTE AS ( SELECT DISTINCT UPPER(TRANSLATE("{COLUMN_NAME}", ' '',.-', '')) as possible_standard_value, COUNT(DISTINCT "{COLUMN_NAME}") FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" GROUP BY UPPER(TRANSLATE("{COLUMN_NAME}", ' '',.-', '')) HAVING COUNT(DISTINCT "{COLUMN_NAME}") > 1 ) SELECT a."{COLUMN_NAME}", COUNT(*) AS count FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" a, cte b WHERE UPPER(TRANSLATE(a."{COLUMN_NAME}", ' '',.-', '')) = b.possible_standard_value GROUP BY a."{COLUMN_NAME}" ORDER BY UPPER(TRANSLATE(a."{COLUMN_NAME}", ' '',.-', '')) ASC, count DESC LIMIT {LIMIT};
73
73
error_type: Profile Anomaly
74
74
- id: '1449'
75
75
test_id: '1017'
76
76
test_type: Standardized_Value_Matches
77
77
sql_flavor: redshift_spectrum
78
78
lookup_type: null
79
79
lookup_query: |-
80
-
WITH CTE AS ( SELECT DISTINCT UPPER(TRANSLATE("{COLUMN_NAME}", ' '',.-', '')) as possible_standard_value, COUNT(DISTINCT "{COLUMN_NAME}") FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" GROUP BY UPPER(TRANSLATE("{COLUMN_NAME}", ' '',.-', '')) HAVING COUNT(DISTINCT "{COLUMN_NAME}") > 1 ) SELECT DISTINCT a."{COLUMN_NAME}", COUNT(*) AS count FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" a, cte b WHERE UPPER(TRANSLATE(a."{COLUMN_NAME}", ' '',.-', '')) = b.possible_standard_value GROUP BY a."{COLUMN_NAME}" ORDER BY possible_standard_value ASC, count DESC LIMIT {LIMIT};
80
+
WITH CTE AS ( SELECT DISTINCT UPPER(TRANSLATE("{COLUMN_NAME}", ' '',.-', '')) as possible_standard_value, COUNT(DISTINCT "{COLUMN_NAME}") FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" GROUP BY UPPER(TRANSLATE("{COLUMN_NAME}", ' '',.-', '')) HAVING COUNT(DISTINCT "{COLUMN_NAME}") > 1 ) SELECT a."{COLUMN_NAME}", COUNT(*) AS count FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" a, cte b WHERE UPPER(TRANSLATE(a."{COLUMN_NAME}", ' '',.-', '')) = b.possible_standard_value GROUP BY a."{COLUMN_NAME}" ORDER BY UPPER(TRANSLATE(a."{COLUMN_NAME}", ' '',.-', '')) ASC, count DESC LIMIT {LIMIT};
81
81
error_type: Profile Anomaly
82
82
- id: '1188'
83
83
test_id: '1017'
84
84
test_type: Standardized_Value_Matches
85
85
sql_flavor: snowflake
86
86
lookup_type: null
87
87
lookup_query: |-
88
-
WITH CTE AS ( SELECT DISTINCT UPPER(TRANSLATE("{COLUMN_NAME}", ' '',.-', '')) as possible_standard_value, COUNT(DISTINCT "{COLUMN_NAME}") FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" GROUP BY UPPER(TRANSLATE("{COLUMN_NAME}", ' '',.-', '')) HAVING COUNT(DISTINCT "{COLUMN_NAME}") > 1 ) SELECT DISTINCT a."{COLUMN_NAME}", COUNT(*) AS count FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" a, cte b WHERE UPPER(TRANSLATE(a."{COLUMN_NAME}", ' '',.-', '')) = b.possible_standard_value GROUP BY a."{COLUMN_NAME}" ORDER BY possible_standard_value ASC, count DESC LIMIT {LIMIT};
88
+
WITH CTE AS ( SELECT DISTINCT UPPER(TRANSLATE("{COLUMN_NAME}", ' '',.-', '')) as possible_standard_value, COUNT(DISTINCT "{COLUMN_NAME}") FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" GROUP BY UPPER(TRANSLATE("{COLUMN_NAME}", ' '',.-', '')) HAVING COUNT(DISTINCT "{COLUMN_NAME}") > 1 ) SELECT a."{COLUMN_NAME}", COUNT(*) AS count FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" a, cte b WHERE UPPER(TRANSLATE(a."{COLUMN_NAME}", ' '',.-', '')) = b.possible_standard_value GROUP BY a."{COLUMN_NAME}" ORDER BY UPPER(TRANSLATE(a."{COLUMN_NAME}", ' '',.-', '')) ASC, count DESC LIMIT {LIMIT};
89
89
error_type: Profile Anomaly
90
90
- id: '1516'
91
91
test_id: '1017'
92
92
test_type: Standardized_Value_Matches
93
93
sql_flavor: oracle
94
94
lookup_type: null
95
95
lookup_query: |-
96
-
WITH CTE AS ( SELECT DISTINCT UPPER(TRANSLATE("{COLUMN_NAME}", 'X '',.-', 'X')) as possible_standard_value, COUNT(DISTINCT "{COLUMN_NAME}") AS cnt FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" GROUP BY UPPER(TRANSLATE("{COLUMN_NAME}", 'X '',.-', 'X')) HAVING COUNT(DISTINCT "{COLUMN_NAME}") > 1 ) SELECT DISTINCT a."{COLUMN_NAME}", COUNT(*) AS count FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" a, cte b WHERE UPPER(TRANSLATE(a."{COLUMN_NAME}", 'X '',.-', 'X')) = b.possible_standard_value GROUP BY a."{COLUMN_NAME}" ORDER BY possible_standard_value ASC, count DESC FETCH FIRST {LIMIT} ROWS ONLY
96
+
WITH CTE AS ( SELECT DISTINCT UPPER(TRANSLATE("{COLUMN_NAME}", 'X '',.-', 'X')) as possible_standard_value, COUNT(DISTINCT "{COLUMN_NAME}") AS cnt FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" GROUP BY UPPER(TRANSLATE("{COLUMN_NAME}", 'X '',.-', 'X')) HAVING COUNT(DISTINCT "{COLUMN_NAME}") > 1 ) SELECT a."{COLUMN_NAME}", COUNT(*) AS count FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" a, cte b WHERE UPPER(TRANSLATE(a."{COLUMN_NAME}", 'X '',.-', 'X')) = b.possible_standard_value GROUP BY a."{COLUMN_NAME}" ORDER BY UPPER(TRANSLATE(a."{COLUMN_NAME}", 'X '',.-', 'X')) ASC, count DESC FETCH FIRST {LIMIT} ROWS ONLY
97
97
error_type: Profile Anomaly
98
98
- id: '1516'
99
99
test_id: '1017'
100
100
test_type: Standardized_Value_Matches
101
101
sql_flavor: sap_hana
102
102
lookup_type: null
103
103
lookup_query: |-
104
-
WITH CTE AS ( SELECT DISTINCT UPPER(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE("{COLUMN_NAME}", ' ', ''), '''', ''), ',', ''), '.', ''), '-', '')) as possible_standard_value, COUNT(DISTINCT "{COLUMN_NAME}") AS cnt FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" GROUP BY UPPER(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE("{COLUMN_NAME}", ' ', ''), '''', ''), ',', ''), '.', ''), '-', '')) HAVING COUNT(DISTINCT "{COLUMN_NAME}") > 1 ) SELECT DISTINCT a."{COLUMN_NAME}", COUNT(*) AS count FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" a, cte b WHERE UPPER(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(a."{COLUMN_NAME}", ' ', ''), '''', ''), ',', ''), '.', ''), '-', '')) = b.possible_standard_value GROUP BY a."{COLUMN_NAME}" ORDER BY possible_standard_value ASC, count DESC LIMIT {LIMIT}
104
+
WITH CTE AS ( SELECT DISTINCT UPPER(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE("{COLUMN_NAME}", ' ', ''), '''', ''), ',', ''), '.', ''), '-', '')) as possible_standard_value, COUNT(DISTINCT "{COLUMN_NAME}") AS cnt FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" GROUP BY UPPER(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE("{COLUMN_NAME}", ' ', ''), '''', ''), ',', ''), '.', ''), '-', '')) HAVING COUNT(DISTINCT "{COLUMN_NAME}") > 1 ) SELECT a."{COLUMN_NAME}", COUNT(*) AS count FROM "{TARGET_SCHEMA}"."{TABLE_NAME}" a, cte b WHERE UPPER(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(a."{COLUMN_NAME}", ' ', ''), '''', ''), ',', ''), '.', ''), '-', '')) = b.possible_standard_value GROUP BY a."{COLUMN_NAME}" ORDER BY UPPER(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(a."{COLUMN_NAME}", ' ', ''), '''', ''), ',', ''), '.', ''), '-', '')) ASC, count DESC LIMIT {LIMIT}
0 commit comments