22from collections .abc import Iterable
33from dataclasses import dataclass
44from datetime import datetime
5+ from enum import StrEnum
56from typing import Self
67from uuid import UUID , uuid4
78
2122PII_RISK_RE = re .compile (r"Risk: (MODERATE|HIGH)," )
2223
2324
25+ class Disposition (StrEnum ):
26+ """Stored disposition values for ``profile_anomaly_results.disposition`` and
27+ ``test_results.disposition``. The user-facing label for ``INACTIVE`` is "Muted"."""
28+ CONFIRMED = "Confirmed"
29+ DISMISSED = "Dismissed"
30+ INACTIVE = "Inactive"
31+
32+
33+ class IssueLikelihood (StrEnum ):
34+ """Stored ``profile_anomaly_types.issue_likelihood`` values."""
35+ DEFINITE = "Definite"
36+ LIKELY = "Likely"
37+ POSSIBLE = "Possible"
38+ POTENTIAL_PII = "Potential PII"
39+
40+
41+ class PiiRisk (StrEnum ):
42+ """Risk level extracted from PII issue ``detail`` strings via ``priority`` hybrid."""
43+ HIGH = "High"
44+ MODERATE = "Moderate"
45+
46+
2447@dataclass
2548class IssueLikelihoodCounts :
2649 """Counts of hygiene issues by likelihood category, with dismissed/inactive separated."""
@@ -51,6 +74,7 @@ class HygieneIssueListRow:
5174 schema_name : str
5275 table_name : str
5376 column_name : str
77+ impact_dimension : str | None
5478 dq_dimension : str | None
5579 disposition : str
5680 priority : str | None
@@ -72,6 +96,7 @@ class HygieneIssueSearchRow:
7296 schema_name : str
7397 table_name : str
7498 column_name : str
99+ impact_dimension : str | None
75100 dq_dimension : str | None
76101 disposition : str
77102 priority : str | None
@@ -92,7 +117,6 @@ class HygieneIssueDetail:
92117 schema_name : str
93118 table_name : str
94119 column_name : str
95- db_data_type : str | None
96120 dq_dimension : str | None
97121 impact_dimension : str | None
98122 disposition : str
@@ -150,13 +174,12 @@ class HygieneIssue(Entity):
150174 schema_name : str = Column (String , nullable = False )
151175 table_name : str = Column (String , nullable = False )
152176 column_name : str = Column (String , nullable = False )
153- db_data_type : str = Column (String )
154177
155178 detail : str = Column (String , nullable = False )
156179 disposition : str = Column (String )
157180 impact_dimension : str = Column (String )
158181
159- # Unmapped: column_type, dq_prevalence.
182+ # Unmapped: column_type, db_data_type, dq_prevalence.
160183
161184 @hybrid_property
162185 def priority (self ):
@@ -237,12 +260,12 @@ def _priority_order(cls):
237260 @classmethod
238261 def list_for_run (
239262 cls ,
240- profile_run_id : UUID ,
263+ job_execution_id : UUID ,
241264 * clauses ,
242265 page : int = 1 ,
243266 limit : int = 50 ,
244267 ) -> tuple [list [HygieneIssueListRow ], int ]:
245- """Paginated hygiene issues for a single profiling run.
268+ """Paginated hygiene issues for a single profiling run, scoped by its job_execution_id .
246269
247270 Caller-supplied ``*clauses`` carry every WHERE filter (project scoping, disposition,
248271 likelihood / pii_risk, table / column / dq_dimension / issue_type filters).
@@ -255,14 +278,16 @@ def list_for_run(
255278 cls .schema_name .label ("schema_name" ),
256279 cls .table_name .label ("table_name" ),
257280 cls .column_name .label ("column_name" ),
281+ cls .impact_dimension .label ("impact_dimension" ),
258282 HygieneIssueType .dq_dimension .label ("dq_dimension" ),
259- func .coalesce (cls .disposition , "Confirmed" ).label ("disposition" ),
283+ func .coalesce (cls .disposition , Disposition . CONFIRMED ).label ("disposition" ),
260284 cls .priority .label ("priority" ),
261285 cls .detail .label ("detail" ),
262286 HygieneIssueType .detail_redactable .label ("detail_redactable" ),
263287 ProfileResult .pii_flag .label ("pii_flag" ),
264288 )
265289 .join (HygieneIssueType , HygieneIssueType .id == cls .type_id )
290+ .join (ProfilingRun , ProfilingRun .id == cls .profile_run_id )
266291 .outerjoin (
267292 ProfileResult ,
268293 and_ (
@@ -272,7 +297,7 @@ def list_for_run(
272297 ProfileResult .column_name == cls .column_name ,
273298 ),
274299 )
275- .where (cls . profile_run_id == profile_run_id , * clauses )
300+ .where (ProfilingRun . job_execution_id == job_execution_id , * clauses )
276301 .order_by (cls ._priority_order (), cls .table_name , cls .column_name , cls .id )
277302 )
278303 return cls ._paginate (query , page = page , limit = limit , data_class = HygieneIssueListRow )
@@ -301,8 +326,9 @@ def search(
301326 cls .schema_name .label ("schema_name" ),
302327 cls .table_name .label ("table_name" ),
303328 cls .column_name .label ("column_name" ),
329+ cls .impact_dimension .label ("impact_dimension" ),
304330 HygieneIssueType .dq_dimension .label ("dq_dimension" ),
305- func .coalesce (cls .disposition , "Confirmed" ).label ("disposition" ),
331+ func .coalesce (cls .disposition , Disposition . CONFIRMED ).label ("disposition" ),
306332 cls .priority .label ("priority" ),
307333 cls .detail .label ("detail" ),
308334 HygieneIssueType .detail_redactable .label ("detail_redactable" ),
@@ -347,10 +373,9 @@ def get_with_context(cls, issue_id: UUID, *clauses) -> HygieneIssueDetail | None
347373 cls .schema_name .label ("schema_name" ),
348374 cls .table_name .label ("table_name" ),
349375 cls .column_name .label ("column_name" ),
350- cls .db_data_type .label ("db_data_type" ),
351376 HygieneIssueType .dq_dimension .label ("dq_dimension" ),
352377 cls .impact_dimension .label ("impact_dimension" ),
353- func .coalesce (cls .disposition , "Confirmed" ).label ("disposition" ),
378+ func .coalesce (cls .disposition , Disposition . CONFIRMED ).label ("disposition" ),
354379 cls .priority .label ("priority" ),
355380 cls .detail .label ("detail" ),
356381 HygieneIssueType .detail_redactable .label ("detail_redactable" ),
0 commit comments