Skip to content

Commit 7d5c997

Browse files
committed
update proto based on code review
1 parent 4c3145f commit 7d5c997

4 files changed

Lines changed: 30 additions & 18 deletions

File tree

pydeequ/v2/analyzers.py

Lines changed: 11 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -250,16 +250,26 @@ def __repr__(self) -> str:
250250

251251
@dataclass
252252
class Compliance(_ConnectAnalyzer):
253-
"""Fraction of rows satisfying a SQL predicate, named by `instance`."""
253+
"""
254+
Fraction of rows satisfying a SQL predicate, named by `instance`.
255+
256+
`columns` should list every column name referenced in `predicate`. Deequ
257+
uses it for an upfront precondition check so missing-column mistakes
258+
surface as a clear DeequAnalysisException rather than a confusing
259+
Spark SQL error during execution. An empty sequence (the default) disables the check.
260+
"""
254261

255262
instance: str
256263
predicate: str
257264
where: Optional[str] = None
265+
columns: Sequence[str] = ()
258266

259267
def to_proto(self) -> proto.Analyzer:
260268
msg = proto.Analyzer()
261269
msg.compliance.instance = self.instance
262270
msg.compliance.predicate = self.predicate
271+
if self.columns:
272+
msg.compliance.columns.extend(self.columns)
263273
_set_where(msg, self.where)
264274
return msg
265275

pydeequ/v2/proto/analysis_pb2.py

Lines changed: 8 additions & 8 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

pydeequ/v2/proto/analysis_pb2.pyi

Lines changed: 4 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -59,12 +59,14 @@ class HistogramSpec(_message.Message):
5959
def __init__(self, column: _Optional[str] = ..., max_detail_bins: _Optional[int] = ...) -> None: ...
6060

6161
class ComplianceAnalyzerSpec(_message.Message):
62-
__slots__ = ("instance", "predicate")
62+
__slots__ = ("instance", "predicate", "columns")
6363
INSTANCE_FIELD_NUMBER: _ClassVar[int]
6464
PREDICATE_FIELD_NUMBER: _ClassVar[int]
65+
COLUMNS_FIELD_NUMBER: _ClassVar[int]
6566
instance: str
6667
predicate: str
67-
def __init__(self, instance: _Optional[str] = ..., predicate: _Optional[str] = ...) -> None: ...
68+
columns: _containers.RepeatedScalarFieldContainer[str]
69+
def __init__(self, instance: _Optional[str] = ..., predicate: _Optional[str] = ..., columns: _Optional[_Iterable[str]] = ...) -> None: ...
6870

6971
class PatternMatchSpec(_message.Message):
7072
__slots__ = ("column", "pattern")

pydeequ/v2/proto/common_pb2.py

Lines changed: 7 additions & 7 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)