Skip to content

Commit 14db64b

Browse files
committed
Address review feedback on V2 PR
- Fix NameError in tests/conftest.py setup_pyspark shim - Drop duplicate PyDeequSession entry in __getattr__ tuple - Widen pyspark pin to >=3.5.0,<4.0.0; remove empty extras table - Bump black target_version to py39 to match python requirement - Raise ValueError on Check.where() before any constraint - Replace fixed sleep with port-wait for Spark Connect server in CI
1 parent 1148373 commit 14db64b

5 files changed

Lines changed: 9 additions & 10 deletions

File tree

.github/workflows/base.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -56,8 +56,8 @@ jobs:
5656
--packages org.apache.spark:spark-connect_2.12:3.5.0 \
5757
--jars $PWD/deequ_2.12-2.1.0b-spark-3.5.jar \
5858
--conf spark.connect.extensions.relation.classes=com.amazon.deequ.connect.DeequRelationPlugin
59-
# Wait for server to start
60-
sleep 20
59+
# Wait for the gRPC port to accept connections (fail fast if startup broke)
60+
timeout 60 bash -c 'until (echo > /dev/tcp/localhost/15002) >/dev/null 2>&1; do sleep 1; done'
6161
# Verify server is running
6262
ps aux | grep SparkConnectServer | grep -v grep
6363

pydeequ/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ def __getattr__(name):
4545
return _f2j_maven_coord
4646

4747
if name in ("AnalysisRunner", "Check", "CheckLevel", "ColumnProfilerRunner",
48-
"PyDeequSession", "DEEQU_MAVEN_COORD"):
48+
"DEEQU_MAVEN_COORD"):
4949
# Import legacy modules on demand
5050
if name == "AnalysisRunner":
5151
from pydeequ.analyzers import AnalysisRunner

pydeequ/v2/checks.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -901,8 +901,9 @@ def where(self, filter_condition: str) -> "Check":
901901
Example:
902902
check.isComplete("email").where("status = 'active'")
903903
"""
904-
if self._constraints:
905-
self._constraints[-1].where = filter_condition
904+
if not self._constraints:
905+
raise ValueError("where() called before any constraint was added")
906+
self._constraints[-1].where = filter_condition
906907
return self
907908

908909
# ========================================================================

pyproject.toml

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ numpy = ">=1.23.0"
3333
pandas = ">=1.5.0"
3434
protobuf = ">=4.21.0"
3535
setuptools = ">=69.0.0" # Required for Python 3.12+ (distutils removed)
36-
pyspark = {version = "3.5.0", extras = ["connect"]}
36+
pyspark = {version = ">=3.5.0,<4.0.0", extras = ["connect"]}
3737

3838
[tool.poetry.group.dev.dependencies]
3939
pytest = "^8.0.0"
@@ -43,8 +43,6 @@ black = "^24.0.0"
4343
pre-commit = "^3.6.0"
4444
pytest-rerunfailures = "^14.0"
4545

46-
[tool.poetry.extras]
47-
4846
[build-system]
4947
requires = ["poetry-core>=1.0.0"]
5048
build-backend = "poetry.core.masonry.api"
@@ -57,7 +55,7 @@ include_trailing_comma = true
5755
force_grid_wrap = 0
5856
use_parentheses = true
5957
ensure_newline_before_comments = true
60-
target_version = ['py38']
58+
target_version = ['py39']
6159
include = '\.pyi?$'
6260
exclude = '''
6361
/(

tests/conftest.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,6 @@ def config(self, key, value):
7575
return self
7676

7777
def getOrCreate(self):
78-
return get_spark_connect_session()
78+
return create_spark_connect_session()
7979

8080
return SparkConnectBuilder()

0 commit comments

Comments
 (0)