Skip to content

Commit c5c7f66

Browse files
committed
Fix empty Seq compatibility across Scala 2.12 and 2.13
The previous fix passed Stream$Empty$ to deequ constructors/methods via to_scala_seq(jvm, ArrayList()), which Py4J's reflection-based overload resolution rejects in Scala 2.12 (Spark 3.x). Add an empty_scala_seq() helper that wraps an empty ArrayList with JavaConverters and calls toList() on the resulting Scala Iterable instead of toSeq(). This produces immutable.Nil (an empty List), which deequ accepts as Seq[_] in both Scala 2.12 and 2.13, and which Py4J constructor/method lookup matches correctly in both versions. Affirm-Skill: acli-jira Affirm-Skill: att-deploy-check Affirm-Skill: att-test Affirm-Skill: buildkite-debug Affirm-Skill: capture-context Affirm-Skill: cmt-writer Affirm-Skill: commit-and-push Affirm-Skill: create-adr Affirm-Skill: create-cmt-ticket Affirm-Skill: create-tech-spec Affirm-Skill: export-conversation-record Affirm-Skill: gather-best-practices Affirm-Skill: mcp-debug Affirm-Skill: multi-thor-provisioner Affirm-Skill: resolve-pr-comments Affirm-Skill: save-output Affirm-Skill: security-scanner-for-skill-md Affirm-Skill: skill-evaluator Affirm-Skill: thor-control Affirm-Skill: tickets-from-plan Affirm-Skill: write-implementation-plan
1 parent 1b23780 commit c5c7f66

3 files changed

Lines changed: 16 additions & 4 deletions

File tree

pydeequ/analyzers.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -9,7 +9,7 @@
99
from pydeequ.pandas_utils import ensure_pyspark_df
1010
from pydeequ.repository import MetricsRepository, ResultKey
1111
from enum import Enum
12-
from pydeequ.scala_utils import to_scala_seq
12+
from pydeequ.scala_utils import empty_scala_seq, to_scala_seq
1313
from pydeequ.configs import SPARK_VERSION
1414

1515
class _AnalyzerObject:
@@ -311,7 +311,7 @@ def _analyzer_jvm(self):
311311
self.instance,
312312
self.predicate,
313313
self._jvm.scala.Option.apply(self.where),
314-
to_scala_seq(self._jvm, self._jvm.java.util.ArrayList()),
314+
empty_scala_seq(self._jvm),
315315
self._jvm.scala.Option.apply(None)
316316
)
317317

pydeequ/checks.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
from pyspark.sql import SparkSession
66

77
from pydeequ.check_functions import is_one
8-
from pydeequ.scala_utils import ScalaFunction1, to_scala_seq
8+
from pydeequ.scala_utils import ScalaFunction1, empty_scala_seq, to_scala_seq
99
from pydeequ.configs import SPARK_VERSION
1010

1111
# TODO implement custom assertions
@@ -563,7 +563,7 @@ def satisfies(self, columnCondition, constraintName, assertion=None, hint=None):
563563
constraintName,
564564
assertion_func,
565565
hint,
566-
to_scala_seq(self._jvm, self._jvm.java.util.ArrayList()),
566+
empty_scala_seq(self._jvm),
567567
self._jvm.scala.Option.apply(None)
568568
)
569569
return self

pydeequ/scala_utils.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,18 @@ def to_scala_seq(jvm, iterable):
8080
return jvm.scala.collection.JavaConverters.iterableAsScalaIterableConverter(iterable).asScala().toSeq()
8181

8282

83+
def empty_scala_seq(jvm):
84+
"""
85+
Returns an empty Scala immutable List (Nil), usable as Seq[_].
86+
Uses JavaConverters.toList() to produce an immutable.List rather than
87+
a Stream, which is required for Py4J constructor/method lookup to succeed
88+
across both Scala 2.12 (Spark 3.x) and Scala 2.13 (Spark 4+).
89+
"""
90+
return jvm.scala.collection.JavaConverters.iterableAsScalaIterableConverter(
91+
jvm.java.util.ArrayList()
92+
).asScala().toList()
93+
94+
8395
def to_scala_map(spark_session, d):
8496
"""
8597
Convert a dict into a JVM Map.

0 commit comments

Comments
 (0)