Fix empty Seq compatibility across Scala 2.12 and 2.13

m-aciek · m-aciek · commit c5c7f664e41d · 2026-04-08T15:21:18.000+02:00
The previous fix built the empty Seq with to_scala_seq(jvm, ArrayList())
and passed the resulting Stream$Empty$ to deequ constructors/methods.
Py4J's reflection-based overload resolution rejects that argument in
Scala 2.12 (Spark 3.x).

Add an empty_scala_seq() helper that calls toList() instead of toSeq()
on the JavaConverters-wrapped collection. This produces immutable.Nil
(an empty List), which deequ accepts as Seq[_] in both Scala 2.12 and
2.13, and which Py4J constructor/method lookup matches correctly in
both versions.

Affirm-Skill: acli-jira
Affirm-Skill: att-deploy-check
Affirm-Skill: att-test
Affirm-Skill: buildkite-debug
Affirm-Skill: capture-context
Affirm-Skill: cmt-writer
Affirm-Skill: commit-and-push
Affirm-Skill: create-adr
Affirm-Skill: create-cmt-ticket
Affirm-Skill: create-tech-spec
Affirm-Skill: export-conversation-record
Affirm-Skill: gather-best-practices
Affirm-Skill: mcp-debug
Affirm-Skill: multi-thor-provisioner
Affirm-Skill: resolve-pr-comments
Affirm-Skill: save-output
Affirm-Skill: security-scanner-for-skill-md
Affirm-Skill: skill-evaluator
Affirm-Skill: thor-control
Affirm-Skill: tickets-from-plan
Affirm-Skill: write-implementation-plan
diff --git a/pydeequ/analyzers.py b/pydeequ/analyzers.py
@@ -9,7 +9,7 @@
 from pydeequ.pandas_utils import ensure_pyspark_df
 from pydeequ.repository import MetricsRepository, ResultKey
 from enum import Enum
-from pydeequ.scala_utils import to_scala_seq
+from pydeequ.scala_utils import empty_scala_seq, to_scala_seq
 from pydeequ.configs import SPARK_VERSION
 
 class _AnalyzerObject:
@@ -311,7 +311,7 @@ def _analyzer_jvm(self):
             self.instance,
             self.predicate,
             self._jvm.scala.Option.apply(self.where),
-            to_scala_seq(self._jvm, self._jvm.java.util.ArrayList()),
+            empty_scala_seq(self._jvm),
             self._jvm.scala.Option.apply(None)
         )
 
diff --git a/pydeequ/checks.py b/pydeequ/checks.py
@@ -5,7 +5,7 @@
 from pyspark.sql import SparkSession
 
 from pydeequ.check_functions import is_one
-from pydeequ.scala_utils import ScalaFunction1, to_scala_seq
+from pydeequ.scala_utils import ScalaFunction1, empty_scala_seq, to_scala_seq
 from pydeequ.configs import SPARK_VERSION
 
 # TODO implement custom assertions
@@ -563,7 +563,7 @@ def satisfies(self, columnCondition, constraintName, assertion=None, hint=None):
             constraintName,
             assertion_func,
             hint,
-            to_scala_seq(self._jvm, self._jvm.java.util.ArrayList()),
+            empty_scala_seq(self._jvm),
             self._jvm.scala.Option.apply(None)
         )
         return self
diff --git a/pydeequ/scala_utils.py b/pydeequ/scala_utils.py
@@ -80,6 +80,18 @@ def to_scala_seq(jvm, iterable):
     return jvm.scala.collection.JavaConverters.iterableAsScalaIterableConverter(iterable).asScala().toSeq()
 
 
+def empty_scala_seq(jvm):
+    """
+    Returns an empty Scala immutable List (Nil), usable as Seq[_].
+    Calls toList() (not toSeq()) on the converted empty ArrayList, yielding
+    an immutable.List rather than a Stream, so that Py4J constructor/method
+    lookup succeeds on both Scala 2.12 (Spark 3.x) and Scala 2.13 (Spark 4+).
+    """
+    return jvm.scala.collection.JavaConverters.iterableAsScalaIterableConverter(
+        jvm.java.util.ArrayList()
+    ).asScala().toList()
+
+
 def to_scala_map(spark_session, d):
     """
     Convert a dict into a JVM Map.