Skip to content

Commit 8073cf3

Browse files
committed
test: stop using per-test RootAllocator in RegExpLikeUDFSuite
CI fails the unit suite with 'Memory was leaked by query (98304/147456)' when RegExpLikeUDFSuite runs after CometRegExpJvmSuite in the same surefire JVM (observed on Spark 3.5 with Scala 2.13 and on Spark 4.x). The per-test RootAllocator only ever held the input vectors: the UDF allocates its output BitVector from CometArrowAllocator, so that is the allocator where leak detection actually needs to happen. Allocate the inputs from CometArrowAllocator alongside the output, and verify that each test cleans up by snapshotting the allocator's outstanding bytes before and after the test body.
1 parent 42462c3 commit 8073cf3

1 file changed

Lines changed: 33 additions & 28 deletions

File tree

spark/src/test/scala/org/apache/comet/udf/RegExpLikeUDFSuite.scala

Lines changed: 33 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -23,12 +23,20 @@ import java.nio.charset.StandardCharsets
2323

2424
import org.scalatest.funsuite.AnyFunSuite
2525

26-
import org.apache.arrow.memory.RootAllocator
26+
import org.apache.arrow.memory.BufferAllocator
2727
import org.apache.arrow.vector.{BitVector, ValueVector, VarCharVector}
2828

29+
import org.apache.comet.CometArrowAllocator
30+
2931
class RegExpLikeUDFSuite extends AnyFunSuite {
3032

31-
private def varchar(allocator: RootAllocator, values: Seq[String]): VarCharVector = {
33+
// Use the same allocator that RegExpLikeUDF allocates its output from. A
34+
// per-test RootAllocator interacts poorly with shared Arrow accounting state
35+
// when other suites have already exercised CometArrowAllocator in the same
36+
// surefire JVM (observed under Spark 3.5 Scala 2.13 / Spark 4.x in CI).
37+
private def allocator: BufferAllocator = CometArrowAllocator
38+
39+
private def varchar(values: Seq[String]): VarCharVector = {
3240
val v = new VarCharVector("subject", allocator)
3341
v.allocateNew()
3442
values.zipWithIndex.foreach { case (s, i) =>
@@ -39,19 +47,27 @@ class RegExpLikeUDFSuite extends AnyFunSuite {
3947
v
4048
}
4149

42-
private def scalarPattern(allocator: RootAllocator, pattern: String): VarCharVector = {
50+
private def scalarPattern(pattern: String): VarCharVector = {
4351
val v = new VarCharVector("pattern", allocator)
4452
v.allocateNew()
4553
v.setSafe(0, pattern.getBytes(StandardCharsets.UTF_8))
4654
v.setValueCount(1)
4755
v
4856
}
4957

58+
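/** Verify that the allocator's outstanding byte count after `body` returns equals the count taken before it ran, i.e. the test released what it allocated; the check is not reached if `body` throws. */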
59+
private def assertNoLeak(body: => Unit): Unit = {
60+
val before = allocator.getAllocatedMemory
61+
body
62+
assert(
63+
allocator.getAllocatedMemory === before,
64+
s"test leaked Arrow memory: ${allocator.getAllocatedMemory - before} bytes")
65+
}
66+
5067
test("matches Java regex semantics including null handling") {
51-
val allocator = new RootAllocator(Long.MaxValue)
52-
try {
53-
val subject = varchar(allocator, Seq("abc123", "no-digits", null, "X"))
54-
val pattern = scalarPattern(allocator, "\\d+")
68+
assertNoLeak {
69+
val subject = varchar(Seq("abc123", "no-digits", null, "X"))
70+
val pattern = scalarPattern("\\d+")
5571

5672
val udf = new RegExpLikeUDF
5773
val out = udf.evaluate(Array[ValueVector](subject, pattern)).asInstanceOf[BitVector]
@@ -64,18 +80,15 @@ class RegExpLikeUDFSuite extends AnyFunSuite {
6480
out.close()
6581
subject.close()
6682
pattern.close()
67-
} finally {
68-
allocator.close()
6983
}
7084
}
7185

7286
test("compiled Pattern is cached across evaluate calls") {
73-
val allocator = new RootAllocator(Long.MaxValue)
74-
try {
87+
assertNoLeak {
7588
val udf = new RegExpLikeUDF
76-
val pattern = scalarPattern(allocator, "[a-z]+")
77-
val s1 = varchar(allocator, Seq("hello"))
78-
val s2 = varchar(allocator, Seq("WORLD"))
89+
val pattern = scalarPattern("[a-z]+")
90+
val s1 = varchar(Seq("hello"))
91+
val s2 = varchar(Seq("WORLD"))
7992

8093
val r1 = udf.evaluate(Array[ValueVector](s1, pattern)).asInstanceOf[BitVector]
8194
val r2 = udf.evaluate(Array[ValueVector](s2, pattern)).asInstanceOf[BitVector]
@@ -84,33 +97,27 @@ class RegExpLikeUDFSuite extends AnyFunSuite {
8497
assert(r2.get(0) === 0)
8598
r1.close(); r2.close()
8699
s1.close(); s2.close(); pattern.close()
87-
} finally {
88-
allocator.close()
89100
}
90101
}
91102

92103
test("empty subject vector yields empty result") {
93-
val allocator = new RootAllocator(Long.MaxValue)
94-
try {
95-
val subject = varchar(allocator, Seq.empty)
96-
val pattern = scalarPattern(allocator, "\\d+")
104+
assertNoLeak {
105+
val subject = varchar(Seq.empty)
106+
val pattern = scalarPattern("\\d+")
97107

98108
val out = new RegExpLikeUDF()
99109
.evaluate(Array[ValueVector](subject, pattern))
100110
.asInstanceOf[BitVector]
101111

102112
assert(out.getValueCount === 0)
103113
out.close(); subject.close(); pattern.close()
104-
} finally {
105-
allocator.close()
106114
}
107115
}
108116

109117
test("all-null subject column produces all-null bitmap") {
110-
val allocator = new RootAllocator(Long.MaxValue)
111-
try {
112-
val subject = varchar(allocator, Seq(null, null, null))
113-
val pattern = scalarPattern(allocator, ".*")
118+
assertNoLeak {
119+
val subject = varchar(Seq(null, null, null))
120+
val pattern = scalarPattern(".*")
114121

115122
val out = new RegExpLikeUDF()
116123
.evaluate(Array[ValueVector](subject, pattern))
@@ -119,8 +126,6 @@ class RegExpLikeUDFSuite extends AnyFunSuite {
119126
assert(out.getValueCount === 3)
120127
assert(out.isNull(0) && out.isNull(1) && out.isNull(2))
121128
out.close(); subject.close(); pattern.close()
122-
} finally {
123-
allocator.close()
124129
}
125130
}
126131
}

0 commit comments

Comments
 (0)