@@ -23,12 +23,20 @@ import java.nio.charset.StandardCharsets
2323
2424import org .scalatest .funsuite .AnyFunSuite
2525
26- import org .apache .arrow .memory .RootAllocator
26+ import org .apache .arrow .memory .BufferAllocator
2727import org .apache .arrow .vector .{BitVector , ValueVector , VarCharVector }
2828
29+ import org .apache .comet .CometArrowAllocator
30+
2931class RegExpLikeUDFSuite extends AnyFunSuite {
3032
31- private def varchar (allocator : RootAllocator , values : Seq [String ]): VarCharVector = {
33+ // Use the same allocator that RegExpLikeUDF allocates its output from. A
34+ // per-test RootAllocator interacts poorly with shared Arrow accounting state
35+ // when other suites have already exercised CometArrowAllocator in the same
36+ // surefire JVM (observed under Spark 3.5 Scala 2.13 / Spark 4.x in CI).
// NOTE(review): every helper in this suite that builds a vector passes this same
// reference, so nothing here is scoped to a single test. Presumably memory a test
// fails to release stays charged to this allocator afterwards - confirm.
37+ private def allocator : BufferAllocator = CometArrowAllocator
38+
39+ private def varchar (values : Seq [String ]): VarCharVector = {
3240 val v = new VarCharVector (" subject" , allocator)
3341 v.allocateNew()
3442 values.zipWithIndex.foreach { case (s, i) =>
@@ -39,19 +47,27 @@ class RegExpLikeUDFSuite extends AnyFunSuite {
3947 v
4048 }
4149
// Builds a VarCharVector with exactly one value: the UTF-8 bytes of `pattern`
// written at index 0, with the value count set to 1.
// The vector is returned open; this helper never closes it, so the caller must.
// (The `-` line is the old signature that took the allocator as a parameter; the
// `+` line is the new one, where `allocator` resolves to the suite-level member.)
42- private def scalarPattern (allocator : RootAllocator , pattern : String ): VarCharVector = {
50+ private def scalarPattern (pattern : String ): VarCharVector = {
4351 val v = new VarCharVector (" pattern" , allocator)
4452 v.allocateNew()
4553 v.setSafe(0 , pattern.getBytes(StandardCharsets .UTF_8 ))
4654 v.setValueCount(1 )
4755 v
4856 }
4957
58+ /** Verify that everything allocated within `body` is released by the time it returns. */
// Implementation: records allocator.getAllocatedMemory, runs `body` once (it is a
// by-name parameter, evaluated at the bare `body` line), then asserts the value
// read afterwards equals the recorded one. Only the net change is compared.
// NOTE(review): there is no try/finally here, so if `body` throws (for example a
// failed assertion inside a test) the comparison is skipped and the exception
// simply propagates.
// NOTE(review): the allocator is read twice after `body` - once in the condition
// and once in the failure message. If anything else can allocate from the same
// allocator concurrently, the reported delta may not match the compared value -
// TODO confirm suites using this allocator run serially.
59+ private def assertNoLeak (body : => Unit ): Unit = {
60+ val before = allocator.getAllocatedMemory
61+ body
62+ assert(
63+ allocator.getAllocatedMemory === before,
64+ s " test leaked Arrow memory: ${allocator.getAllocatedMemory - before} bytes " )
65+ }
66+
5067 test(" matches Java regex semantics including null handling" ) {
51- val allocator = new RootAllocator (Long .MaxValue )
52- try {
53- val subject = varchar(allocator, Seq (" abc123" , " no-digits" , null , " X" ))
54- val pattern = scalarPattern(allocator, " \\ d+" )
68+ assertNoLeak {
69+ val subject = varchar(Seq (" abc123" , " no-digits" , null , " X" ))
70+ val pattern = scalarPattern(" \\ d+" )
5571
5672 val udf = new RegExpLikeUDF
5773 val out = udf.evaluate(Array [ValueVector ](subject, pattern)).asInstanceOf [BitVector ]
@@ -64,18 +80,15 @@ class RegExpLikeUDFSuite extends AnyFunSuite {
6480 out.close()
6581 subject.close()
6682 pattern.close()
67- } finally {
68- allocator.close()
6983 }
7084 }
7185
7286 test(" compiled Pattern is cached across evaluate calls" ) {
73- val allocator = new RootAllocator (Long .MaxValue )
74- try {
87+ assertNoLeak {
7588 val udf = new RegExpLikeUDF
76- val pattern = scalarPattern(allocator, " [a-z]+" )
77- val s1 = varchar(allocator, Seq (" hello" ))
78- val s2 = varchar(allocator, Seq (" WORLD" ))
89+ val pattern = scalarPattern(" [a-z]+" )
90+ val s1 = varchar(Seq (" hello" ))
91+ val s2 = varchar(Seq (" WORLD" ))
7992
8093 val r1 = udf.evaluate(Array [ValueVector ](s1, pattern)).asInstanceOf [BitVector ]
8194 val r2 = udf.evaluate(Array [ValueVector ](s2, pattern)).asInstanceOf [BitVector ]
@@ -84,33 +97,27 @@ class RegExpLikeUDFSuite extends AnyFunSuite {
8497 assert(r2.get(0 ) === 0 )
8598 r1.close(); r2.close()
8699 s1.close(); s2.close(); pattern.close()
87- } finally {
88- allocator.close()
89100 }
90101 }
91102
// Evaluates a fresh RegExpLikeUDF on a subject vector built from Seq.empty plus a
// single pattern vector, and asserts the BitVector result has a value count of 0.
// The `-` lines are the previous version (a per-test RootAllocator closed in a
// `finally`); the `+` lines wrap the same body in assertNoLeak instead.
92103 test(" empty subject vector yields empty result" ) {
93- val allocator = new RootAllocator (Long .MaxValue )
94- try {
95- val subject = varchar(allocator, Seq .empty)
96- val pattern = scalarPattern(allocator, " \\ d+" )
104+ assertNoLeak {
105+ val subject = varchar(Seq .empty)
106+ val pattern = scalarPattern(" \\ d+" )
97107
98108 val out = new RegExpLikeUDF ()
99109 .evaluate(Array [ValueVector ](subject, pattern))
100110 .asInstanceOf [BitVector ]
101111
102112 assert(out.getValueCount === 0 )
// NOTE(review): these close() calls are reached only when evaluate and the
// assertion above succeed. On failure out/subject/pattern are never closed; in
// the `+` version they were allocated from the suite-wide allocator rather than
// a per-test one. Consider closing them in a finally block.
103113 out.close(); subject.close(); pattern.close()
104- } finally {
105- allocator.close()
106114 }
107115 }
108116
109117 test(" all-null subject column produces all-null bitmap" ) {
110- val allocator = new RootAllocator (Long .MaxValue )
111- try {
112- val subject = varchar(allocator, Seq (null , null , null ))
113- val pattern = scalarPattern(allocator, " .*" )
118+ assertNoLeak {
119+ val subject = varchar(Seq (null , null , null ))
120+ val pattern = scalarPattern(" .*" )
114121
115122 val out = new RegExpLikeUDF ()
116123 .evaluate(Array [ValueVector ](subject, pattern))
@@ -119,8 +126,6 @@ class RegExpLikeUDFSuite extends AnyFunSuite {
119126 assert(out.getValueCount === 3 )
120127 assert(out.isNull(0 ) && out.isNull(1 ) && out.isNull(2 ))
121128 out.close(); subject.close(); pattern.close()
122- } finally {
123- allocator.close()
124129 }
125130 }
126131}
0 commit comments