Skip to content

Commit 7afe69f

Browse files
committed
fix arrow shading issue in CI.
1 parent 10df7e0 commit 7afe69f

4 files changed

Lines changed: 58 additions & 11 deletions

File tree

common/src/main/scala/org/apache/comet/udf/CometBatchKernelCodegen.scala

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -95,6 +95,32 @@ object CometBatchKernelCodegen extends Logging with CometExprTraitShim {
9595
*/
9696
final case class ArrowColumnSpec(vectorClass: Class[_ <: ValueVector], nullable: Boolean)
9797

98+
/**
99+
* Resolve an Arrow vector class by its simple name, using the same classloader the codegen uses
100+
* internally. Intended for tests: the `common` module shades `org.apache.arrow` to
101+
* `org.apache.comet.shaded.arrow`, so `classOf[VarCharVector]` at a call site in an unshaded
102+
* module refers to a different [[Class]] object than the one the codegen compares against.
103+
* Callers pass a simple name and get back the class the production code actually uses.
104+
*/
105+
def vectorClassBySimpleName(name: String): Class[_ <: ValueVector] = name match {
106+
case "BitVector" => classOf[BitVector]
107+
case "TinyIntVector" => classOf[TinyIntVector]
108+
case "SmallIntVector" => classOf[SmallIntVector]
109+
case "IntVector" => classOf[IntVector]
110+
case "BigIntVector" => classOf[BigIntVector]
111+
case "Float4Vector" => classOf[Float4Vector]
112+
case "Float8Vector" => classOf[Float8Vector]
113+
case "DecimalVector" => classOf[DecimalVector]
114+
case "DateDayVector" => classOf[DateDayVector]
115+
case "TimeStampMicroVector" => classOf[TimeStampMicroVector]
116+
case "TimeStampMicroTZVector" => classOf[TimeStampMicroTZVector]
117+
case "VarCharVector" => classOf[VarCharVector]
118+
case "ViewVarCharVector" => classOf[ViewVarCharVector]
119+
case "VarBinaryVector" => classOf[VarBinaryVector]
120+
case "ViewVarBinaryVector" => classOf[ViewVarBinaryVector]
121+
case other => throw new IllegalArgumentException(s"unknown Arrow vector class: $other")
122+
}
123+
98124
/**
99125
* Result of compiling a bound [[Expression]] into a Janino kernel. The `factory` is the Spark
100126
* [[GeneratedClass]] produced by Janino and is safe to share across threads and partitions: it

spark/src/test/scala/org/apache/comet/CometArrayExpressionSuite.scala

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -236,7 +236,12 @@ class CometArrayExpressionSuite extends CometTestBase with AdaptiveSparkPlanHelp
236236
test("ArrayInsertUnsupportedArgs") {
237237
// This test checks that the else branch in ArrayInsert
238238
// mapping to the comet is valid and fallback to spark is working fine.
239-
withSQLConf(CometConf.getExprAllowIncompatConfigKey(classOf[ArrayInsert]) -> "true") {
239+
// Disable the codegen dispatcher so the `idx` ScalaUDF child returns None from its serde,
240+
// which is what drives ArrayInsert's "unsupported arguments" branch. With the dispatcher
241+
// enabled, ScalaUDF routes through codegen and the whole plan runs native.
242+
withSQLConf(
243+
CometConf.COMET_CODEGEN_DISPATCH_MODE.key -> CometConf.CODEGEN_DISPATCH_DISABLED,
244+
CometConf.getExprAllowIncompatConfigKey(classOf[ArrayInsert]) -> "true") {
240245
withTempDir { dir =>
241246
val path = new Path(dir.toURI.toString, "test.parquet")
242247
makeParquetFileAllPrimitiveTypes(path, dictionaryEnabled = false, 10000)
@@ -247,7 +252,7 @@ class CometArrayExpressionSuite extends CometTestBase with AdaptiveSparkPlanHelp
247252
.withColumn("arrUnsupportedArgs", expr("array_insert(arr, idx, 1)"))
248253
checkSparkAnswerAndFallbackReasons(
249254
df.select("arrUnsupportedArgs"),
250-
Set("scalaudf is not supported", "unsupported arguments for ArrayInsert"))
255+
Set("codegen dispatch disabled", "unsupported arguments for ArrayInsert"))
251256
}
252257
}
253258
}

spark/src/test/scala/org/apache/comet/CometCodegenDispatchSmokeSuite.scala

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -156,15 +156,22 @@ class CometCodegenDispatchSmokeSuite extends CometTestBase with AdaptiveSparkPla
156156
* `assertCodegenDidWork` (which proves the dispatcher ran in this test), the pair gives both
157157
* "this test exercised the dispatcher" and "the dispatcher's cache has a kernel of the expected
158158
* shape".
159+
*
160+
* Compares by simple name because the `common` module shades `org.apache.arrow`, so a direct
161+
* class-identity check against `classOf[VarCharVector]` at this call site (unshaded) misses the
162+
* shaded classes the dispatcher actually uses internally.
159163
*/
160164
private def assertKernelSignaturePresent(
161165
inputs: Seq[Class[_ <: ValueVector]],
162166
output: DataType): Unit = {
163167
val sigs = CometCodegenDispatchUDF.snapshotCompiledSignatures()
164-
val target = (inputs.toIndexedSeq, output)
168+
val expectedNames = inputs.map(_.getSimpleName).toIndexedSeq
169+
val present = sigs.exists { case (cached, dt) =>
170+
dt == output && cached.map(_.getSimpleName) == expectedNames
171+
}
165172
assert(
166-
sigs.contains(target),
167-
s"expected kernel signature ${target._1.map(_.getSimpleName)} -> ${target._2}; " +
173+
present,
174+
s"expected kernel signature $expectedNames -> $output; " +
168175
s"cache had ${sigs.map { case (c, d) => (c.map(_.getSimpleName), d) }}")
169176
}
170177

spark/src/test/scala/org/apache/comet/CometCodegenSourceSuite.scala

Lines changed: 15 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,6 @@ package org.apache.comet
2121

2222
import org.scalatest.funsuite.AnyFunSuite
2323

24-
import org.apache.arrow.vector.{VarCharVector, ViewVarCharVector}
2524
import org.apache.spark.sql.catalyst.InternalRow
2625
import org.apache.spark.sql.catalyst.expressions.{BoundReference, Concat, Expression, LeafExpression, Length, Literal, Nondeterministic, RegExpReplace, RLike, Unevaluable, Upper}
2726
import org.apache.spark.sql.catalyst.expressions.codegen.{CodegenContext, CodegenFallback, ExprCode}
@@ -30,6 +29,11 @@ import org.apache.spark.sql.types.{DataType, IntegerType, StringType}
3029
import org.apache.comet.udf.CometBatchKernelCodegen
3130
import org.apache.comet.udf.CometBatchKernelCodegen.ArrowColumnSpec
3231

32+
// Resolve Arrow vector classes through the codegen object so tests see the same `Class` objects
33+
// the shaded `common` module sees. A direct `classOf[org.apache.arrow.vector.VarCharVector]` here
34+
// would be the unshaded class from the test classpath, which is not `==` to the shaded class the
35+
// production pattern-matches against.
36+
3337
/**
3438
* Generated-source inspection tests. These exercise `CometBatchKernelCodegen.generateSource` and
3539
* assert on the emitted Java directly, without invoking Janino. The goal is to catch regressions
@@ -48,8 +52,13 @@ import org.apache.comet.udf.CometBatchKernelCodegen.ArrowColumnSpec
4852
*/
4953
class CometCodegenSourceSuite extends AnyFunSuite {
5054

51-
private val nullableString = ArrowColumnSpec(classOf[VarCharVector], nullable = true)
52-
private val nonNullableString = ArrowColumnSpec(classOf[VarCharVector], nullable = false)
55+
private val varCharVectorClass =
56+
CometBatchKernelCodegen.vectorClassBySimpleName("VarCharVector")
57+
private val viewVarCharVectorClass =
58+
CometBatchKernelCodegen.vectorClassBySimpleName("ViewVarCharVector")
59+
60+
private val nullableString = ArrowColumnSpec(varCharVectorClass, nullable = true)
61+
private val nonNullableString = ArrowColumnSpec(varCharVectorClass, nullable = false)
5362

5463
private def gen(
5564
expr: org.apache.spark.sql.catalyst.expressions.Expression,
@@ -94,7 +103,7 @@ class CometCodegenSourceSuite extends AnyFunSuite {
94103
}
95104

96105
test("ViewVarCharVector getUTF8String branches inline vs referenced without allocating") {
97-
val viewSpec = ArrowColumnSpec(classOf[ViewVarCharVector], nullable = true)
106+
val viewSpec = ArrowColumnSpec(viewVarCharVectorClass, nullable = true)
98107
val expr = Length(BoundReference(0, StringType, nullable = true))
99108
val src = gen(expr, viewSpec)
100109
// The view case reads the 16-byte view entry and picks inline vs referenced data without a
@@ -177,8 +186,8 @@ class CometCodegenSourceSuite extends AnyFunSuite {
177186
// being null would skip evaluation, but Concat's null handling differs). Expect the
178187
// default path without the `if (colX.isNull(i) || colY.isNull(i))` wrapper, letting Spark's
179188
// own `ev.code` handle nulls correctly.
180-
val nullable1 = ArrowColumnSpec(classOf[VarCharVector], nullable = true)
181-
val nullable2 = ArrowColumnSpec(classOf[VarCharVector], nullable = true)
189+
val nullable1 = ArrowColumnSpec(varCharVectorClass, nullable = true)
190+
val nullable2 = ArrowColumnSpec(varCharVectorClass, nullable = true)
182191
val expr = RLike(
183192
Concat(
184193
Seq(

0 commit comments

Comments
 (0)