Skip to content

Commit 557752e

Browse files
committed
make format, fix shims for 4.0+
1 parent 08d6b78 commit 557752e

9 files changed

Lines changed: 17 additions & 69 deletions

File tree

common/src/main/java/org/apache/comet/udf/CometUdfBridge.java

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -62,10 +62,10 @@ protected boolean removeEldestEntry(Map.Entry<String, CometUDF> eldest) {
6262
* @param inputSchemaPtrs addresses of pre-allocated FFI_ArrowSchema structs (one per input)
6363
* @param outArrayPtr address of pre-allocated FFI_ArrowArray for the result
6464
* @param outSchemaPtr address of pre-allocated FFI_ArrowSchema for the result
65-
* @param numRows number of rows in the current batch. Mirrors DataFusion's
66-
* {@code ScalarFunctionArgs.number_rows} and gives UDFs an explicit batch-size signal for
67-
* cases where no input arg is a batch-length array (e.g. a zero-arg non-deterministic
68-
* ScalaUDF). UDFs that already read size from their input vectors can ignore it.
65+
* @param numRows number of rows in the current batch. Mirrors DataFusion's {@code
66+
* ScalarFunctionArgs.number_rows} and gives UDFs an explicit batch-size signal for cases
67+
* where no input arg is a batch-length array (e.g. a zero-arg non-deterministic ScalaUDF).
68+
* UDFs that already read size from their input vectors can ignore it.
6969
*/
7070
public static void evaluate(
7171
String udfClassName,

common/src/main/scala/org/apache/comet/udf/CometBatchKernelCodegen.scala

Lines changed: 5 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -435,19 +435,10 @@ object CometBatchKernelCodegen extends Logging with CometExprTraitShim {
435435
* devirtualize. Each getter switches on the column ordinal so the call site (with an inlined
436436
* constant ordinal from `BoundReference.genCode`) folds down to a single branch.
437437
*
438-
* Current coverage: `isNullAt` plus `getUTF8String` for `VarCharVector` and
439-
* `ViewVarCharVector`. Widen by adding vector class cases and new getters for primitive /
440-
* decimal / binary / date / timestamp types.
441-
*
442-
* TODO: the kernel's `isNullAt(int ordinal)` switch has a `return false;` case for every column
443-
* with `ArrowColumnSpec.nullable=false`, and every `BoundReference(ord, ...)` in the expression
444-
* tree produces a call site `this.isNullAt(ord)` with `ord` known as a compile-time constant.
445-
* JIT is expected to inline the method, fold the switch on the constant ordinal, and reduce the
446-
* call to `false` at that call site, so `BoundReference.genCode`'s `isNull` branch
447-
* constant-folds away too. A tighter pass would rewrite the deserialized `Expression` tree,
448-
* setting the matching `BoundReference.nullable=false` so the generated `ev.code` simply omits
449-
* the `isNull` branch at source level rather than relying on the JIT. Cheap to do once we start
450-
* flipping per-batch nullability (e.g. `v.getNullCount == 0`).
438+
* Current coverage: `isNullAt` plus getters for boolean, byte, short, int (including
439+
* `DateDayVector`), long (including `TimeStampMicroVector` and its TZ variant), float, double,
440+
* decimal, binary, and UTF8 (for both `VarCharVector` and `ViewVarCharVector`). Widen by adding
441+
* further vector-class cases to the existing switches.
451442
*/
452443
private def typedInputAccessors(inputSchema: Seq[ArrowColumnSpec]): String = {
453444
val withOrd = inputSchema.zipWithIndex
@@ -679,7 +670,7 @@ object CometBatchKernelCodegen extends Logging with CometExprTraitShim {
679670
val subjectClass = inputSchema(subjectOrd).vectorClass
680671
require(
681672
subjectClass == classOf[VarCharVector] || subjectClass == classOf[ViewVarCharVector],
682-
s"specializedRegExpReplaceBody expects VarCharVector or ViewVarCharVector at ordinal " +
673+
"specializedRegExpReplaceBody expects VarCharVector or ViewVarCharVector at ordinal " +
683674
s"$subjectOrd, got ${subjectClass.getSimpleName}")
684675

685676
val patternStr = rr.regexp.eval().toString

common/src/main/scala/org/apache/comet/udf/CometCodegenDispatchUDF.scala

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -94,7 +94,7 @@ class CometCodegenDispatchUDF extends CometUDF {
9494
override def evaluate(inputs: Array[ValueVector], numRows: Int): ValueVector = {
9595
require(
9696
inputs.length >= 1,
97-
s"CometCodegenDispatchUDF requires at least 1 input (serialized expression), " +
97+
"CometCodegenDispatchUDF requires at least 1 input (serialized expression), " +
9898
s"got ${inputs.length}")
9999
val exprVec = inputs(0).asInstanceOf[VarBinaryVector]
100100
require(
@@ -338,7 +338,7 @@ object CometCodegenDispatchUDF {
338338
expr: Expression,
339339
specs: IndexedSeq[ArrowColumnSpec]): Expression = {
340340
expr.transform {
341-
case b @ BoundReference(ord, dt, true)
341+
case BoundReference(ord, dt, true)
342342
if ord >= 0 && ord < specs.length && !specs(ord).nullable =>
343343
BoundReference(ord, dt, nullable = false)
344344
// Fall through unchanged: non-BoundReference nodes and BoundReferences that are already

common/src/main/scala/org/apache/comet/udf/CometUDF.scala

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -31,8 +31,8 @@ import org.apache.arrow.vector.ValueVector
3131
*
3232
* `numRows` mirrors DataFusion's `ScalarFunctionArgs.number_rows` and is the batch row count.
3333
* UDFs that always have at least one batch-length input can read length from it and ignore
34-
* `numRows`; UDFs that may be called with zero data columns (e.g. a zero-arg ScalaUDF through
35-
* the codegen dispatcher) need `numRows` to know how many rows to produce.
34+
* `numRows`; UDFs that may be called with zero data columns (e.g. a zero-arg ScalaUDF through the
35+
* codegen dispatcher) need `numRows` to know how many rows to produce.
3636
*
3737
* Implementations must have a public no-arg constructor and should be stateless: instances are
3838
* cached per executor thread for the lifetime of the JVM.

common/src/main/scala/org/apache/comet/udf/RegExpInStrUDF.scala

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -70,7 +70,7 @@ class RegExpInStrUDF extends CometUDF {
7070
compiled
7171
}
7272
}
73-
val idx = idxVec.get(0)
73+
idxVec.get(0)
7474

7575
val n = subject.getValueCount
7676
val out = new IntVector("regexp_instr_result", CometArrowAllocator)

common/src/main/spark-4.x/org/apache/comet/shims/CometInternalRowShim.scala

Lines changed: 0 additions & 43 deletions
This file was deleted.

spark/src/main/scala/org/apache/comet/serde/strings.scala

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -636,7 +636,7 @@ object CometRegExpExtractAll extends CometExpressionSerde[RegExpExtractAll] {
636636
return None
637637
}
638638
(expr.regexp, expr.idx) match {
639-
case (Literal(pattern, DataTypes.StringType), Literal(idx, _: IntegerType)) =>
639+
case (Literal(pattern, DataTypes.StringType), Literal(_, _: IntegerType)) =>
640640
if (pattern == null) {
641641
withInfo(expr, "Null literal pattern is handled by Spark fallback")
642642
return None

spark/src/test/scala/org/apache/comet/CometCodegenSourceSuite.scala

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -188,7 +188,7 @@ class CometCodegenSourceSuite extends AnyFunSuite {
188188
val src = gen(expr, nullable1, nullable2)
189189
assert(
190190
!src.contains("this.col0.isNull(i) || this.col1.isNull(i)"),
191-
s"expected no pre-null short-circuit when Concat breaks the NullIntolerant chain; " +
191+
"expected no pre-null short-circuit when Concat breaks the NullIntolerant chain; " +
192192
s"got:\n$src")
193193
}
194194

spark/src/test/scala/org/apache/spark/sql/benchmark/CometScalaUDFCompositionBenchmark.scala

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -91,7 +91,7 @@ object CometScalaUDFCompositionBenchmark extends CometBenchmarkBase {
9191
prepareTable(
9292
dir,
9393
spark.sql(
94-
s"SELECT REPEAT(CAST(value AS STRING), 10) AS c1, " +
94+
"SELECT REPEAT(CAST(value AS STRING), 10) AS c1, " +
9595
s"CAST(value AS STRING) AS c2 FROM $tbl"))
9696

9797
registerMultiColUdfs()

0 commit comments

Comments
 (0)