File tree Expand file tree Collapse file tree
common/src/main/scala/org/apache/comet Expand file tree Collapse file tree Original file line number Diff line number Diff line change @@ -232,8 +232,7 @@ object CometBatchKernelCodegen extends Logging with CometExprTraitShim {
232232 //
233233 // TODO(method-size): perRowBody is inlined inside process's for-loop and not split.
234234 // Sufficiently deep trees can exceed Janino's 64KB method size; wrap in
235- // ctx.splitExpressionsWithCurrentInputs when hit. See
236- // docs/source/contributor-guide/jvm_udf_dispatch.md#open-items.
235+ // ctx.splitExpressionsWithCurrentInputs when hit.
237236 val (concreteOutClass, outputSetup, perRowBody) = {
238237 // Class-field CSE. `generateExpressions` runs `subexpressionElimination` under the
239238 // hood, which populates `ctx.subexprFunctions` with per-row helper calls that write
Original file line number Diff line number Diff line change @@ -79,7 +79,8 @@ class CometScalaUDFCodegen extends CometUDF {
7979 val bytes = exprVec.get(0 )
8080
8181 // TODO(dict-encoded): kernels assume materialized inputs; dict-encoded vectors would fail the
82- // cast in `specFor` below. See docs/source/contributor-guide/jvm_udf_dispatch.md#open-items.
82+ // cast in `specFor` below. Fix is to materialize at the dispatcher (via
83+ // `CDataDictionaryProvider`) or widen `emitTypedGetters` with a dict-index + lookup path.
8384
8485 val numDataCols = inputs.length - 1
8586 val dataCols = new Array [ValueVector ](numDataCols)
@@ -325,9 +326,9 @@ object CometScalaUDFCodegen {
325326 * Cache key: serialized expression bytes plus per-column compile-time invariants.
326327 *
327328 * `hashCode` walks `bytesKey` per lookup, so for large ScalaUDF closures it scales with closure
328- * size. TODO(perf-cache-key): see
329- * `docs/source/contributor-guide/jvm_udf_dispatch.md#open-items` for possible optimizations if
330- * a workload makes this hot.
329+ * size. TODO(perf-cache-key): if this becomes hot, options are a driver-precomputed hash piggy-
330+ * backed through the proto, a per-instance last-key memoization, or a two-tier cache keyed on
331+ * the generated source string.
331332 */
332333 final case class CacheKey (bytesKey : ByteBuffer , specs : IndexedSeq [ArrowColumnSpec ])
333334
You can’t perform that action at this time.
0 commit comments