@@ -220,6 +220,74 @@ class CometCodegenDispatchFuzzSuite extends CometTestBase with AdaptiveSparkPlan
220220 }
221221 }
222222
/**
 * Element-level fuzz for nested array reads. For every `Array<primitive>` column in the random
 * schema, runs `id_X(array_max(col))` so Spark's `ArrayMax.doGenCode` walks every element of
 * every row and calls the kernel's nested element getter
 * (`getInt`/`getLong`/`getDecimal`/etc.). The cardinality probe deliberately avoids element
 * materialization, so without this test no fuzz coverage exists on the element-getter paths the
 * unsafe-access optimization would touch. `array_max` is comparison-only on every primitive
 * Spark supports, so one expression covers all 14 element types.
 *
 * The test fails (rather than skipping) when a column's element type has no identity UDF, so a
 * newly generated element type cannot silently lose coverage.
 */
test("array_max element fuzz: every Array<primitive> column") {
  val df = spark.read.parquet(mixedTypesFilename)
  df.createOrReplaceTempView("t1")

  // Capture the element type while filtering so the loop below does not need a second,
  // unchecked destructuring of `field.dataType`.
  val arrayPrimitiveColumns = df.schema.fields.collect {
    case StructField(name, ArrayType(elemDt, _), _, _) if !isComplexType(elemDt) =>
      name -> elemDt
  }
  assert(
    arrayPrimitiveColumns.nonEmpty,
    "expected at least one Array<primitive> column in random schema")

  arrayPrimitiveColumns.foreach { case (colName, elemDt) =>
    val udfName = s"id_arrmax_$colName"
    // An empty result means the identity UDF catalog has no entry for this element type.
    if (registerIdentityUdfFor(elemDt, udfName).isEmpty) {
      fail(
        s"array column $colName elem $elemDt not in identity UDF catalog; " +
          "extend registerIdentityUdfFor")
    }
    assertCodegenRan {
      checkSparkAnswerAndOperator(s"SELECT $udfName(array_max($colName)) FROM t1")
    }
  }
}
257+
/**
 * Element-level fuzz for map key and value reads. `map_keys(col)` / `map_values(col)` produce
 * arrays the kernel walks via Spark's `ArrayMax`, exercising the map's child key/value getter.
 * The leaf primitive read is structurally the same as in the array element fuzz, but the parent
 * offset chain (MapVector -> entries StructVector -> child) differs, so a buggy unsafe getter
 * that mishandled the map's per-row offset would slip past the array test alone. Filters to
 * top-level `Map<primitive, primitive>` columns from the random nested schema.
 *
 * When the schema contains no such column the test is canceled instead of passing without
 * having run a single query. Key or value types for which `registerIdentityUdfFor` yields
 * nothing are skipped.
 */
test("array_max element fuzz: map_keys / map_values on Map<primitive, primitive> columns") {
  val df = spark.read.parquet(nestedTypesFilename)
  df.createOrReplaceTempView("t2")

  // Capture key/value types while filtering so no unchecked re-destructuring is needed later.
  val mapPrimitiveColumns = df.schema.fields.collect {
    case StructField(name, MapType(kDt, vDt, _), _, _)
        if !isComplexType(kDt) && !isComplexType(vDt) =>
      (name, kDt, vDt)
  }
  // ScalaTest `assume` cancels the test; a vacuous green run would hide missing coverage.
  assume(
    mapPrimitiveColumns.nonEmpty,
    "no top-level Map<primitive, primitive> column in random nested schema")

  mapPrimitiveColumns.foreach { case (colName, kDt, vDt) =>
    // Keys first, then values: same registration and query order as before.
    val probes = Seq(
      (kDt, s"id_mapk_$colName", s"map_keys($colName)"),
      (vDt, s"id_mapv_$colName", s"map_values($colName)"))
    probes.foreach { case (dt, udfName, arrayExpr) =>
      // The value yielded by registerIdentityUdfFor is what gets used as the SQL function name.
      registerIdentityUdfFor(dt, udfName).foreach { udf =>
        assertCodegenRan {
          checkSparkAnswerAndOperator(s"SELECT $udf(array_max($arrayExpr)) FROM t2")
        }
      }
    }
  }
}
290+
223291 /**
224292 * Probes one complex top-level column. ArrayType / MapType go through `cardinality(col)` fed to
225293 * the identity-Int probe UDF (see [[cardinalityProbeUdf ]] for the rationale). StructType drills
0 commit comments