@@ -509,6 +509,28 @@ object CometCreateArray extends CometExpressionSerde[CreateArray] {
509509 return exprToProtoInternal(emptyArrayLiteral, inputs, binding)
510510 }
511511
512+ // DataFusion's `make_array` asserts strict element-type equality in
513+ // `MutableArrayData::with_capacities` and panics on a mismatch. Spark's CreateArray is more
514+ // permissive: its type coercion compares element types with `sameType`, which ignores
515+ // nullability, so children that share a surface type but differ only in nested field
516+ // nullability get no unifying cast. DataFusion tolerates container nullability differences
517+ // (an `ArrayType.containsNull` / `MapType.valueContainsNull` mismatch is coerced), but NOT a
518+ // struct field's nullability -- `array(struct(a not null), struct(a nullable))` panics inside
519+ // `make_array_inner`. Decline only those cases (i.e. children that still differ after
520+ // normalizing container nullability) so Spark's evaluator handles them.
521+ //
522+ // TODO: remove this decline once apache/datafusion#22366 lands; the upstream fix widens the
523+ // element type via nullability-OR-merge and casts each child before MutableArrayData.
524+ val normalizedTypes = children.map(c => normalizeContainerNullability(c.dataType))
525+ if (normalizedTypes.distinct.size > 1 ) {
526+ withFallbackReason(
527+ expr,
528+ " CreateArray children have mismatched data types: " +
529+ children.map(_.dataType).distinct.mkString(" , " ),
530+ children : _* )
531+ return None
532+ }
533+
512534 val childExprs = children.map(exprToProtoInternal(_, inputs, binding))
513535
514536 if (childExprs.forall(_.isDefined)) {
@@ -518,6 +540,26 @@ object CometCreateArray extends CometExpressionSerde[CreateArray] {
518540 None
519541 }
520542 }
543+
544+ /**
545+ * Rewrites a type so that container nullability (`ArrayType.containsNull`,
546+ * `MapType.valueContainsNull`) is forced to `true` everywhere, while struct field nullability
547+ * is left intact. Two CreateArray children whose types differ ONLY in container nullability are
548+ * tolerated by DataFusion's `make_array` (coerced), so they normalize equal here; a difference
549+ * in a struct field's nullability survives normalization and triggers the decline above.
550+ */
551+ private def normalizeContainerNullability (dt : DataType ): DataType = dt match {
552+ case ArrayType (elementType, _) =>
553+ ArrayType (normalizeContainerNullability(elementType), containsNull = true )
554+ case MapType (keyType, valueType, _) =>
555+ MapType (
556+ normalizeContainerNullability(keyType),
557+ normalizeContainerNullability(valueType),
558+ valueContainsNull = true )
559+ case StructType (fields) =>
560+ StructType (fields.map(f => f.copy(dataType = normalizeContainerNullability(f.dataType))))
561+ case other => other
562+ }
521563}
522564
523565object CometGetArrayItem extends CometExpressionSerde [GetArrayItem ] {
0 commit comments