@@ -24,7 +24,7 @@ import scala.jdk.CollectionConverters._
2424import org .apache .arrow .c .{ArrowArrayStream , Data }
2525import org .apache .arrow .memory .BufferAllocator
2626import org .apache .arrow .vector .ipc .ArrowReader
27- import org .apache .arrow .vector .types .pojo .{Field , Schema }
27+ import org .apache .arrow .vector .types .pojo .{Field , FieldType , Schema }
2828import org .apache .spark .TaskContext
2929import org .apache .spark .internal .Logging
3030import org .apache .spark .rdd .RDD
@@ -132,8 +132,14 @@ object CometArrowStream extends Logging {
132132 * `CometDictionaryVector`, [[ColumnarBatchArrowReader ]] decodes it via
133133 * `DictionaryEncoder.decode` before unloading, so the wire-level field is the dictionary's
134134 * *value* type, not `Dictionary<index, value>`. For everything else, use the underlying value
135- * vector's field. Field name / nullability / metadata come from `expected` so that consumers
136- * indexing by name keep working.
135+ * vector's field.
136+ *
137+ * Field name and metadata come from `expected` so that consumers indexing by name keep working.
138+ * Nullability is the union of the two — a CometVector that happens to hold no nulls in this
139+ * batch can still be nullable per Spark's contract (the next batch may have one), and a column
140+ * whose actual buffer carries validity bits must stay nullable even if Spark thought otherwise.
141+ * Taking only `raw.isNullable` here could advertise the field as non-nullable even though a
142+ * later batch carries a null, which would crash native validation.
137143 */
138144 private def actualFieldOf (col : CometVector , expected : Field ): Field = {
139145 val raw = col match {
@@ -143,7 +149,10 @@ object CometArrowStream extends Logging {
143149 dict.getVector.getField
144150 case _ => col.getValueVector.getField
145151 }
146- new Field (expected.getName, raw.getFieldType, raw.getChildren)
152+ val nullable = expected.isNullable || raw.isNullable
153+ val fieldType =
154+ new FieldType (nullable, raw.getType, raw.getDictionary, expected.getMetadata)
155+ new Field (expected.getName, fieldType, raw.getChildren)
147156 }
148157
149158 /**
0 commit comments