Skip to content

Commit 9c926d3

Browse files
committed
fix: change constructor instead
1 parent b4b7a4b commit 9c926d3

1 file changed

Lines changed: 5 additions & 11 deletions

File tree

sql/catalyst/src/main/scala/org/apache/spark/sql/execution/arrow/ArrowWriter.scala

Lines changed: 5 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -387,6 +387,11 @@ private[arrow] class ArrayWriter(
387387
val valueVector: ListVector,
388388
val elementWriter: ArrowFieldWriter) extends ArrowFieldWriter {
389389

390+
// SPARK-55056: Arrow format requires ListArray offset buffer to have N+1 entries.
391+
// Even when N=0, the buffer must contain [0]. Initialize offset buffer at construction
392+
// to ensure it exists even if no elements are written.
393+
valueVector.getOffsetBuffer.setInt(0, 0)
394+
390395
override def setNull(): Unit = {
391396
}
392397

@@ -402,17 +407,6 @@ private[arrow] class ArrayWriter(
402407
}
403408

404409
override def finish(): Unit = {
405-
// SPARK-55056: Arrow format requires ListArray offset buffer to have N+1 entries.
406-
// Even when N=0, the buffer must contain [0]. When the outer array is empty,
407-
// nested ArrayWriters are never invoked, so their count stays 0. Then
408-
// getBufferSizeFor(0) returns 0, and the offset buffer is omitted in IPC
409-
// serialization — violating Arrow spec. Simulate one empty write to ensure
410-
// the offset buffer is properly initialized.
411-
if (count == 0) {
412-
valueVector.startNewValue(0)
413-
valueVector.endValue(0, 0)
414-
count = 1
415-
}
416410
super.finish()
417411
elementWriter.finish()
418412
}

0 commit comments

Comments
 (0)