Skip to content

Commit 8926fee

Browse files
committed
GH-3520: Cleanup binary write path
Two small cleanups on the binary write side: 1. DeltaByteArrayWriter: replace v.getBytes() with v.copy().getBytesUnsafe() to avoid the unconditional Arrays.copyOf that getBytes() performs for ByteArrayBackedBinary. copy() is a no-op for constant Binaries, and getBytesUnsafe() returns the backing array directly. For reused-buffer Binaries (e.g. ByteBufferBackedBinary over a slab being mutated), copy() still snapshots them so correctness is preserved. 2. FixedLenByteArrayPlainValuesWriter: drop the unused LittleEndianDataOutputStream wrapper (only used to call Binary.writeTo(), which works directly with the underlying CapacityByteArrayOutputStream). The trailing out.flush() in getBytes() is also dead. Same pattern as #3517 fixed in DeltaLengthByteArrayValuesWriter. No public API change. No file format change. Validation: parquet-column 573 tests pass. Built with -Dspotless.check.skip=true -Drat.skip=true -Djapicmp.skip=true.
1 parent 53d7842 commit 8926fee

2 files changed

Lines changed: 5 additions & 10 deletions

File tree

parquet-column/src/main/java/org/apache/parquet/column/values/deltastrings/DeltaByteArrayWriter.java

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -89,7 +89,10 @@ public String memUsageString(String prefix) {
8989
@Override
9090
public void writeBytes(Binary v) {
9191
int i = 0;
92-
byte[] vb = v.getBytes();
92+
// copy() is a no-op for constant (non-reused) Binaries, and getBytesUnsafe()
93+
// returns the backing array directly for ByteArrayBackedBinary — avoiding
94+
// the unconditional array copy that getBytes() always performs.
95+
byte[] vb = v.copy().getBytesUnsafe();
9396
int length = previous.length < vb.length ? previous.length : vb.length;
9497
// find the number of matching prefix bytes between this value and the previous one
9598
for (i = 0; (i < length) && (previous[i] == vb[i]); i++)

parquet-column/src/main/java/org/apache/parquet/column/values/plain/FixedLenByteArrayPlainValuesWriter.java

Lines changed: 1 addition & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,6 @@
2222
import org.apache.parquet.bytes.ByteBufferAllocator;
2323
import org.apache.parquet.bytes.BytesInput;
2424
import org.apache.parquet.bytes.CapacityByteArrayOutputStream;
25-
import org.apache.parquet.bytes.LittleEndianDataOutputStream;
2625
import org.apache.parquet.column.Encoding;
2726
import org.apache.parquet.column.values.ValuesWriter;
2827
import org.apache.parquet.io.ParquetEncodingException;
@@ -37,7 +36,6 @@ public class FixedLenByteArrayPlainValuesWriter extends ValuesWriter {
3736
private static final Logger LOG = LoggerFactory.getLogger(PlainValuesWriter.class);
3837

3938
private CapacityByteArrayOutputStream arrayOut;
40-
private LittleEndianDataOutputStream out;
4139
private int length;
4240
private ByteBufferAllocator allocator;
4341

@@ -46,7 +44,6 @@ public FixedLenByteArrayPlainValuesWriter(
4644
this.length = length;
4745
this.allocator = allocator;
4846
this.arrayOut = new CapacityByteArrayOutputStream(initialSize, pageSize, this.allocator);
49-
this.out = new LittleEndianDataOutputStream(arrayOut);
5047
}
5148

5249
@Override
@@ -56,7 +53,7 @@ public final void writeBytes(Binary v) {
5653
"Fixed Binary size " + v.length() + " does not match field type length " + length);
5754
}
5855
try {
59-
v.writeTo(out);
56+
v.writeTo(arrayOut);
6057
} catch (IOException e) {
6158
throw new ParquetEncodingException("could not write fixed bytes", e);
6259
}
@@ -69,11 +66,6 @@ public long getBufferedSize() {
6966

7067
@Override
7168
public BytesInput getBytes() {
72-
try {
73-
out.flush();
74-
} catch (IOException e) {
75-
throw new ParquetEncodingException("could not write page", e);
76-
}
7769
LOG.debug("writing a buffer of size {}", arrayOut.size());
7870
return BytesInput.from(arrayOut);
7971
}

0 commit comments

Comments
 (0)