Skip to content

Commit efbf0fb

Browse files
committed
Cache packer in delta writers (resolves TODO)
Addresses the TODO at DeltaBinaryPackingValuesWriterForLong.java:119: "should this cache the packer?". Adds a per-instance BytePacker[] / BytePackerForLong[] cache keyed by bit width to avoid a factory dispatch + array load on each miniblock flush. Symmetric to the reader cache added in the previous commit (9c14a33). Results on IntEncodingBenchmark.encodeDelta (5 iter x 1 fork): SEQUENTIAL: 73.05M -> 73.67M (+0.8%, within noise) RANDOM: 50.99M -> 51.51M (+1.0%, within noise) LOW_CARDINALITY: 61.15M -> 61.37M (+0.4%, within noise) HIGH_CARDINALITY: 51.30M -> 50.79M (-1.0%, within noise) Kept as code-quality improvement: resolves the explicit TODO, mirrors the reader structure, and the long writer has a 65-entry bit-width space vs 33 for int (more factory dispatch cost avoided). All 573 parquet-column tests pass (including 100K-round random delta tests).
1 parent e67c383 commit efbf0fb

2 files changed

Lines changed: 22 additions & 3 deletions

File tree

parquet-column/src/main/java/org/apache/parquet/column/values/delta/DeltaBinaryPackingValuesWriterForInteger.java

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,12 @@ public class DeltaBinaryPackingValuesWriterForInteger extends DeltaBinaryPacking
6060
*/
6161
private int minDeltaInCurrentBlock = Integer.MAX_VALUE;
6262

63+
/**
64+
* Cache of BytePacker instances indexed by bit width [0, 32].
65+
* Avoids a factory dispatch + array load per miniblock flush.
66+
*/
67+
private final BytePacker[] packerCache = new BytePacker[MAX_BITWIDTH + 1];
68+
6369
public DeltaBinaryPackingValuesWriterForInteger(int slabSize, int pageSize, ByteBufferAllocator allocator) {
6470
this(DEFAULT_NUM_BLOCK_VALUES, DEFAULT_NUM_MINIBLOCKS, slabSize, pageSize, allocator);
6571
}
@@ -116,7 +122,11 @@ private void flushBlockBuffer() {
116122
for (int i = 0; i < miniBlocksToFlush; i++) {
117123
// writing i th miniblock
118124
int currentBitWidth = bitWidths[i];
119-
BytePacker packer = Packer.LITTLE_ENDIAN.newBytePacker(currentBitWidth);
125+
BytePacker packer = packerCache[currentBitWidth];
126+
if (packer == null) {
127+
packer = Packer.LITTLE_ENDIAN.newBytePacker(currentBitWidth);
128+
packerCache[currentBitWidth] = packer;
129+
}
120130
int miniBlockStart = i * config.miniBlockSizeInValues;
121131
// Mini blocks are always flushed as full 32-value groups in the current format.
122132
// Use the packer's 32-value entry point to avoid four pack8Values calls per miniblock.

parquet-column/src/main/java/org/apache/parquet/column/values/delta/DeltaBinaryPackingValuesWriterForLong.java

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,12 @@ public class DeltaBinaryPackingValuesWriterForLong extends DeltaBinaryPackingVal
6060
*/
6161
private long minDeltaInCurrentBlock = Long.MAX_VALUE;
6262

63+
/**
64+
* Cache of BytePackerForLong instances indexed by bit width [0, 64].
65+
* Avoids a factory dispatch + array load per miniblock flush.
66+
*/
67+
private final BytePackerForLong[] packerCache = new BytePackerForLong[MAX_BITWIDTH + 1];
68+
6369
public DeltaBinaryPackingValuesWriterForLong(int slabSize, int pageSize, ByteBufferAllocator allocator) {
6470
this(DEFAULT_NUM_BLOCK_VALUES, DEFAULT_NUM_MINIBLOCKS, slabSize, pageSize, allocator);
6571
}
@@ -116,8 +122,11 @@ private void flushBlockBuffer() {
116122
for (int i = 0; i < miniBlocksToFlush; i++) {
117123
// writing i th miniblock
118124
int currentBitWidth = bitWidths[i];
119-
// TODO: should this cache the packer?
120-
BytePackerForLong packer = Packer.LITTLE_ENDIAN.newBytePackerForLong(currentBitWidth);
125+
BytePackerForLong packer = packerCache[currentBitWidth];
126+
if (packer == null) {
127+
packer = Packer.LITTLE_ENDIAN.newBytePackerForLong(currentBitWidth);
128+
packerCache[currentBitWidth] = packer;
129+
}
121130
int miniBlockStart = i * config.miniBlockSizeInValues;
122131
// Mini blocks are always flushed as full 32-value groups in the current format.
123132
// Use the packer's 32-value entry point to avoid four pack8Values calls per miniblock.

0 commit comments

Comments
 (0)