1919package org .apache .parquet .column .values .delta ;
2020
2121import java .io .IOException ;
22- import java .nio .ByteBuffer ;
2322import org .apache .parquet .bytes .ByteBufferInputStream ;
2423import org .apache .parquet .bytes .BytesUtils ;
2524import org .apache .parquet .column .values .ValuesReader ;
@@ -53,6 +52,21 @@ public class DeltaBinaryPackingValuesReader extends ValuesReader {
5352 private DeltaBinaryPackingConfig config ;
5453 private int [] bitWidths ;
5554
55+ /**
56+ * Reusable byte buffer for holding the packed bytes of a single mini block.
57+ * Avoids allocating a fresh {@code ByteBuffer.slice()} per mini block in
58+ * {@link #unpackMiniBlock(BytePackerForLong)}. Sized lazily to the maximum
59+ * miniblock byte count seen so far.
60+ */
61+ private byte [] miniBlockByteBuffer = new byte [0 ];
62+
63+ /**
64+ * Cache of {@link BytePackerForLong} instances keyed by bit width (0..64).
65+ * The default packer factory does an array lookup, but caching the resolved
66+ * packer instance per reader avoids two virtual dispatches per mini block.
67+ */
68+ private final BytePackerForLong [] packerCache = new BytePackerForLong [65 ];
69+
5670 /**
5771 * eagerly loads all the data into memory
5872 */
@@ -130,7 +144,12 @@ private void loadNewBlockToBuffer() throws IOException {
130144 // mini block is atomic for reading, we read a mini block when there are more values left
131145 int i ;
132146 for (i = 0 ; i < config .miniBlockNumInABlock && valuesBuffered < totalValueCount ; i ++) {
133- BytePackerForLong packer = Packer .LITTLE_ENDIAN .newBytePackerForLong (bitWidths [i ]);
147+ int bitWidth = bitWidths [i ];
148+ BytePackerForLong packer = packerCache [bitWidth ];
149+ if (packer == null ) {
150+ packer = Packer .LITTLE_ENDIAN .newBytePackerForLong (bitWidth );
151+ packerCache [bitWidth ] = packer ;
152+ }
134153 unpackMiniBlock (packer );
135154 }
136155
@@ -143,22 +162,49 @@ private void loadNewBlockToBuffer() throws IOException {
143162 }
144163
145164 /**
146- * mini block has a size of 8*n, unpack 8 value each time
165+ * Mini block has a size of 8*n. Reads the packed bytes once into a reused {@code byte[]}
166+ * buffer (avoiding the per-mini-block {@code ByteBuffer.slice()} allocation), then unpacks
167+ * 32 values at a time using the byte[] form of the packer (which is faster than the
168+ * ByteBuffer form per the comment in {@code ByteBitPackingValuesReader}). For the
169+ * default mini-block size of 32 values this collapses to a single {@code unpack32Values}
170+ * call per mini block. Any residual {@code 8*n} group (mini-block size not a multiple of 32)
171+ * falls back to {@code unpack8Values}.
147172 *
148173 * @param packer the packer created from bitwidth of current mini block
149174 */
150175 private void unpackMiniBlock (BytePackerForLong packer ) throws IOException {
151- for (int j = 0 ; j < config .miniBlockSizeInValues ; j += 8 ) {
152- unpack8Values (packer );
176+ final int bitWidth = packer .getBitWidth ();
177+ final int valueCount = config .miniBlockSizeInValues ;
178+ final int byteCount = (valueCount / 8 ) * bitWidth ;
179+
180+ if (miniBlockByteBuffer .length < byteCount ) {
181+ miniBlockByteBuffer = new byte [byteCount ];
182+ }
183+ int read = 0 ;
184+ while (read < byteCount ) {
185+ int n = in .read (miniBlockByteBuffer , read , byteCount - read );
186+ if (n < 0 ) {
187+ throw new ParquetDecodingException (
188+ "not enough bytes for mini block: needed " + byteCount + ", got " + read );
189+ }
190+ read += n ;
153191 }
154- }
155192
156- private void unpack8Values (BytePackerForLong packer ) throws IOException {
157- // get a single buffer of 8 values. most of the time, this won't require a copy
158- // TODO: update the packer to consume from an InputStream
159- ByteBuffer buffer = in .slice (packer .getBitWidth ());
160- packer .unpack8Values (buffer , buffer .position (), valuesBuffer , valuesBuffered );
161- this .valuesBuffered += 8 ;
193+ // Unpack 32 values at a time when possible; fall back to 8 for any residual.
194+ int valueIndex = 0 ;
195+ int byteIndex = 0 ;
196+ final int step32 = bitWidth * 4 ;
197+ while (valueIndex + 32 <= valueCount ) {
198+ packer .unpack32Values (miniBlockByteBuffer , byteIndex , valuesBuffer , valuesBuffered + valueIndex );
199+ valueIndex += 32 ;
200+ byteIndex += step32 ;
201+ }
202+ while (valueIndex < valueCount ) {
203+ packer .unpack8Values (miniBlockByteBuffer , byteIndex , valuesBuffer , valuesBuffered + valueIndex );
204+ valueIndex += 8 ;
205+ byteIndex += bitWidth ;
206+ }
207+ valuesBuffered += valueCount ;
162208 }
163209
164210 private void readBitWidthsForMiniBlocks () {
0 commit comments