Skip to content

Commit 699e1e1

Browse files
committed
Add dense bulk path for multi-dimensional point fields via BinaryColumn
This commit adds a dense bulk fast path for multi-dimensional point fields using BinaryColumn. The key optimization is a dynamically-sized dense points buffer that ensures at least MIN_VALUES_PER_CHUNK (64) values per chunk, preventing pathologically small chunk sizes for wide point values. Previously, the N-D dense path used a fixed 4KB shared scratch buffer, which could result in very small per-chunk counts for high-dimensional or wide-byte points. The new pointsDenseBuffer() method allocates a larger buffer when needed to maintain efficient bulk processing. Also adds comprehensive tests for multi-dimensional point indexing via BinaryColumn covering LatLonPoint, XYPoint, IntPoint, LongPoint, FloatPoint, DoublePoint, and InetAddressPoint.
1 parent 529a575 commit 699e1e1

4 files changed

Lines changed: 735 additions & 5 deletions

File tree

lucene/CHANGES.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -341,6 +341,7 @@ API Changes
341341

342342
* GITHUB#16224: Add TokenStreamColumn for experimental columnar batch indexing. (Tim Brooks)
343343

344+
* GITHUB#16209: Add dense bulk path for multi-dimensional point fields via BinaryColumn. (Prithvi S)
344345

345346
New Features
346347
---------------------

lucene/core/src/java/org/apache/lucene/document/column/BinaryColumn.java

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -21,9 +21,9 @@
2121
import org.apache.lucene.util.BytesRef;
2222

2323
/**
24-
* A {@link Column} that provides variable-size binary values via a tuple cursor. Used for {@link
25-
* org.apache.lucene.index.DocValuesType#BINARY BINARY}, {@link
26-
* org.apache.lucene.index.DocValuesType#SORTED SORTED}, and {@link
24+
* A {@link Column} that provides variable-size binary values via a tuple cursor, and dense values
25+
* via a {@link BytesRefValuesCursor}. Used for {@link org.apache.lucene.index.DocValuesType#BINARY
26+
* BINARY}, {@link org.apache.lucene.index.DocValuesType#SORTED SORTED}, and {@link
2727
* org.apache.lucene.index.DocValuesType#SORTED_SET SORTED_SET} doc values, and for stored/indexed
2828
* binary or text fields. Values fed to points are passed through unchanged, so callers are
2929
* responsible for producing sort-encoded bytes of the correct total length.

lucene/core/src/java/org/apache/lucene/index/PointValuesWriter.java

Lines changed: 27 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,11 @@ class PointValuesWriter {
5858
+ (PointValues.MAX_NUM_BYTES * BKDConfig.MAX_DIMS);
5959
}
6060

61+
/** Minimum number of values to process per chunk in the dense N-D bulk path. */
62+
private static final int MIN_VALUES_PER_CHUNK = 64;
63+
64+
/**
 * Chunk buffer for the dense N-D bulk path. Starts out {@code null}; {@code pointsDenseBuffer(int)}
 * allocates it on first use and replaces it with a larger array when a caller needs more than
 * the current length.
 */
private byte[] densePointsBuffer;
65+
6166
PointValuesWriter(Counter bytesUsed, FieldInfo fieldInfo, SharedIndexingScratch sharedScratch) {
6267
this.fieldInfo = fieldInfo;
6368
this.iwBytesUsed = bytesUsed;
@@ -137,15 +142,19 @@ void addDense1DLongValues(int firstDocID, LongValuesCursor cursor) throws IOExce
137142
commitDenseRange(firstDocID, size, ramBefore);
138143
}
139144

145+
/**
146+
* Bulk-adds dense N-dimensional packed point values from a {@link BytesRefValuesCursor}. Each value
147+
* is a pre-encoded packed byte array of {@code packedBytesLength} bytes.
148+
*/
140149
void addDenseNDValues(int firstDocID, BytesRefValuesCursor cursor) throws IOException {
141150
final int size = cursor.size();
142151
if (size == 0) {
143152
return;
144153
}
145154
final long ramBefore = reserveDenseRange(firstDocID, size);
146155
final int width = packedBytesLength;
147-
final int perChunk = SharedIndexingScratch.BYTES_SCRATCH_SIZE / width;
148-
final byte[] buffer = sharedScratch.bytesScratch();
156+
final byte[] buffer = pointsDenseBuffer(width);
157+
final int perChunk = buffer.length / width;
149158
int remaining = size;
150159
while (remaining > 0) {
151160
int chunk = Math.min(perChunk, remaining);
@@ -156,6 +165,22 @@ void addDenseNDValues(int firstDocID, BytesRefValuesCursor cursor) throws IOExce
156165
commitDenseRange(firstDocID, size, ramBefore);
157166
}
158167

168+
/**
169+
* Returns a dense buffer sized to fit at least {@link SharedIndexingScratch#BYTES_SCRATCH_SIZE}
170+
* bytes, or a larger buffer if {@code packedLength} requires it to hold at least {@code
171+
* MIN_VALUES_PER_CHUNK} values per chunk.
172+
*/
173+
private byte[] pointsDenseBuffer(int packedLength) {
174+
final int minBytes = packedLength * MIN_VALUES_PER_CHUNK;
175+
if (densePointsBuffer == null) {
176+
densePointsBuffer =
177+
new byte[Math.max(SharedIndexingScratch.BYTES_SCRATCH_SIZE, minBytes)];
178+
} else if (densePointsBuffer.length < minBytes) {
179+
densePointsBuffer = new byte[minBytes];
180+
}
181+
return densePointsBuffer;
182+
}
183+
159184
private void validate1DPacked(int byteWidth) {
160185
if (fieldInfo.getPointDimensionCount() != 1 || fieldInfo.getPointNumBytes() != byteWidth) {
161186
throw new IllegalArgumentException(

0 commit comments

Comments
 (0)