Skip to content

Commit 784cedd

Browse files
committed
add dense bulk path for multi-dimensional point fields via BinaryColumn
1 parent 3a63f5e commit 784cedd

4 files changed

Lines changed: 799 additions & 7 deletions

File tree

lucene/CHANGES.txt

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -284,7 +284,7 @@ Other
284284

285285
API Changes
286286
---------------------
287-
(No changes)
287+
* GITHUB#16209: Add dense bulk path for multi-dimensional point fields via BinaryColumn. (Prithvi S)
288288

289289
New Features
290290
---------------------
@@ -346,7 +346,6 @@ API Changes
346346

347347
* GITHUB#16224 Add TokenStreamColumn for experimental columnar batch indexing. (Tim Brooks)
348348

349-
350349
New Features
351350
---------------------
352351

lucene/core/src/java/org/apache/lucene/document/column/BinaryColumn.java

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -21,9 +21,9 @@
2121
import org.apache.lucene.util.BytesRef;
2222

2323
/**
24-
* A {@link Column} that provides variable-size binary values via a tuple cursor. Used for {@link
25-
* org.apache.lucene.index.DocValuesType#BINARY BINARY}, {@link
26-
* org.apache.lucene.index.DocValuesType#SORTED SORTED}, and {@link
24+
* A {@link Column} that provides variable-size binary values via a tuple cursor, and dense values
25+
* via a {@link BytesRefValuesCursor}. Used for {@link org.apache.lucene.index.DocValuesType#BINARY
26+
* BINARY}, {@link org.apache.lucene.index.DocValuesType#SORTED SORTED}, and {@link
2727
* org.apache.lucene.index.DocValuesType#SORTED_SET SORTED_SET} doc values, and for stored/indexed
2828
* binary or text fields. Values fed to points are passed through unchanged, so callers are
2929
* responsible for producing sort-encoded bytes of the correct total length.

lucene/core/src/java/org/apache/lucene/index/PointValuesWriter.java

Lines changed: 42 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,11 @@ class PointValuesWriter {
5858
+ (PointValues.MAX_NUM_BYTES * BKDConfig.MAX_DIMS);
5959
}
6060

61+
/** Minimum number of values to process per chunk in the dense N-D bulk path. */
62+
private static final int MIN_VALUES_PER_CHUNK = 64;
63+
64+
// Dedicated chunk buffer for wide packed values; stays null until pointsDenseBuffer allocates it.
private byte[] densePointsBuffer;
65+
6166
PointValuesWriter(Counter bytesUsed, FieldInfo fieldInfo, SharedIndexingScratch sharedScratch) {
6267
this.fieldInfo = fieldInfo;
6368
this.iwBytesUsed = bytesUsed;
@@ -137,15 +142,30 @@ void addDense1DLongValues(int firstDocID, LongValuesCursor cursor) throws IOExce
137142
commitDenseRange(firstDocID, size, ramBefore);
138143
}
139144

145+
/**
146+
* Bulk-adds dense N-dimensional packed point values from a {@link BytesRefValuesCursor}. Each value
147+
* is a pre-encoded packed byte array of {@code packedBytesLength} bytes.
148+
*/
140149
void addDenseNDValues(int firstDocID, BytesRefValuesCursor cursor) throws IOException {
141150
final int size = cursor.size();
142151
if (size == 0) {
143152
return;
144153
}
145154
final long ramBefore = reserveDenseRange(firstDocID, size);
146155
final int width = packedBytesLength;
147-
final int perChunk = SharedIndexingScratch.BYTES_SCRATCH_SIZE / width;
148-
final byte[] buffer = sharedScratch.bytesScratch();
156+
final byte[] buffer;
157+
final int perChunk;
158+
if (width * MIN_VALUES_PER_CHUNK <= SharedIndexingScratch.BYTES_SCRATCH_SIZE) {
159+
// Common case (packed <= 64 bytes): reuse the shared 4 KiB scratch, which already gives
160+
// at least MIN_VALUES_PER_CHUNK values per chunk. Matches the 1D dense path behavior.
161+
buffer = sharedScratch.bytesScratch();
162+
perChunk = SharedIndexingScratch.BYTES_SCRATCH_SIZE / width;
163+
} else {
164+
// Wide points: shared 4 KiB would yield < MIN_VALUES_PER_CHUNK; allocate a dedicated
165+
// larger buffer (sized for at least 64 values) and keep it for the lifetime of this writer.
166+
buffer = pointsDenseBuffer(width);
167+
perChunk = buffer.length / width;
168+
}
149169
int remaining = size;
150170
while (remaining > 0) {
151171
int chunk = Math.min(perChunk, remaining);
@@ -156,6 +176,26 @@ void addDenseNDValues(int firstDocID, BytesRefValuesCursor cursor) throws IOExce
156176
commitDenseRange(firstDocID, size, ramBefore);
157177
}
158178

179+
/**
180+
* Returns (and caches) a dedicated buffer for wide packed point values, sized to hold at least
181+
* {@link #MIN_VALUES_PER_CHUNK} values. Only called when the shared scratch would result in fewer
182+
* than {@link #MIN_VALUES_PER_CHUNK} values per chunk (i.e. packed length > 64 bytes, given the
* 4 KiB shared scratch described at the call site).
183+
*
184+
* <p>The allocated size is charged to {@code iwBytesUsed}: the full size on first allocation, and
* only the size difference when an existing, smaller buffer is replaced. The previous buffer's
* contents are not copied into the replacement.
*
* @param packedLength number of bytes in one packed point value
* @return the cached buffer, at least {@code packedLength * MIN_VALUES_PER_CHUNK} bytes long
185+
*/
186+
private byte[] pointsDenseBuffer(int packedLength) {
187+
// Room for exactly MIN_VALUES_PER_CHUNK values of packedLength bytes each.
final int minBytes = packedLength * MIN_VALUES_PER_CHUNK;
188+
// First use: allocate the buffer and charge its whole length to iwBytesUsed.
if (densePointsBuffer == null) {
189+
densePointsBuffer = new byte[minBytes];
190+
iwBytesUsed.addAndGet(minBytes);
191+
// Cached buffer is too small for this width: replace it and charge only the growth.
} else if (densePointsBuffer.length < minBytes) {
192+
final int old = densePointsBuffer.length;
193+
densePointsBuffer = new byte[minBytes];
194+
// The cast makes the subtraction happen in long arithmetic before it reaches the counter.
iwBytesUsed.addAndGet(minBytes - (long) old);
195+
}
196+
// May be longer than minBytes if an earlier call asked for more; the visible caller derives
// its per-chunk count from buffer.length, so any extra capacity is used.
return densePointsBuffer;
197+
}
198+
159199
private void validate1DPacked(int byteWidth) {
160200
if (fieldInfo.getPointDimensionCount() != 1 || fieldInfo.getPointNumBytes() != byteWidth) {
161201
throw new IllegalArgumentException(

0 commit comments

Comments
 (0)