Skip to content

Commit 103e627

Browse files
committed
Tweaks: byte tests, JMH/benchmark polish, query javadoc
1 parent 522b26c commit 103e627

5 files changed

Lines changed: 225 additions & 64 deletions

File tree

lucene/benchmark-jmh/src/java/org/apache/lucene/benchmark/jmh/DiversifyingChildrenFloatKnnJoinBenchmark.java

Lines changed: 21 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@
3737
import org.apache.lucene.search.join.BitSetProducer;
3838
import org.apache.lucene.search.join.DiversifyingChildrenFloatKnnVectorQuery;
3939
import org.apache.lucene.search.join.QueryBitSetProducer;
40+
import org.apache.lucene.search.knn.KnnSearchStrategy;
4041
import org.apache.lucene.store.ByteBuffersDirectory;
4142
import org.apache.lucene.store.Directory;
4243
import org.apache.lucene.util.VectorUtil;
@@ -60,39 +61,38 @@
6061
* index (children + parent marker per block), using the default HNSW approximate path ({@code
6162
* childFilter == null}).
6263
*
63-
* <p>The {@code blockRescore} parameter switches the feature on/off so both modes can be compared
64+
* <p>The {@code rescoreBlocks} parameter switches the feature on/off so both modes can be compared
6465
* in a single run (see <a href="https://github.com/apache/lucene/issues/15839">GITHUB#15839</a>).
6566
* Extra work scales roughly with {@code topK * childrenPerParent}.
6667
*
67-
* <p>Indicative results on Apple M-series, JDK 25, dim=96, topK=64, 4096 parent blocks (lower is
68-
* better):
68+
* <p>Indicative results — 3 forks, 5 warmup / 10 measurement iterations, JDK 25, {@code -Xmx2g},
69+
* dim=96, topK=64, 4096 parent blocks (lower is better):
6970
*
7071
* <pre>
71-
* blockRescore childrenPerParent Score (ms/op)
72-
* false 8 0.123
73-
* false 32 0.226
74-
* false 64 0.254
75-
* true 8 0.151 (+23%)
76-
* true 32 0.316 (+40%)
77-
* true 64 0.412 (+62%)
72+
* childrenPerParent rescoreBlocks=false rescoreBlocks=true
73+
* 8 0.117 ± 0.002 ms/op 0.149 ± 0.002 ms/op (+27%)
74+
* 32 0.237 ± 0.006 ms/op 0.326 ± 0.009 ms/op (+38%)
75+
* 64 0.259 ± 0.005 ms/op 0.426 ± 0.013 ms/op (+64%)
7876
* </pre>
7977
*
78+
* <p>Overhead grows with block width (and with {@code topK}).
79+
*
8080
* <p>Example:
8181
*
8282
* <pre>{@code
8383
* ./gradlew :lucene:benchmark-jmh:assemble
8484
* cd lucene/benchmark-jmh/build/benchmarks
8585
* java -jar lucene-benchmark-jmh-*-SNAPSHOT.jar DiversifyingChildrenFloatKnnJoin \
86-
* -f 2 -wi 3 -i 8 -tu ms
86+
* -f 3 -wi 5 -i 10 -tu ms
8787
* }</pre>
8888
*/
8989
@BenchmarkMode(Mode.AverageTime)
9090
@OutputTimeUnit(TimeUnit.MILLISECONDS)
9191
@State(Scope.Benchmark)
92-
@Warmup(iterations = 3, time = 1)
93-
@Measurement(iterations = 6, time = 1)
92+
@Warmup(iterations = 5, time = 1)
93+
@Measurement(iterations = 10, time = 1)
9494
@Fork(
95-
value = 2,
95+
value = 3,
9696
jvmArgsAppend = {
9797
"-Xmx2g",
9898
"-Xms2g",
@@ -117,12 +117,12 @@ public class DiversifyingChildrenFloatKnnJoinBenchmark {
117117

118118
/**
119119
* Whether to enable post-HNSW block rescoring. When {@code true}, after HNSW search all children
120-
* in each found parent's block are scored to guarantee the best child is returned. Compare
121-
* {@code false} (baseline / no rescoring) against {@code true} (rescoring enabled) to measure
122-
* latency overhead.
120+
* in each found parent's block are scored to guarantee the best child is returned. Compare {@code
121+
* false} (baseline / no rescoring) against {@code true} (rescoring enabled) to measure latency
122+
* overhead.
123123
*/
124124
@Param({"false", "true"})
125-
public boolean blockRescore;
125+
public boolean rescoreBlocks;
126126

127127
private Directory directory;
128128
private IndexSearcher searcher;
@@ -176,21 +176,18 @@ public void setupTrial() throws IOException {
176176
searcher = new IndexSearcher(reader);
177177
BitSetProducer parentsFilter =
178178
new QueryBitSetProducer(new TermQuery(new Term("docType", "_parent")));
179+
// [1, 0, ..., 0] is already L2-normalized.
179180
float[] queryVector = new float[dimension];
180181
queryVector[0] = 1f;
181-
for (int i = 1; i < dimension; i++) {
182-
queryVector[i] = 0f;
183-
}
184-
VectorUtil.l2normalize(queryVector, false);
185182
diversifyingJoinQuery =
186183
new DiversifyingChildrenFloatKnnVectorQuery(
187184
"vec",
188185
queryVector,
189186
null,
190187
topK,
191188
parentsFilter,
192-
org.apache.lucene.search.knn.KnnSearchStrategy.Hnsw.DEFAULT,
193-
blockRescore);
189+
KnnSearchStrategy.Hnsw.DEFAULT,
190+
rescoreBlocks);
194191
}
195192

196193
@TearDown(Level.Trial)

lucene/join/src/java/org/apache/lucene/search/join/DiversifyingChildrenByteKnnVectorQuery.java

Lines changed: 16 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,7 @@ public class DiversifyingChildrenByteKnnVectorQuery extends KnnByteVectorQuery {
6060
private final Query childFilter;
6161
private final int k;
6262
private final byte[] query;
63-
private final boolean blockRescore;
63+
private final boolean rescoreBlocks;
6464

6565
/**
6666
* Create a DiversifyingChildrenByteKnnVectorQuery.
@@ -102,18 +102,23 @@ public DiversifyingChildrenByteKnnVectorQuery(
102102
/**
103103
* Create a DiversifyingChildrenByteKnnVectorQuery with optional post-HNSW block rescoring.
104104
*
105-
* <p>When {@code blockRescore} is {@code true}, after the approximate HNSW search completes, all
106-
* children in each found parent's block are scored to guarantee the truly best child is returned.
107-
* See {@link DiversifyingChildrenFloatKnnVectorQuery#DiversifyingChildrenFloatKnnVectorQuery(
108-
* String, float[], Query, int, BitSetProducer, KnnSearchStrategy, boolean)} for details.
105+
* <p>When {@code rescoreBlocks} is {@code true}, after the approximate HNSW search completes, all
106+
* children in each found parent's block are scored to guarantee the truly best child is returned
107+
* — not merely the sibling the graph traversal happened to reach first. This adds O(k &times;
108+
* childrenPerParent) extra scoring work; enable it when block sizes are small or result quality
109+
* is more important than latency.
110+
*
111+
* <p>This applies only to the approximate (HNSW) search path. When the index is small enough that
112+
* Lucene falls back to exact search, all children are already scored exhaustively and no
113+
* additional rescoring is performed.
109114
*
110115
* @param field the query field
111116
* @param query the vector query
112117
* @param childFilter the child filter
113118
* @param k how many parent documents to return given the matching children
114119
* @param parentsFilter Filter identifying the parent documents.
115120
* @param searchStrategy the search strategy to use.
116-
* @param blockRescore if {@code true}, enables post-HNSW block rescoring.
121+
* @param rescoreBlocks if {@code true}, enables post-HNSW block rescoring.
117122
* @lucene.experimental
118123
*/
119124
public DiversifyingChildrenByteKnnVectorQuery(
@@ -123,13 +128,13 @@ public DiversifyingChildrenByteKnnVectorQuery(
123128
int k,
124129
BitSetProducer parentsFilter,
125130
KnnSearchStrategy searchStrategy,
126-
boolean blockRescore) {
131+
boolean rescoreBlocks) {
127132
super(field, query, k, childFilter, searchStrategy);
128133
this.childFilter = childFilter;
129134
this.parentsFilter = parentsFilter;
130135
this.k = k;
131136
this.query = query;
132-
this.blockRescore = blockRescore;
137+
this.rescoreBlocks = rescoreBlocks;
133138
}
134139

135140
@Override
@@ -204,7 +209,7 @@ protected TopDocs approximateSearch(
204209
}
205210
context.reader().searchNearestVectors(field, query, collector, acceptDocs);
206211
TopDocs results = collector.topDocs();
207-
if (!blockRescore || results.scoreDocs.length == 0) {
212+
if (!rescoreBlocks || results.scoreDocs.length == 0) {
208213
return results;
209214
}
210215
BitSet parentBitSet = parentsFilter.getBitSet(context);
@@ -243,15 +248,15 @@ public boolean equals(Object o) {
243248
if (!super.equals(o)) return false;
244249
DiversifyingChildrenByteKnnVectorQuery that = (DiversifyingChildrenByteKnnVectorQuery) o;
245250
return k == that.k
246-
&& blockRescore == that.blockRescore
251+
&& rescoreBlocks == that.rescoreBlocks
247252
&& Objects.equals(parentsFilter, that.parentsFilter)
248253
&& Objects.equals(childFilter, that.childFilter)
249254
&& Arrays.equals(query, that.query);
250255
}
251256

252257
@Override
253258
public int hashCode() {
254-
int result = Objects.hash(super.hashCode(), parentsFilter, childFilter, k, blockRescore);
259+
int result = Objects.hash(super.hashCode(), parentsFilter, childFilter, k, rescoreBlocks);
255260
result = 31 * result + Arrays.hashCode(query);
256261
return result;
257262
}

lucene/join/src/java/org/apache/lucene/search/join/DiversifyingChildrenFloatKnnVectorQuery.java

Lines changed: 24 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,6 @@
2020

2121
import java.io.IOException;
2222
import java.util.Arrays;
23-
import java.util.Comparator;
2423
import java.util.Objects;
2524
import org.apache.lucene.index.FloatVectorValues;
2625
import org.apache.lucene.index.LeafReaderContext;
@@ -62,7 +61,7 @@ public class DiversifyingChildrenFloatKnnVectorQuery extends KnnFloatVectorQuery
6261
private final Query childFilter;
6362
private final int k;
6463
private final float[] query;
65-
private final boolean blockRescore;
64+
private final boolean rescoreBlocks;
6665

6766
/**
6867
* Create a DiversifyingChildrenFloatKnnVectorQuery.
@@ -104,19 +103,23 @@ public DiversifyingChildrenFloatKnnVectorQuery(
104103
/**
105104
* Create a DiversifyingChildrenFloatKnnVectorQuery with optional post-HNSW block rescoring.
106105
*
107-
* <p>When {@code blockRescore} is {@code true}, after the approximate HNSW search completes, all
108-
* children in each found parent's block are scored to guarantee the truly best child is returned —
109-
* not merely the sibling the graph traversal happened to reach first. This adds O(k &times;
110-
* childrenPerParent) extra scoring work; enable it when block sizes are small or result quality is
111-
* more important than latency.
106+
* <p>When {@code rescoreBlocks} is {@code true}, after the approximate HNSW search completes, all
107+
* children in each found parent's block are scored to guarantee the truly best child is returned
108+
* — not merely the sibling the graph traversal happened to reach first. This adds O(k &times;
109+
* childrenPerParent) extra scoring work; enable it when block sizes are small or result quality
110+
* is more important than latency.
111+
*
112+
* <p>This applies only to the approximate (HNSW) search path. When the index is small enough that
113+
* Lucene falls back to exact search, all children are already scored exhaustively and no
114+
* additional rescoring is performed.
112115
*
113116
* @param field the query field
114117
* @param query the vector query
115118
* @param childFilter the child filter
116119
* @param k how many parent documents to return given the matching children
117120
* @param parentsFilter Filter identifying the parent documents.
118121
* @param searchStrategy the search strategy to use.
119-
* @param blockRescore if {@code true}, enables post-HNSW block rescoring.
122+
* @param rescoreBlocks if {@code true}, enables post-HNSW block rescoring.
120123
* @lucene.experimental
121124
*/
122125
public DiversifyingChildrenFloatKnnVectorQuery(
@@ -126,13 +129,13 @@ public DiversifyingChildrenFloatKnnVectorQuery(
126129
int k,
127130
BitSetProducer parentsFilter,
128131
KnnSearchStrategy searchStrategy,
129-
boolean blockRescore) {
132+
boolean rescoreBlocks) {
130133
super(field, query, k, childFilter, searchStrategy);
131134
this.childFilter = childFilter;
132135
this.parentsFilter = parentsFilter;
133136
this.k = k;
134137
this.query = query;
135-
this.blockRescore = blockRescore;
138+
this.rescoreBlocks = rescoreBlocks;
136139
}
137140

138141
@Override
@@ -206,7 +209,7 @@ protected TopDocs approximateSearch(
206209
}
207210
context.reader().searchNearestVectors(field, query, collector, acceptDocs);
208211
TopDocs results = collector.topDocs();
209-
if (!blockRescore || results.scoreDocs.length == 0) {
212+
if (!rescoreBlocks || results.scoreDocs.length == 0) {
210213
return results;
211214
}
212215
BitSet parentBitSet = parentsFilter.getBitSet(context);
@@ -226,13 +229,10 @@ protected TopDocs approximateSearch(
226229

227230
/**
228231
* For each parent already found by approximate search, scores all children in that parent's block
229-
* to ensure the truly best child is returned — not merely the sibling the graph traversal happened
230-
* to reach first. Children are processed in ascending docId order so the sequential {@link
231-
* VectorScorer} only advances forward. Extra nodes scored are added to {@link
232+
* to ensure the truly best child is returned — not merely the sibling the graph traversal
233+
* happened to reach first. Children are processed in ascending docId order so the sequential
234+
* {@link VectorScorer} only advances forward. Extra nodes scored are added to {@link
232235
* TotalHits#value()}.
233-
*
234-
* <p>This method is package-private so that {@link DiversifyingChildrenByteKnnVectorQuery} can
235-
* reuse the same implementation rather than duplicating it.
236236
*/
237237
static TopDocs blockRescore(
238238
TopDocs results, AcceptDocs acceptDocs, BitSet parentBitSet, VectorScorer scorer)
@@ -243,7 +243,7 @@ static TopDocs blockRescore(
243243
// Sort by docId so parent blocks are visited in ascending order — the forward-only
244244
// VectorScorer cannot go backwards.
245245
ScoreDoc[] scoreDocs = results.scoreDocs.clone();
246-
Arrays.sort(scoreDocs, Comparator.comparingInt(sd -> sd.doc));
246+
Arrays.sort(scoreDocs, (a, b) -> Integer.compare(a.doc, b.doc));
247247

248248
long extraVisited = 0;
249249
for (ScoreDoc scoreDoc : scoreDocs) {
@@ -256,10 +256,11 @@ static TopDocs blockRescore(
256256
continue;
257257
}
258258
if (scorerIter.advance(child) == child) {
259-
// Don't double-count the child HNSW already visited.
260-
if (child != hnswBestChild) {
261-
extraVisited++;
259+
if (child == hnswBestChild) {
260+
// Advance past the child HNSW already scored; no need to re-compute.
261+
continue;
262262
}
263+
extraVisited++;
263264
float s = scorer.score();
264265
if (s > scoreDoc.score) {
265266
scoreDoc.score = s;
@@ -293,15 +294,15 @@ public boolean equals(Object o) {
293294
if (!super.equals(o)) return false;
294295
DiversifyingChildrenFloatKnnVectorQuery that = (DiversifyingChildrenFloatKnnVectorQuery) o;
295296
return k == that.k
296-
&& blockRescore == that.blockRescore
297+
&& rescoreBlocks == that.rescoreBlocks
297298
&& Objects.equals(parentsFilter, that.parentsFilter)
298299
&& Objects.equals(childFilter, that.childFilter)
299300
&& Arrays.equals(query, that.query);
300301
}
301302

302303
@Override
303304
public int hashCode() {
304-
int result = Objects.hash(super.hashCode(), parentsFilter, childFilter, k, blockRescore);
305+
int result = Objects.hash(super.hashCode(), parentsFilter, childFilter, k, rescoreBlocks);
305306
result = 31 * result + Arrays.hashCode(query);
306307
return result;
307308
}

0 commit comments

Comments
 (0)