Join post-HNSW block rescore for diversifying child KNN (POC)

iprithv · iprithv · commit 522b26cc53e9 · 2026-05-11T01:49:45.000+05:30
Optional blockRescore on DiversifyingChildren float/byte KNN; shared blockRescore() with visited accounting; tests; JMH benchmark; CHANGES (Improvements, GITHUB#15839). Relates to #15839
diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
@@ -108,6 +108,11 @@ New Features
 
 Improvements
 ---------------------
+* GITHUB#15839: DiversifyingChildren KNN queries now support optional post-HNSW block rescoring:
+  when enabled, all children in each found parent's block are scored after approximate search,
+  guaranteeing the best child per parent is returned and correctly tracking extra visited nodes.
+  (Prithvi S)
+
 * GITHUB#15704: Replace LinkedList with more efficient data structure. (Renato Haeberli)
 
 * GITHUB#15682: Use ArrayDeque instead of LinkedList in CompoundWordTokenFilterBase.java. (Renato Haeberli)
diff --git a/lucene/benchmark-jmh/build.gradle b/lucene/benchmark-jmh/build.gradle
@@ -20,6 +20,7 @@ description = 'Lucene JMH micro-benchmarking module'
 dependencies {
   moduleImplementation project(':lucene:core')
   moduleImplementation project(':lucene:expressions')
+  moduleImplementation project(':lucene:join')
   moduleImplementation project(':lucene:sandbox')
   moduleTestImplementation project(':lucene:test-framework')
 
diff --git a/lucene/benchmark-jmh/src/java/module-info.java b/lucene/benchmark-jmh/src/java/module-info.java
@@ -24,6 +24,7 @@
   requires jdk.unsupported;
   requires org.apache.lucene.core;
   requires org.apache.lucene.expressions;
+  requires org.apache.lucene.join;
   requires org.apache.lucene.sandbox;
   requires commons.math3;
 
diff --git a/lucene/benchmark-jmh/src/java/org/apache/lucene/benchmark/jmh/DiversifyingChildrenFloatKnnJoinBenchmark.java b/lucene/benchmark-jmh/src/java/org/apache/lucene/benchmark/jmh/DiversifyingChildrenFloatKnnJoinBenchmark.java
@@ -0,0 +1,216 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance with the
+ * License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.benchmark.jmh;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Random;
+import java.util.concurrent.TimeUnit;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.document.KnnFloatVectorField;
+import org.apache.lucene.document.StringField;
+import org.apache.lucene.index.DirectoryReader;
+import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.index.IndexWriterConfig;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.index.VectorSimilarityFunction;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.TermQuery;
+import org.apache.lucene.search.TopDocs;
+import org.apache.lucene.search.join.BitSetProducer;
+import org.apache.lucene.search.join.DiversifyingChildrenFloatKnnVectorQuery;
+import org.apache.lucene.search.join.QueryBitSetProducer;
+import org.apache.lucene.store.ByteBuffersDirectory;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.util.VectorUtil;
+import org.openjdk.jmh.annotations.Benchmark;
+import org.openjdk.jmh.annotations.BenchmarkMode;
+import org.openjdk.jmh.annotations.Fork;
+import org.openjdk.jmh.annotations.Level;
+import org.openjdk.jmh.annotations.Measurement;
+import org.openjdk.jmh.annotations.Mode;
+import org.openjdk.jmh.annotations.OutputTimeUnit;
+import org.openjdk.jmh.annotations.Param;
+import org.openjdk.jmh.annotations.Scope;
+import org.openjdk.jmh.annotations.Setup;
+import org.openjdk.jmh.annotations.State;
+import org.openjdk.jmh.annotations.TearDown;
+import org.openjdk.jmh.annotations.Warmup;
+import org.openjdk.jmh.infra.Blackhole;
+
+/**
+ * End-to-end {@link DiversifyingChildrenFloatKnnVectorQuery} search on a single-segment block-join
+ * index (children + parent marker per block), using the default HNSW approximate path ({@code
+ * childFilter == null}).
+ *
+ * <p>The {@code blockRescore} parameter switches the feature on/off so both modes can be compared
+ * in a single run (see <a href="https://github.com/apache/lucene/issues/15839">GITHUB#15839</a>).
+ * Extra work scales roughly with {@code topK * childrenPerParent}.
+ *
+ * <p>Indicative results on Apple M-series, JDK 25, dim=96, topK=64, 4096 parent blocks (lower is
+ * better):
+ *
+ * <pre>
+ * blockRescore  childrenPerParent  Score (ms/op)
+ * false         8                  0.123
+ * false         32                 0.226
+ * false         64                 0.254
+ * true          8                  0.151  (+23%)
+ * true          32                 0.316  (+40%)
+ * true          64                 0.412  (+62%)
+ * </pre>
+ *
+ * <p>Example:
+ *
+ * <pre>{@code
+ * ./gradlew :lucene:benchmark-jmh:assemble
+ * cd lucene/benchmark-jmh/build/benchmarks
+ * java -jar lucene-benchmark-jmh-*-SNAPSHOT.jar DiversifyingChildrenFloatKnnJoin \
+ *     -f 2 -wi 3 -i 8 -tu ms
+ * }</pre>
+ */
+@BenchmarkMode(Mode.AverageTime)
+@OutputTimeUnit(TimeUnit.MILLISECONDS)
+@State(Scope.Benchmark)
+@Warmup(iterations = 3, time = 1)
+@Measurement(iterations = 6, time = 1)
+@Fork(
+    value = 2,
+    jvmArgsAppend = {
+      "-Xmx2g",
+      "-Xms2g",
+      "-XX:+AlwaysPreTouch",
+      "--add-modules=jdk.incubator.vector"
+    })
+public class DiversifyingChildrenFloatKnnJoinBenchmark {
+
+  /** Number of parents to return ({@code k}), each represented by one matching child. */
+  @Param({"64"})
+  public int topK;
+
+  /**
+   * Children with vectors per parent block. Post-HNSW block rescoring iterates sibling children in
+   * each retained block, so incremental cost rises with this parameter.
+   */
+  @Param({"8", "32", "64"})
+  public int childrenPerParent;
+
+  @Param({"96"})
+  public int dimension;
+
+  /**
+   * Whether to enable post-HNSW block rescoring. When {@code true}, after HNSW search all children
+   * in each found parent's block are scored to guarantee the best child is returned. Compare
+   * {@code false} (baseline / no rescoring) against {@code true} (rescoring enabled) to measure
+   * latency overhead.
+   */
+  @Param({"false", "true"})
+  public boolean blockRescore;
+
+  private Directory directory;
+  private IndexSearcher searcher;
+  private Query diversifyingJoinQuery;
+
+  static Document parentDoc() {
+    Document d = new Document();
+    d.add(new StringField("docType", "_parent", Field.Store.NO));
+    return d;
+  }
+
+  /** Fixed corpus size for stable HNSW behavior; must be >= topK. */
+  private static final int NUM_PARENT_BLOCKS = 4096;
+
+  private static float[] randomUnitVector(Random random, int dim, float[] scratch) {
+    for (int i = 0; i < dim; i++) {
+      scratch[i] = random.nextFloat() * 2f - 1f;
+    }
+    return VectorUtil.l2normalize(scratch, false);
+  }
+
+  @Setup(Level.Trial)
+  public void setupTrial() throws IOException {
+    if (topK > NUM_PARENT_BLOCKS) {
+      throw new IllegalStateException("topK must be <= NUM_PARENT_BLOCKS");
+    }
+    directory = new ByteBuffersDirectory();
+    IndexWriterConfig iwc = new IndexWriterConfig();
+    long randomSeed = 0xC0FFEE42F00DL ^ ((long) childrenPerParent << 32) ^ dimension;
+    Random random = new Random(randomSeed);
+    float[] scratch = new float[dimension];
+    try (IndexWriter w = new IndexWriter(directory, iwc)) {
+      for (int p = 0; p < NUM_PARENT_BLOCKS; p++) {
+        List<Document> block = new ArrayList<>(childrenPerParent + 1);
+        for (int c = 0; c < childrenPerParent; c++) {
+          Document child = new Document();
+          child.add(
+              new KnnFloatVectorField(
+                  "vec",
+                  randomUnitVector(random, dimension, scratch),
+                  VectorSimilarityFunction.DOT_PRODUCT));
+          block.add(child);
+        }
+        block.add(parentDoc());
+        w.addDocuments(block);
+      }
+      w.forceMerge(1);
+    }
+
+    var reader = DirectoryReader.open(directory);
+    searcher = new IndexSearcher(reader);
+    BitSetProducer parentsFilter =
+        new QueryBitSetProducer(new TermQuery(new Term("docType", "_parent")));
+    float[] queryVector = new float[dimension];
+    queryVector[0] = 1f;
+    for (int i = 1; i < dimension; i++) {
+      queryVector[i] = 0f;
+    }
+    VectorUtil.l2normalize(queryVector, false);
+    diversifyingJoinQuery =
+        new DiversifyingChildrenFloatKnnVectorQuery(
+            "vec",
+            queryVector,
+            null,
+            topK,
+            parentsFilter,
+            org.apache.lucene.search.knn.KnnSearchStrategy.Hnsw.DEFAULT,
+            blockRescore);
+  }
+
+  @TearDown(Level.Trial)
+  public void tearDownTrial() throws IOException {
+    if (searcher != null) {
+      searcher.getIndexReader().close();
+    }
+    if (directory != null) {
+      directory.close();
+    }
+  }
+
+  @Benchmark
+  public void searchDiversifyingJoinHnsw(Blackhole bh) throws IOException {
+    TopDocs hits = searcher.search(diversifyingJoinQuery, topK);
+    bh.consume(hits.scoreDocs.length);
+    bh.consume(hits.totalHits.value());
+    if (hits.scoreDocs.length > 0) {
+      bh.consume(hits.scoreDocs[0].doc);
+      bh.consume(hits.scoreDocs[0].score);
+    }
+  }
+}
diff --git a/lucene/join/src/java/org/apache/lucene/search/join/DiversifyingChildrenByteKnnVectorQuery.java b/lucene/join/src/java/org/apache/lucene/search/join/DiversifyingChildrenByteKnnVectorQuery.java
@@ -60,6 +60,7 @@ public class DiversifyingChildrenByteKnnVectorQuery extends KnnByteVectorQuery {
   private final Query childFilter;
   private final int k;
   private final byte[] query;
+  private final boolean blockRescore;
 
   /**
    * Create a ToParentBlockJoinByteVectorQuery.
@@ -72,7 +73,7 @@ public class DiversifyingChildrenByteKnnVectorQuery extends KnnByteVectorQuery {
    */
   public DiversifyingChildrenByteKnnVectorQuery(
       String field, byte[] query, Query childFilter, int k, BitSetProducer parentsFilter) {
-    this(field, query, childFilter, k, parentsFilter, DEFAULT);
+    this(field, query, childFilter, k, parentsFilter, DEFAULT, false);
   }
 
   /**
@@ -95,11 +96,40 @@ public DiversifyingChildrenByteKnnVectorQuery(
       int k,
       BitSetProducer parentsFilter,
       KnnSearchStrategy searchStrategy) {
+    this(field, query, childFilter, k, parentsFilter, searchStrategy, false);
+  }
+
+  /**
+   * Create a DiversifyingChildrenByteKnnVectorQuery with optional post-HNSW block rescoring.
+   *
+   * <p>When {@code blockRescore} is {@code true}, after the approximate HNSW search completes, all
+   * children in each found parent's block are scored to guarantee the truly best child is returned.
+   * See {@link DiversifyingChildrenFloatKnnVectorQuery#DiversifyingChildrenFloatKnnVectorQuery(
+   * String, float[], Query, int, BitSetProducer, KnnSearchStrategy, boolean)} for details.
+   *
+   * @param field the query field
+   * @param query the vector query
+   * @param childFilter the child filter
+   * @param k how many parent documents to return given the matching children
+   * @param parentsFilter Filter identifying the parent documents.
+   * @param searchStrategy the search strategy to use.
+   * @param blockRescore if {@code true}, enables post-HNSW block rescoring.
+   * @lucene.experimental
+   */
+  public DiversifyingChildrenByteKnnVectorQuery(
+      String field,
+      byte[] query,
+      Query childFilter,
+      int k,
+      BitSetProducer parentsFilter,
+      KnnSearchStrategy searchStrategy,
+      boolean blockRescore) {
     super(field, query, k, childFilter, searchStrategy);
     this.childFilter = childFilter;
     this.parentsFilter = parentsFilter;
     this.k = k;
     this.query = query;
+    this.blockRescore = blockRescore;
   }
 
   @Override
@@ -173,7 +203,25 @@ protected TopDocs approximateSearch(
       return NO_RESULTS;
     }
     context.reader().searchNearestVectors(field, query, collector, acceptDocs);
-    return collector.topDocs();
+    TopDocs results = collector.topDocs();
+    if (!blockRescore || results.scoreDocs.length == 0) {
+      return results;
+    }
+    BitSet parentBitSet = parentsFilter.getBitSet(context);
+    if (parentBitSet == null) {
+      return results;
+    }
+    ByteVectorValues vectorValues = context.reader().getByteVectorValues(field);
+    if (vectorValues == null) {
+      return results;
+    }
+    VectorScorer scorer = vectorValues.scorer(query);
+    if (scorer == null) {
+      return results;
+    }
+    // Delegate to the shared static implementation in the float variant.
+    return DiversifyingChildrenFloatKnnVectorQuery.blockRescore(
+        results, acceptDocs, parentBitSet, scorer);
   }
 
   @Override
@@ -195,14 +243,15 @@ public boolean equals(Object o) {
     if (!super.equals(o)) return false;
     DiversifyingChildrenByteKnnVectorQuery that = (DiversifyingChildrenByteKnnVectorQuery) o;
     return k == that.k
+        && blockRescore == that.blockRescore
         && Objects.equals(parentsFilter, that.parentsFilter)
         && Objects.equals(childFilter, that.childFilter)
         && Arrays.equals(query, that.query);
   }
 
   @Override
   public int hashCode() {
-    int result = Objects.hash(super.hashCode(), parentsFilter, childFilter, k);
+    int result = Objects.hash(super.hashCode(), parentsFilter, childFilter, k, blockRescore);
     result = 31 * result + Arrays.hashCode(query);
     return result;
   }
diff --git a/lucene/join/src/java/org/apache/lucene/search/join/DiversifyingChildrenFloatKnnVectorQuery.java b/lucene/join/src/java/org/apache/lucene/search/join/DiversifyingChildrenFloatKnnVectorQuery.java
diff --git a/lucene/join/src/test/org/apache/lucene/search/join/TestParentBlockJoinFloatKnnVectorQuery.java b/lucene/join/src/test/org/apache/lucene/search/join/TestParentBlockJoinFloatKnnVectorQuery.java