Skip to content

Commit 512d5b4

Browse files
committed
Fix HNSW InfoStream duplicate times and add per-chunk completion logging (#15967)
Signed-off-by: prithvi <prithvisivasankar@gmail.com>
1 parent db3222d commit 512d5b4

3 files changed

Lines changed: 72 additions & 16 deletions

File tree

lucene/CHANGES.txt

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -148,6 +148,10 @@ Optimizations
148148

149149
Bug Fixes
150150
---------------------
151+
* GITHUB#15967: Fix HNSW InfoStream progress to show elapsed time since merge start instead of
152+
duplicate incremental/total times. Add per-chunk completion logging with sub-millisecond
153+
precision to aid merge concurrency debugging. (Prithvi S)
154+
151155
* GITHUB#14049: Randomize KNN codec params in RandomCodec. Fixes scalar quantization div-by-zero
152156
when all values are identical. (Mike Sokolov)
153157

lucene/core/src/java/org/apache/lucene/util/hnsw/HnswConcurrentMergeBuilder.java

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,8 +22,10 @@
2222
import java.io.IOException;
2323
import java.util.ArrayList;
2424
import java.util.List;
25+
import java.util.Locale;
2526
import java.util.concurrent.Callable;
2627
import java.util.concurrent.atomic.AtomicInteger;
28+
import java.util.concurrent.atomic.AtomicLong;
2729
import java.util.concurrent.locks.Lock;
2830
import org.apache.lucene.internal.hppc.IntHashSet;
2931
import org.apache.lucene.search.TaskExecutor;
@@ -76,11 +78,17 @@ public OnHeapHnswGraph build(int maxOrd) throws IOException {
7678
if (frozen) {
7779
throw new IllegalStateException("graph has already been built");
7880
}
81+
long mergeStartTimeNS = System.nanoTime();
7982
if (infoStream.isEnabled(HNSW_COMPONENT)) {
8083
infoStream.message(
8184
HNSW_COMPONENT,
8285
"build graph from " + maxOrd + " vectors, with " + workers.length + " workers");
8386
}
87+
AtomicLong cumulativeWorkTimeNS = new AtomicLong();
88+
for (ConcurrentMergeWorker worker : workers) {
89+
worker.setMergeStartTimeNS(mergeStartTimeNS);
90+
worker.setCumulativeWorkTimeNS(cumulativeWorkTimeNS);
91+
}
8492
List<Callable<Void>> futures = new ArrayList<>();
8593
for (int i = 0; i < workers.length; i++) {
8694
int finalI = i;
@@ -91,6 +99,20 @@ public OnHeapHnswGraph build(int maxOrd) throws IOException {
9199
});
92100
}
93101
taskExecutor.invokeAll(futures);
102+
if (infoStream.isEnabled(HNSW_COMPONENT)) {
103+
double wallClockMs = (System.nanoTime() - mergeStartTimeNS) / 1_000_000.0;
104+
double totalWorkerMs = cumulativeWorkTimeNS.get() / 1_000_000.0;
105+
double effectiveConcurrency = wallClockMs > 0 ? totalWorkerMs / wallClockMs : 0;
106+
infoStream.message(
107+
HNSW_COMPONENT,
108+
String.format(
109+
Locale.ROOT,
110+
"merge completed: %d vectors, %.2f ms wall clock, %.2f ms cumulative worker time, %.2fx effective concurrency",
111+
maxOrd,
112+
wallClockMs,
113+
totalWorkerMs,
114+
effectiveConcurrency));
115+
}
94116
return getCompletedGraph();
95117
}
96118

lucene/core/src/java/org/apache/lucene/util/hnsw/HnswGraphBuilder.java

Lines changed: 46 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@
2626
import java.util.Locale;
2727
import java.util.Objects;
2828
import java.util.SplittableRandom;
29-
import java.util.concurrent.TimeUnit;
29+
import java.util.concurrent.atomic.AtomicLong;
3030
import java.util.concurrent.locks.Lock;
3131
import org.apache.lucene.internal.hppc.IntHashSet;
3232
import org.apache.lucene.search.KnnCollector;
@@ -85,6 +85,29 @@ public class HnswGraphBuilder implements HnswBuilder {
8585
protected InfoStream infoStream = InfoStream.getDefault();
8686
protected boolean frozen;
8787

88+
/**
89+
* Merge-level start time in nanoseconds. When set, the periodic progress prints (every 10K
90+
* vectors) show elapsed time since the overall merge began rather than since the current chunk
91+
* began. A value of -1 means not set (non-concurrent path).
92+
*/
93+
private long mergeStartTimeNS = -1;
94+
95+
/**
96+
* Shared accumulator for total worker time across all concurrent merge workers. Each chunk's
97+
* elapsed time is added here so that effective concurrency can be computed at merge end.
98+
*/
99+
private AtomicLong cumulativeWorkTimeNS;
100+
101+
/** Set the merge-level start time so progress prints show time since merge began. */
102+
void setMergeStartTimeNS(long mergeStartTimeNS) {
103+
this.mergeStartTimeNS = mergeStartTimeNS;
104+
}
105+
106+
/** Set the shared accumulator for tracking cumulative worker time across concurrent chunks. */
107+
void setCumulativeWorkTimeNS(AtomicLong cumulativeWorkTimeNS) {
108+
this.cumulativeWorkTimeNS = cumulativeWorkTimeNS;
109+
}
110+
88111
public static HnswGraphBuilder create(
89112
RandomVectorScorerSupplier scorerSupplier, int M, int beamWidth, long seed)
90113
throws IOException {
@@ -204,16 +227,30 @@ protected void addVectors(int minOrd, int maxOrd) throws IOException {
204227
if (frozen) {
205228
throw new IllegalStateException("This HnswGraphBuilder is frozen and cannot be updated");
206229
}
207-
long start = System.nanoTime(), t = start;
208-
if (infoStream.isEnabled(HNSW_COMPONENT)) {
209-
infoStream.message(HNSW_COMPONENT, "addVectors [" + minOrd + " " + maxOrd + ")");
210-
}
230+
long start = System.nanoTime();
231+
long progressStart = mergeStartTimeNS != -1 ? mergeStartTimeNS : start;
211232
for (int node = minOrd; node < maxOrd; node++) {
212233
addGraphNode(node);
213234
if ((node % 10000 == 0) && infoStream.isEnabled(HNSW_COMPONENT)) {
214-
t = printGraphBuildStatus(node, start, t);
235+
printGraphBuildStatus(node, progressStart);
215236
}
216237
}
238+
long chunkElapsedNS = System.nanoTime() - start;
239+
if (cumulativeWorkTimeNS != null) {
240+
cumulativeWorkTimeNS.addAndGet(chunkElapsedNS);
241+
}
242+
if (infoStream.isEnabled(HNSW_COMPONENT)) {
243+
double elapsedMs = chunkElapsedNS / 1_000_000.0;
244+
infoStream.message(
245+
HNSW_COMPONENT,
246+
String.format(
247+
Locale.ROOT,
248+
"addVectors [%d %d): %d vectors in %.2f ms",
249+
minOrd,
250+
maxOrd,
251+
maxOrd - minOrd,
252+
elapsedMs));
253+
}
217254
}
218255

219256
private void addVectors(int maxOrd) throws IOException {
@@ -339,17 +376,10 @@ public void addGraphNode(int node, IntHashSet eps0) throws IOException {
339376
addGraphNodeInternal(node, scorer, eps0);
340377
}
341378

342-
private long printGraphBuildStatus(int node, long start, long t) {
343-
long now = System.nanoTime();
379+
private void printGraphBuildStatus(int node, long start) {
380+
double elapsedMs = (System.nanoTime() - start) / 1_000_000.0;
344381
infoStream.message(
345-
HNSW_COMPONENT,
346-
String.format(
347-
Locale.ROOT,
348-
"built %d in %d/%d ms",
349-
node,
350-
TimeUnit.NANOSECONDS.toMillis(now - t),
351-
TimeUnit.NANOSECONDS.toMillis(now - start)));
352-
return now;
382+
HNSW_COMPONENT, String.format(Locale.ROOT, "built %d in %.2f ms", node, elapsedMs));
353383
}
354384

355385
void addDiverseNeighbors(

0 commit comments

Comments
 (0)