|
26 | 26 | import java.util.Locale; |
27 | 27 | import java.util.Objects; |
28 | 28 | import java.util.SplittableRandom; |
29 | | -import java.util.concurrent.TimeUnit; |
| 29 | +import java.util.concurrent.atomic.AtomicLong; |
30 | 30 | import java.util.concurrent.locks.Lock; |
31 | 31 | import org.apache.lucene.internal.hppc.IntHashSet; |
32 | 32 | import org.apache.lucene.search.KnnCollector; |
@@ -85,6 +85,29 @@ public class HnswGraphBuilder implements HnswBuilder { |
85 | 85 | protected InfoStream infoStream = InfoStream.getDefault(); |
86 | 86 | protected boolean frozen; |
87 | 87 |
|
| 88 | + /** |
| 89 | + * Merge-level start time as a {@link System#nanoTime()} reading. When set, the periodic progress |
| 90 | + * prints (every 10K vectors) show elapsed time since the overall merge began rather than since |
| 91 | + * the current chunk began. A value of -1 means not set (non-concurrent path). |
| 92 | + */ |
| 93 | + private long mergeStartTimeNS = -1; |
| 94 | + |
| 95 | + /** |
| 96 | + * Shared accumulator of worker time (ns) across all concurrent merge workers; null when unset. |
| 97 | + * Each chunk's elapsed time is added here so effective concurrency can be computed at merge end. |
| 98 | + */ |
| 99 | + private AtomicLong cumulativeWorkTimeNS; |
| 100 | + |
| 101 | + /** Sets the merge-level start time so progress prints show time since the merge began. */ |
| 102 | + void setMergeStartTimeNS(long mergeStartTimeNS) { |
| 103 | + this.mergeStartTimeNS = mergeStartTimeNS; |
| 104 | + } |
| 105 | + |
| 106 | + /** Sets the shared accumulator for tracking cumulative worker time across concurrent chunks. */ |
| 107 | + void setCumulativeWorkTimeNS(AtomicLong cumulativeWorkTimeNS) { |
| 108 | + this.cumulativeWorkTimeNS = cumulativeWorkTimeNS; |
| 109 | + } |
| 110 | + |
88 | 111 | public static HnswGraphBuilder create( |
89 | 112 | RandomVectorScorerSupplier scorerSupplier, int M, int beamWidth, long seed) |
90 | 113 | throws IOException { |
@@ -204,16 +227,30 @@ protected void addVectors(int minOrd, int maxOrd) throws IOException { |
204 | 227 | if (frozen) { |
205 | 228 | throw new IllegalStateException("This HnswGraphBuilder is frozen and cannot be updated"); |
206 | 229 | } |
207 | | - long start = System.nanoTime(), t = start; |
208 | | - if (infoStream.isEnabled(HNSW_COMPONENT)) { |
209 | | - infoStream.message(HNSW_COMPONENT, "addVectors [" + minOrd + " " + maxOrd + ")"); |
210 | | - } |
| 230 | + long start = System.nanoTime(); |
| 231 | + long progressStart = mergeStartTimeNS != -1 ? mergeStartTimeNS : start; |
211 | 232 | for (int node = minOrd; node < maxOrd; node++) { |
212 | 233 | addGraphNode(node); |
213 | 234 | if ((node % 10000 == 0) && infoStream.isEnabled(HNSW_COMPONENT)) { |
214 | | - t = printGraphBuildStatus(node, start, t); |
| 235 | + printGraphBuildStatus(node, progressStart); |
215 | 236 | } |
216 | 237 | } |
| 238 | + long chunkElapsedNS = System.nanoTime() - start; |
| 239 | + if (cumulativeWorkTimeNS != null) { |
| 240 | + cumulativeWorkTimeNS.addAndGet(chunkElapsedNS); |
| 241 | + } |
| 242 | + if (infoStream.isEnabled(HNSW_COMPONENT)) { |
| 243 | + double elapsedMs = chunkElapsedNS / 1_000_000.0; |
| 244 | + infoStream.message( |
| 245 | + HNSW_COMPONENT, |
| 246 | + String.format( |
| 247 | + Locale.ROOT, |
| 248 | + "addVectors [%d %d): %d vectors in %.2f ms", |
| 249 | + minOrd, |
| 250 | + maxOrd, |
| 251 | + maxOrd - minOrd, |
| 252 | + elapsedMs)); |
| 253 | + } |
217 | 254 | } |
218 | 255 |
|
219 | 256 | private void addVectors(int maxOrd) throws IOException { |
@@ -339,17 +376,10 @@ public void addGraphNode(int node, IntHashSet eps0) throws IOException { |
339 | 376 | addGraphNodeInternal(node, scorer, eps0); |
340 | 377 | } |
341 | 378 |
|
342 | | - private long printGraphBuildStatus(int node, long start, long t) { |
343 | | - long now = System.nanoTime(); |
| 379 | + private void printGraphBuildStatus(int node, long start) { |
| 380 | + double elapsedMs = (System.nanoTime() - start) / 1_000_000.0; |
344 | 381 | infoStream.message( |
345 | | - HNSW_COMPONENT, |
346 | | - String.format( |
347 | | - Locale.ROOT, |
348 | | - "built %d in %d/%d ms", |
349 | | - node, |
350 | | - TimeUnit.NANOSECONDS.toMillis(now - t), |
351 | | - TimeUnit.NANOSECONDS.toMillis(now - start))); |
352 | | - return now; |
| 382 | + HNSW_COMPONENT, String.format(Locale.ROOT, "built %d in %.2f ms", node, elapsedMs)); |
353 | 383 | } |
354 | 384 |
|
355 | 385 | void addDiverseNeighbors( |
|
0 commit comments