@@ -107,26 +107,22 @@ public enum IndexPrecision {
107107
108108 public enum WorkloadMode {
109109 /**
110- * Build per-source partitions and stop. (No compaction, no recall.)
110+ * Build per-source partitions and stop.
111111 */
112- PARTITION_ONLY ,
112+ PARTITION ,
113113
114114 /**
115115 * Assume partitions exist on disk; compact them.
116116 */
117- COMPACT_ONLY ,
117+ COMPACT ,
118118
119119 /**
120- * Assume partitions exist on disk; compact them, then run recall .
120+ * Build a single graph for the whole dataset and write it .
121121 */
122- COMPACT_AND_RECALL ,
122+ BUILD ,
123123
124124 /**
125- * Build a single graph for the whole dataset and write it. Then run recall.
126- */
127- BUILD_FROM_SCRATCH ,
128- /**
129- * (Default) Build partitions, compact them, then run recall.
125+ * (Default) Build partitions, then compact them.
130126 */
131127 PARTITION_AND_COMPACT
132128 }
@@ -278,6 +274,9 @@ private static void writeCompletedCount(int count) {
278274 @ Param ({"PARTITION_AND_COMPACT" })
279275 public WorkloadMode workloadMode ;
280276
277+ @ Param ({"true" })
278+ public boolean measureRecall ;
279+
281280 @ Param ({"4" }) // Default value, can be overridden via command line
282281 public int numPartitions ;
283282
@@ -387,25 +386,10 @@ public void setup() throws Exception {
387386
388387 int dimension ;
389388
390- if (workloadMode == WorkloadMode .COMPACT_ONLY ) {
391- ds = null ;
392- queryVectors = null ;
393- groundTruth = null ;
394- ravv = null ;
395- baseVectors = null ;
396- dimension = -1 ;
397-
398- var datasetInfo = DataSets .loadDataSet (datasetNames );
399- similarityFunction = datasetInfo
400- .flatMap (DataSetInfo ::similarityFunction )
401- .orElseGet (() -> {
402- log .warn ("Could not determine similarity function for dataset '{}'; defaulting to COSINE" , datasetNames );
403- return VectorSimilarityFunction .COSINE ;
404- });
389+ boolean needsRecallData = measureRecall && workloadMode != WorkloadMode .PARTITION ;
390+ boolean needsBaseVectors = workloadMode != WorkloadMode .COMPACT || needsRecallData ; // COMPACT + measureRecall calls runRecall, and queryVectors/groundTruth are only populated in the dataset-loading branch
405391
406- log .info ("Skipping dataset load for COMPACT_ONLY mode without recall. Workload: {}, similarityFunction: {}, Live nodes rate: {}" ,
407- workloadMode , similarityFunction , liveNodesRate );
408- } else {
392+ if (needsBaseVectors ) {
409393 ds = DataSets .loadDataSet (datasetNames )
410394 .orElseThrow (() -> new RuntimeException ("Dataset not found: " + datasetNames ))
411395 .getDataSet ();
@@ -425,13 +409,38 @@ public void setup() throws Exception {
425409 ravv = new ListRandomAccessVectorValues (baseVectors , ds .getDimension ());
426410 }
427411
428- queryVectors = ds .getQueryVectors ();
429- groundTruth = ds .getGroundTruth ();
430412 similarityFunction = ds .getSimilarityFunction ();
431413 dimension = ds .getDimension ();
432414
433- log .info ("Dataset {} loaded with recall data. Base vectors: {} (portion {}), Query vectors: {}, Dim: {}, Similarity: {}, Workload: {}, Live nodes rate: {}" ,
434- datasetNames , ravv .size (), datasetPortion , queryVectors .size (), dimension , similarityFunction , workloadMode , liveNodesRate );
415+ if (needsRecallData ) {
416+ queryVectors = ds .getQueryVectors ();
417+ groundTruth = ds .getGroundTruth ();
418+ log .info ("Dataset {} loaded with recall data. Base vectors: {} (portion {}), Query vectors: {}, Dim: {}, Similarity: {}, Workload: {}, measureRecall: {}, Live nodes rate: {}" ,
419+ datasetNames , ravv .size (), datasetPortion , queryVectors .size (), dimension , similarityFunction , workloadMode , measureRecall , liveNodesRate );
420+ } else {
421+ queryVectors = null ;
422+ groundTruth = null ;
423+ log .info ("Dataset {} loaded (base vectors only). Base vectors: {} (portion {}), Dim: {}, Similarity: {}, Workload: {}, measureRecall: {}" ,
424+ datasetNames , ravv .size (), datasetPortion , dimension , similarityFunction , workloadMode , measureRecall );
425+ }
426+ } else {
427+ ds = null ;
428+ queryVectors = null ;
429+ groundTruth = null ;
430+ ravv = null ;
431+ baseVectors = null ;
432+ dimension = -1 ;
433+
434+ var datasetInfo = DataSets .loadDataSet (datasetNames );
435+ similarityFunction = datasetInfo
436+ .flatMap (DataSetInfo ::similarityFunction )
437+ .orElseGet (() -> {
438+ log .warn ("Could not determine similarity function for dataset '{}'; defaulting to COSINE" , datasetNames );
439+ return VectorSimilarityFunction .COSINE ;
440+ });
441+
442+ log .info ("Skipping dataset load for {} mode. similarityFunction: {}, Live nodes rate: {}" ,
443+ workloadMode , similarityFunction , liveNodesRate );
435444 }
436445
437446 // Resolve storagePaths + partitionsDir
@@ -440,22 +449,18 @@ public void setup() throws Exception {
440449 compactOutputPath = resolveCompactOutputPath (partitionsBaseDir );
441450 scratchOutputPath = resolveScratchOutputPath (partitionsBaseDir );
442451
443- // Clean stale artifacts only if we're going to rebuild them.
444- if (workloadMode == WorkloadMode .COMPACT_ONLY || workloadMode == WorkloadMode .COMPACT_AND_RECALL ) {
445- // For compact-only and compact-and-recall, ensure the partition files exist.
452+ if (workloadMode == WorkloadMode .COMPACT ) {
446453 verifyPartitionsExist (partitionsBaseDir , numPartitions );
447454 }
448455
449- // Partition metadata for remapping (needed for compaction)
450- if (workloadMode == WorkloadMode .PARTITION_ONLY || workloadMode == WorkloadMode .PARTITION_AND_COMPACT ) {
456+ if (workloadMode == WorkloadMode .PARTITION || workloadMode == WorkloadMode .PARTITION_AND_COMPACT ) {
451457 var partitionedData = DataSetPartitioner .partition (baseVectors , numPartitions , splitDistribution );
452458 vectorsPerSourceCount = partitionedData .sizes ;
453459 } else {
454460 vectorsPerSourceCount = null ;
455461 }
456462
457- // Build partitions during setup for SEGMENTS_* (matches original benchmark structure)
458- if (workloadMode == WorkloadMode .PARTITION_ONLY || workloadMode == WorkloadMode .PARTITION_AND_COMPACT ) {
463+ if (workloadMode == WorkloadMode .PARTITION || workloadMode == WorkloadMode .PARTITION_AND_COMPACT ) {
459464 if (jfrPartitioning ) {
460465 jfrPartitioningRecorder .start (JFR_DIR , "partitioning-" + jfrParamSuffix () + ".jfr" , jfrObjectCount );
461466 }
@@ -472,8 +477,8 @@ public void setup() throws Exception {
472477 }
473478
474479 private void validateParams () {
475- if (workloadMode == WorkloadMode .BUILD_FROM_SCRATCH ) {
476- log .warn ("numPartitions={} ignored in BUILD_FROM_SCRATCH mode" , numPartitions );
480+ if (workloadMode == WorkloadMode .BUILD ) {
481+ log .warn ("numPartitions={} ignored in BUILD mode" , numPartitions );
477482 }
478483 else {
479484 if (numPartitions <= 1 ) throw new IllegalArgumentException ("numPartitions must be larger than one" );
@@ -570,7 +575,7 @@ private void verifyPartitionsExist(Path partitionsDir, int numPartitions) {
570575 for (int i = 0 ; i < numPartitions ; i ++) {
571576 Path seg = partitionsDir .resolve ("per-source-graph-" + i );
572577 if (!Files .exists (seg )) {
573- throw new IllegalStateException ("Missing partition file for COMPACT_ONLY or COMPACT_AND_RECALL : " + seg .toAbsolutePath ());
578+ throw new IllegalStateException ("Missing partition file for COMPACT mode : " + seg .toAbsolutePath ());
574579 }
575580 }
576581 }
@@ -701,6 +706,14 @@ private long buildFromScratch(List<VectorFloat<?>> baseVectors) throws Exception
701706
702707 int dimension = baseVectors .get (0 ).length ();
703708 var full = new ListRandomAccessVectorValues (baseVectors , dimension );
709+
710+ log .info ("Building from scratch: vectors={} dim={} sim={} deg={} bw={} precision={} pwThreads={} vp={} -> {}" ,
711+ full .size (), dimension , similarityFunction ,
712+ graphDegree , beamWidth , indexPrecision , parallelWriteThreads , resolvedVectorizationProvider ,
713+ scratchOutputPath .toAbsolutePath ());
714+
715+ long startNanos = System .nanoTime ();
716+
704717 ProductQuantization pq = null ;
705718 PQVectors pqVectors = null ;
706719 BuildScoreProvider bsp ;
@@ -714,11 +727,6 @@ private long buildFromScratch(List<VectorFloat<?>> baseVectors) throws Exception
714727 bsp = BuildScoreProvider .randomAccessScoreProvider (full , similarityFunction );
715728 }
716729
717- log .info ("Building from scratch: vectors={} dim={} sim={} deg={} bw={} precision={} pwThreads={} vp={} -> {}" ,
718- full .size (), dimension , similarityFunction ,
719- graphDegree , beamWidth , indexPrecision , parallelWriteThreads , resolvedVectorizationProvider ,
720- scratchOutputPath .toAbsolutePath ());
721-
722730 var builder = new GraphIndexBuilder (bsp , dimension , graphDegree , beamWidth , 1.2f , 1.2f , true );
723731 var graph = builder .build (full );
724732
@@ -730,19 +738,10 @@ private long buildFromScratch(List<VectorFloat<?>> baseVectors) throws Exception
730738
731739 writerBuilder .with (new InlineVectors (dimension ));
732740
733- // ProductQuantization pq = null;
734- // PQVectors pqVectors = null;
735- // if (indexPrecision == IndexPrecision.FUSEDPQ) {
736- // boolean centerData = similarityFunction == VectorSimilarityFunction.EUCLIDEAN;
737- // pq = ProductQuantization.compute(full, dimension / 8, 256, centerData);
738- // pqVectors = (PQVectors) pq.encodeAll(full);
739- // writerBuilder.with(new FusedPQ(graph.maxDegree(), pq));
740- // }
741741 if (indexPrecision == IndexPrecision .FUSEDPQ ) {
742742 writerBuilder .with (new FusedPQ (graph .maxDegree (), pq ));
743743 }
744744
745- long startNanos = System .nanoTime ();
746745 try (var writer = writerBuilder .build ()) {
747746 var suppliers = new EnumMap <FeatureId , IntFunction <Feature .State >>(FeatureId .class );
748747 suppliers .put (FeatureId .INLINE_VECTORS , ord -> new InlineVectors .State (full .getVector (ord )));
@@ -814,27 +813,28 @@ public void run(Blackhole blackhole, RecallResult recallResult) throws Exception
814813
815814 // Execute workload
816815 switch (workloadMode ) {
817- case PARTITION_ONLY :
816+ case PARTITION :
818817 break ;
819818
820- case COMPACT_ONLY :
819+ case COMPACT :
821820 durationMs = compactPartitions ();
821+ if (measureRecall ) {
822+ recall = runRecall (compactOutputPath );
823+ }
822824 break ;
823825
824- case COMPACT_AND_RECALL :
825- durationMs = compactPartitions ();
826- recall = runRecall (compactOutputPath );
827- break ;
828-
829- case BUILD_FROM_SCRATCH : {
826+ case BUILD :
830827 durationMs = buildFromScratch (baseVectors );
831- recall = runRecall (scratchOutputPath );
828+ if (measureRecall ) {
829+ recall = runRecall (scratchOutputPath );
830+ }
832831 break ;
833- }
834832
835833 case PARTITION_AND_COMPACT :
836834 durationMs = compactPartitions ();
837- recall = runRecall (compactOutputPath );
835+ if (measureRecall ) {
836+ recall = runRecall (compactOutputPath );
837+ }
838838 break ;
839839
840840 default :
@@ -902,6 +902,7 @@ private LinkedHashMap<String, Object> buildParams() {
902902 params .put ("parallelWriteThreads" , parallelWriteThreads );
903903 params .put ("vectorizationProvider" , resolvedVectorizationProvider );
904904 params .put ("datasetPortion" , datasetPortion );
905+ params .put ("measureRecall" , measureRecall );
905906 params .put ("jfrPartitioning" , jfrPartitioning );
906907 params .put ("jfrCompacting" , jfrCompacting );
907908 params .put ("jfrObjectCount" , jfrObjectCount );
0 commit comments