@@ -66,7 +66,7 @@ static DatasetConfig get_dataset_config(const DatasetName &dataset_name)
6666 else if (dataset_name == DatasetName::GLOVE)
6767 {
6868 conf.build_params .R = 32 ;
69- conf.build_params .L = 100 ;
69+ conf.build_params .L = 125 ;
7070 conf.build_params .alpha = 1 .2f ;
7171 conf.anns_k = 100 ;
7272 conf.Lvec = {100 , 110 , 120 , 130 , 140 , 150 , 160 , 170 , 180 , 190 , 200 , 250 , 300 };
@@ -119,6 +119,7 @@ void run_create_index(const std::string &index_path, const Dataset &ds, const Da
119119 auto build_params = conf.build_params ;
120120
121121 size_t data_num = ds.info ().base_count ;
122+ size_t data_dim = ds.info ().dims ;
122123
123124 auto data_wrapper = ds.load_base ();
124125 float *data = data_wrapper.data ;
@@ -146,7 +147,7 @@ void run_create_index(const std::string &index_path, const Dataset &ds, const Da
146147 .with_label_type (" uint" )
147148 .with_index_write_params (index_build_params)
148149 .with_index_search_params (index_search_params)
149- .is_dynamic_index (true ) // TODO can be false
150+ .is_dynamic_index (true )
150151 .is_enable_tags (true )
151152 .is_use_opq (build_params.use_opq )
152153 .is_pq_dist_build (build_params.build_PQ_bytes > 0 )
@@ -162,18 +163,26 @@ void run_create_index(const std::string &index_path, const Dataset &ds, const Da
162163 log (" ----------------------------------------\n " );
163164 log (" R (Max Degree) : %u\n " , build_params.R );
164165 log (" L (Build List Size): %u\n " , build_params.L );
166+ log (" Max Occlusion Size : %u\n " , build_params.max_occlusion_size );
165167 log (" Alpha : %.2f\n " , build_params.alpha );
166168 log (" PQ Chunks : %u\n " , build_params.build_PQ_bytes );
167169 log (" OPQ : %s\n " , build_params.use_opq ? " Yes" : " No" );
168170 log (" ----------------------------------------\n " );
169171
170172 StopW timer;
171173 log (" Building graph in one go...\n " );
172- index->build (data, data_num, tags);
174+ for (size_t i = 0 ; i < data_num; i++)
175+ {
176+ index->insert_point (&data[i * data_dim], tags[i]);
177+ if ((i + 1 ) % 100000 == 0 ) // report the number of points inserted so far, not the last index
178+ {
179+ log (" added %zu after %.2f seconds.\n " , i + 1 , (timer.getElapsedTimeMicro () / 1000000.0 ));
180+ }
181+ }
173182 log (" Graph built after %.2f seconds.\n " , (timer.getElapsedTimeMicro () / 1000000.0 ));
174183
175184 // Save dynamic index
176- index->save (index_path.c_str (), true );
185+ index->save (index_path.c_str ());
177186}
178187
179188// -----------------------------------------------------------------------------
@@ -208,7 +217,7 @@ void generate_graph_stats(const std::string &graph_file)
208217 size_t min_degree = std::numeric_limits<size_t >::max ();
209218 size_t max_degree = 0 ;
210219 size_t total_degree = 0 ;
211- size_t nodes_with_less_than_2_degree = 0 ;
220+ size_t vertex_with_degree1 = 0 ;
212221
213222 size_t bytes_read = sizeof (size_t ) + sizeof (uint32_t ) + sizeof (uint32_t ) + sizeof (size_t );
214223
@@ -223,8 +232,8 @@ void generate_graph_stats(const std::string &graph_file)
223232 min_degree = std::min (min_degree, (size_t )k);
224233 max_degree = std::max (max_degree, (size_t )k);
225234 total_degree += k;
226- if (k < 2 )
227- nodes_with_less_than_2_degree ++;
235+ if (k == 1 )
236+ vertex_with_degree1 ++;
228237
229238 num_nodes++;
230239 }
@@ -236,7 +245,7 @@ void generate_graph_stats(const std::string &graph_file)
236245 log (" Max Degree : %zu\n " , max_degree);
237246 log (" Min Degree : %zu\n " , min_degree);
238247 log (" Average Degree : %.2f\n " , num_nodes > 0 ? (float )total_degree / num_nodes : 0 .0f );
239- log (" Count (Degree<2) : %zu\n " , nodes_with_less_than_2_degree );
248+ log (" Count (Degree=1) : %zu\n " , vertex_with_degree1 );
240249 log (" ----------------------------------------\n\n " );
241250}
242251std::unique_ptr<diskann::AbstractIndex> load_index (const std::string &index_path, const Dataset &ds,
@@ -277,21 +286,11 @@ void run_anns_test(const std::string &index_path, const Dataset &ds, const Datas
277286 log (" Running ANNS Tests (k=%u)\n " , conf.anns_k );
278287 log (" ----------------------------------------\n " );
279288
280- if (ds.info ().metric == diskann::FAST_L2)
281- {
282- log (" Optimizing index layout for FAST_L2...\n " );
283- auto typed_index = dynamic_cast <diskann::Index<float , uint32_t , uint32_t > *>(index.get ());
284- if (typed_index)
285- {
286- typed_index->optimize_index_layout ();
287- }
288- }
289-
290289 auto typed_index = dynamic_cast <diskann::Index<float , uint32_t , uint32_t > *>(index.get ());
291290 if (typed_index)
292291 {
293- test_diskann_anns<float , uint32_t , uint32_t >(typed_index, query_data.data , query_num, query_dim, query_dim ,
294- ground_truth, conf.anns_k , conf.Lvec , num_threads);
292+ test_diskann_anns<float , uint32_t , uint32_t >(typed_index, query_data.data , query_num, query_dim, ground_truth ,
293+ conf.anns_k , conf.Lvec , num_threads);
295294 }
296295 else
297296 {
@@ -333,14 +332,12 @@ void run_explore_test(const std::string &index_path, const Dataset &ds, const Da
333332 unsigned num_explore = 0 , dim_explore = 0 ;
334333 float *explore_queries =
335334 load_fvecs ((ds.files_dir () / ds.info ().explore_query_file ).string ().c_str (), num_explore, dim_explore);
336- size_t aligned_dim_explore = dim_explore;
337335
338336 auto typed_index = dynamic_cast <diskann::Index<float , uint32_t , uint32_t > *>(index.get ());
339337 if (typed_index && explore_queries)
340338 {
341339 test_diskann_explore<float , uint32_t , uint32_t >(typed_index, explore_queries, num_explore, dim_explore,
342- aligned_dim_explore, explore_gt_vec, entry_indices,
343- conf.explore_k );
340+ explore_gt_vec, entry_indices, conf.explore_k );
344341 }
345342
346343 if (explore_queries)
@@ -370,7 +367,7 @@ inline const char *dynamic_scenario_str(DynamicScenario scenario)
370367 }
371368}
372369
373- void run_dynamic_data_test (const Dataset &ds, const DatasetConfig &conf, bool force_test, uint32_t num_threads)
370+ void run_dynamic_tests (const Dataset &ds, const DatasetConfig &conf, bool force_test, uint32_t num_threads)
374371{
375372 auto build_params = conf.build_params ;
376373 size_t data_num = ds.info ().base_count ;
@@ -445,6 +442,7 @@ void run_dynamic_data_test(const Dataset &ds, const DatasetConfig &conf, bool fo
445442 log (" ----------------------------------------\n " );
446443 log (" R (Max Degree) : %u\n " , build_params.R );
447444 log (" L (Build List Size): %u\n " , build_params.L );
445+ log (" Max Occlusion Size : %u\n " , build_params.max_occlusion_size );
448446 log (" Alpha : %.2f\n " , build_params.alpha );
449447 log (" PQ Chunks : %u\n " , build_params.build_PQ_bytes );
450448 log (" OPQ : %s\n " , build_params.use_opq ? " Yes" : " No" );
@@ -514,19 +512,19 @@ void run_dynamic_data_test(const Dataset &ds, const DatasetConfig &conf, bool fo
514512 index->save (index_path.c_str (), true );
515513 }
516514
517- // Generate Graph Statistics (after index object is destroyed)
518- generate_graph_stats (index_path);
519-
520- // Test the index by loading it from disk (out-of-context testing)
521- run_anns_test (index_path, ds, conf, num_threads);
522-
523515 log (" %s: Log written to: %s\n " , scenario_name.c_str (), log_file.c_str ());
524516 }
525517 catch (const std::exception &e)
526518 {
527519 log (" Exception in dynamic test '%s': %s\n " , scenario_name.c_str (), e.what ());
528520 }
529521
522+ // Generate Graph Statistics (after index object is destroyed)
523+ generate_graph_stats (index_path);
524+
525+ // Test the index by loading it from disk (out-of-context testing)
526+ run_anns_test (index_path, ds, conf, num_threads);
527+
530528 detach_cout_from_log ();
531529 reset_log_to_console ();
532530 }
@@ -538,20 +536,20 @@ void run_common_tests(const std::string &index_path, const Dataset &ds, const Da
538536 run_explore_test (index_path, ds, conf, false , num_threads);
539537}
540538
541- void run_test_suite (const Dataset &ds, const DatasetConfig &conf, bool force_test, bool only_test , uint32_t num_threads)
539+ void run_static_tests (const Dataset &ds, const DatasetConfig &conf, bool force_test, uint32_t num_threads)
542540{
543541 std::string index_path = get_index_path (ds, conf);
544542
545543 ensure_directory (ds.dataset_dir () / " diskann" );
546544 std::string log_file = index_path + " _benchmark.log" ;
547545
548- if (!force_test && !only_test && diskann::benchmark::file_exists (log_file))
546+ if (!force_test && diskann::benchmark::file_exists (log_file))
549547 {
550548 log (" Log file %s already exists. Skipping.\n " , log_file.c_str ());
551549 return ;
552550 }
553551
554- set_log_file (log_file, force_test || only_test );
552+ set_log_file (log_file, force_test);
555553 attach_cout_to_log ();
556554
557555 log (" ================================================================================\n " );
@@ -560,20 +558,14 @@ void run_test_suite(const Dataset &ds, const DatasetConfig &conf, bool force_tes
560558
561559 try
562560 {
563- if (!only_test && ! diskann::benchmark::file_exists (index_path + " _pq_pivots.bin" ) &&
561+ if (!diskann::benchmark::file_exists (index_path + " _pq_pivots.bin" ) &&
564562 !diskann::benchmark::file_exists (index_path + " _sample_data.bin" ) &&
565563 !diskann::benchmark::file_exists (index_path + " .data" ))
566564 {
567565 run_create_index (index_path, ds, conf, num_threads);
568- // Generate Graph Statistics after build (out-of-context)
569- generate_graph_stats (index_path);
570- }
571- else if (!only_test)
572- {
573- log (" Index files already exist at %s. Skipping build.\n " , index_path.c_str ());
574- generate_graph_stats (index_path);
575566 }
576567
568+ generate_graph_stats (index_path);
577569 run_anns_test (index_path, ds, conf, num_threads);
578570 run_explore_test (index_path, ds, conf, false , num_threads);
579571 }
@@ -584,9 +576,6 @@ void run_test_suite(const Dataset &ds, const DatasetConfig &conf, bool force_tes
584576
585577 detach_cout_from_log ();
586578 reset_log_to_console ();
587-
588- // Now run the dynamic tests, appending to the single unified process
589- // run_dynamic_data_test(ds, conf, force_test, num_threads);
590579}
591580
592581int main (int argc, char **argv)
@@ -603,18 +592,13 @@ int main(int argc, char **argv)
603592
604593 std::string data_root = DATA_PATH;
605594 DatasetName ds_name = DatasetName::GLOVE;
606- bool only_test = false ;
607- bool force_test = false ;
595+ bool force_test = false ; // opt-in via --force-test / -f; a true default would make that flag a no-op
608596 uint32_t num_threads = 1 ;
609597
610598 for (int i = 1 ; i < argc; ++i)
611599 {
612600 std::string arg = argv[i];
613- if (arg == " --only-test" || arg == " -t" )
614- {
615- only_test = true ;
616- }
617- else if (arg == " --force-test" || arg == " -f" )
601+ if (arg == " --force-test" || arg == " -f" )
618602 {
619603 force_test = true ;
620604 }
@@ -671,7 +655,8 @@ int main(int argc, char **argv)
671655 {
672656 Dataset dataset (ds_name_to_run, data_root);
673657 DatasetConfig conf = get_dataset_config (ds_name_to_run);
674- run_test_suite (dataset, conf, force_test, only_test, num_threads);
658+ run_static_tests (dataset, conf, force_test, num_threads);
659+ run_dynamic_tests (dataset, conf, force_test, num_threads);
675660 }
676661
677662 return 0 ;
0 commit comments