@@ -50,14 +50,14 @@ static DatasetConfig get_dataset_config(const DatasetName &dataset_name)
5050 // https://github.com/erikbern/ann-benchmarks/blob/main/ann_benchmarks/algorithms/diskann/config.yml
5151 if (dataset_name == DatasetName::SIFT1M)
5252 {
53- conf.build_params .R = 32 ;
53+ conf.build_params .R = 64 ;
5454 conf.build_params .L = 125 ;
5555 conf.build_params .alpha = 1 .2f ;
5656 conf.Lvec = {100 , 110 , 120 , 130 , 140 , 150 , 160 , 170 , 180 , 190 , 200 , 250 , 300 };
5757 }
5858 else if (dataset_name == DatasetName::DEEP1M)
5959 {
60- conf.build_params .R = 32 ;
60+ conf.build_params .R = 64 ;
6161 conf.build_params .L = 125 ;
6262 conf.build_params .alpha = 1 .2f ;
6363 conf.anns_k = 100 ;
@@ -66,14 +66,14 @@ static DatasetConfig get_dataset_config(const DatasetName &dataset_name)
6666 else if (dataset_name == DatasetName::GLOVE)
6767 {
6868 conf.build_params .R = 32 ;
69- conf.build_params .L = 125 ;
69+ conf.build_params .L = 100 ;
7070 conf.build_params .alpha = 1 .2f ;
7171 conf.anns_k = 100 ;
72- conf.Lvec = {100 , 110 , 120 , 130 , 140 , 150 , 160 , 170 , 180 , 190 , 200 , 250 , 300 };
72+ conf.Lvec = {100 , 250 , 500 , 1000 , 1500 , 2500 , 5000 , 10000 };
7373 }
7474 else if (dataset_name == DatasetName::AUDIO)
7575 {
76- conf.build_params .R = 32 ;
76+ conf.build_params .R = 64 ;
7777 conf.build_params .L = 125 ;
7878 conf.build_params .alpha = 1 .2f ;
7979 conf.anns_k = 20 ;
@@ -83,7 +83,7 @@ static DatasetConfig get_dataset_config(const DatasetName &dataset_name)
8383 else if (dataset_name == DatasetName::ENRON)
8484 {
8585 // https://github.com/microsoft/DiskANN/blob/7762821dbfe91e838ee7f6db93d010f48f4c4d6d/diskann-benchmark/perf_test_inputs/async_scalar_mimir_enron.json
86- conf.build_params .R = 32 ;
86+ conf.build_params .R = 64 ;
8787 conf.build_params .L = 125 ;
8888 conf.build_params .alpha = 1 .2f ;
8989 conf.anns_k = 100 ;
@@ -126,6 +126,7 @@ void run_create_index(const std::string &index_path, const Dataset &ds, const Da
126126
127127 std::vector<uint32_t > tags (data_num);
128128 std::iota (tags.begin (), tags.end (), 1 ); // tag 0 is reserved for hidden points
129+ diskann::cout << " Tags from " << tags[0 ] << " to " << tags[data_num - 1 ] << std::endl;
129130
130131 auto index_build_params = diskann::IndexWriteParametersBuilder (build_params.L , build_params.R )
131132 .with_max_occlusion_size (build_params.max_occlusion_size )
@@ -377,9 +378,11 @@ void run_dynamic_tests(const Dataset &ds, const DatasetConfig &conf, bool force_
377378
378379 std::vector<uint32_t > tags (data_num);
379380 std::iota (tags.begin (), tags.end (), 1 );
381+ diskann::cout << " Tags from " << tags[0 ] << " to " << tags[data_num - 1 ] << std::endl;
380382
381- std::vector<DynamicScenario> scenarios = {DynamicScenario::AddHalf, DynamicScenario::AddAllRemoveHalf,
382- DynamicScenario::AddHalfRemoveAndAddOneAtATime};
383+ std::vector<DynamicScenario> scenarios = {DynamicScenario::AddHalf};
384+ // std::vector<DynamicScenario> scenarios = {DynamicScenario::AddHalf, DynamicScenario::AddAllRemoveHalf,
385+ // DynamicScenario::AddHalfRemoveAndAddOneAtATime};
383386
384387 for (auto scenario : scenarios)
385388 {
@@ -460,6 +463,9 @@ void run_dynamic_tests(const Dataset &ds, const DatasetConfig &conf, bool force_
460463 for (size_t i = 0 ; i < half_elements; ++i)
461464 {
462465 index->insert_point (&data[i * data_dim], tags[i]);
466+
467+ if (i % 100000 == 0 && i > 0 )
468+ log (" Inserted %zu points...\n " , i);
463469 }
464470 log (" Add time: %.2f s\n " , (add_timer.getElapsedTimeMicro () / 1e6 ));
465471 }
@@ -469,13 +475,22 @@ void run_dynamic_tests(const Dataset &ds, const DatasetConfig &conf, bool force_
469475 for (size_t i = 0 ; i < max_elements; ++i)
470476 {
471477 index->insert_point (&data[i * data_dim], tags[i]);
478+
479+ if (i % 100000 == 0 && i > 0 )
480+ log (" Inserted %zu points...\n " , i);
472481 }
473482 log (" Add time: %.2f s\n " , (add_timer.getElapsedTimeMicro () / 1e6 ));
474483
475484 StopW del_stopw;
476485 for (size_t i = half_elements; i < max_elements; ++i)
477486 {
478487 index->lazy_delete (tags[i]);
488+
489+ if ((i - half_elements) % 100000 == 0 && i > half_elements)
490+ log (" Deleted %zu points...\n " , (i - half_elements));
491+ size_t del_count = i - half_elements + 1 ;
492+ if (del_count > 0 && (del_count % (half_elements / 10 )) == 0 )
493+ index->consolidate_deletes (index_build_params);
479494 }
480495 log (" Delete time: %.2f s\n " , (del_stopw.getElapsedTimeMicro () / 1e6 ));
481496 }
@@ -488,14 +503,22 @@ void run_dynamic_tests(const Dataset &ds, const DatasetConfig &conf, bool force_
488503 for (size_t i = half_elements; i < max_elements; ++i)
489504 {
490505 index->insert_point (&data[i * data_dim], tags[i]);
506+
507+ if ((i - half_elements) % 100000 == 0 && i > half_elements)
508+ log (" Inserted %zu points...\n " , (i - half_elements));
491509 }
492510 log (" Add (second half) time: %.2f s\n " , (add_timer.getElapsedTimeMicro () / 1e6 ));
493511
494512 StopW update_stopw;
495513 for (size_t i = 0 ; i < half_elements; ++i)
496514 {
497- index->lazy_delete (tags[i + half_elements]);
498- index->insert_point (&data[i * data_dim], tags[i]);
515+ index->lazy_delete (tags[i + half_elements]); // delete second half
516+ index->insert_point (&data[i * data_dim], tags[i]); // add first half
517+
518+ if (i % 100000 == 0 && i > 0 )
519+ log (" Updated %zu points...\n " , i);
520+ if (i > 0 && (i % (half_elements / 10 )) == 0 )
521+ index->consolidate_deletes (index_build_params);
499522 }
500523 log (" Update (Delete + Add) time: %.2f s\n " , (update_stopw.getElapsedTimeMicro () / 1e6 ));
501524 }
@@ -655,7 +678,7 @@ int main(int argc, char **argv)
655678 {
656679 Dataset dataset (ds_name_to_run, data_root);
657680 DatasetConfig conf = get_dataset_config (ds_name_to_run);
658- run_static_tests (dataset, conf, force_test, num_threads);
681+ // run_static_tests(dataset, conf, force_test, num_threads);
659682 run_dynamic_tests (dataset, conf, force_test, num_threads);
660683 }
661684
0 commit comments