@@ -111,6 +111,8 @@ std::string get_index_path(const Dataset &ds, const DatasetConfig &conf)
111111 return prefix;
112112}
113113
114+ void generate_graph_stats (const std::string &graph_file);
115+
114116void run_create_index (const std::string &index_path, const Dataset &ds, const DatasetConfig &conf, uint32_t num_threads)
115117{
116118 log (" Building DiskANN index: %s\n " , index_path.c_str ());
@@ -156,8 +158,16 @@ void run_create_index(const std::string &index_path, const Dataset &ds, const Da
156158 auto index = index_factory.create_instance ();
157159 index->set_start_points_at_random (static_cast <float >(0 ));
158160
159- StopW timer;
161+ log (" \n Construction Parameters:\n " );
162+ log (" ----------------------------------------\n " );
163+ log (" R (Max Degree) : %u\n " , build_params.R );
164+ log (" L (Build List Size): %u\n " , build_params.L );
165+ log (" Alpha : %.2f\n " , build_params.alpha );
166+ log (" PQ Chunks : %u\n " , build_params.build_PQ_bytes );
167+ log (" OPQ : %s\n " , build_params.use_opq ? " Yes" : " No" );
168+ log (" ----------------------------------------\n " );
160169
170+ StopW timer;
161171 log (" Building graph in one go...\n " );
162172 index->build (data, data_num, tags);
163173 log (" Graph built after %.2f seconds.\n " , (timer.getElapsedTimeMicro () / 1000000.0 ));
@@ -169,6 +179,66 @@ void run_create_index(const std::string &index_path, const Dataset &ds, const Da
169179// -----------------------------------------------------------------------------
170180// Helpers
171181// -----------------------------------------------------------------------------
182+ void generate_graph_stats (const std::string &graph_file)
183+ {
184+ std::ifstream in;
185+ in.exceptions (std::ios::badbit | std::ios::failbit);
186+
187+ try
188+ {
189+ in.open (graph_file, std::ios::binary);
190+ }
191+ catch (const std::exception &)
192+ {
193+ log (" Warning: Could not open graph file %s for statistics calculation.\n " , graph_file.c_str ());
194+ return ;
195+ }
196+
197+ size_t expected_file_size;
198+ uint32_t max_observed_degree;
199+ uint32_t start;
200+ size_t file_frozen_pts;
201+
202+ in.read ((char *)&expected_file_size, sizeof (size_t ));
203+ in.read ((char *)&max_observed_degree, sizeof (uint32_t ));
204+ in.read ((char *)&start, sizeof (uint32_t ));
205+ in.read ((char *)&file_frozen_pts, sizeof (size_t ));
206+
207+ size_t num_nodes = 0 ;
208+ size_t min_degree = std::numeric_limits<size_t >::max ();
209+ size_t max_degree = 0 ;
210+ size_t total_degree = 0 ;
211+ size_t nodes_with_less_than_2_degree = 0 ;
212+
213+ size_t bytes_read = sizeof (size_t ) + sizeof (uint32_t ) + sizeof (uint32_t ) + sizeof (size_t );
214+
215+ while (bytes_read < expected_file_size)
216+ {
217+ uint32_t k;
218+ in.read ((char *)&k, sizeof (uint32_t ));
219+
220+ in.seekg (k * sizeof (uint32_t ), std::ios::cur);
221+ bytes_read += sizeof (uint32_t ) * (k + 1 );
222+
223+ min_degree = std::min (min_degree, (size_t )k);
224+ max_degree = std::max (max_degree, (size_t )k);
225+ total_degree += k;
226+ if (k < 2 )
227+ nodes_with_less_than_2_degree++;
228+
229+ num_nodes++;
230+ }
231+
232+ log (" \n ----------------------------------------\n " );
233+ log (" Graph Statistics:\n " );
234+ log (" ----------------------------------------\n " );
235+ log (" Total Nodes : %zu\n " , num_nodes);
236+ log (" Max Degree : %zu\n " , max_degree);
237+ log (" Min Degree : %zu\n " , min_degree);
238+ log (" Average Degree : %.2f\n " , num_nodes > 0 ? (float )total_degree / num_nodes : 0 .0f );
239+ log (" Count (Degree<2) : %zu\n " , nodes_with_less_than_2_degree);
240+ log (" ----------------------------------------\n\n " );
241+ }
172242std::unique_ptr<diskann::AbstractIndex> load_index (const std::string &index_path, const Dataset &ds,
173243 uint32_t num_threads, uint32_t scratch_size)
174244{
@@ -311,12 +381,6 @@ void run_dynamic_data_test(const Dataset &ds, const DatasetConfig &conf, bool fo
311381 std::vector<uint32_t > tags (data_num);
312382 std::iota (tags.begin (), tags.end (), 1 );
313383
314- auto query_data = ds.load_query ();
315- size_t query_num = ds.info ().query_count ;
316- size_t query_dim = ds.info ().dims ;
317-
318- auto ground_truth = ds.load_groundtruth (conf.anns_k , true );
319-
320384 std::vector<DynamicScenario> scenarios = {DynamicScenario::AddHalf, DynamicScenario::AddAllRemoveHalf,
321385 DynamicScenario::AddHalfRemoveAndAddOneAtATime};
322386
@@ -372,88 +436,89 @@ void run_dynamic_data_test(const Dataset &ds, const DatasetConfig &conf, bool fo
372436 .is_concurrent_consolidate (false )
373437 .build ();
374438
375- auto index_factory = diskann::IndexFactory (config);
376- auto index = index_factory.create_instance ();
377- index->set_start_points_at_random (static_cast <float >(0 ));
439+ {
440+ auto index_factory = diskann::IndexFactory (config);
441+ auto index = index_factory.create_instance ();
442+ index->set_start_points_at_random (static_cast <float >(0 ));
378443
379- const size_t max_elements = data_num;
380- const size_t half_elements = max_elements / 2 ;
444+ log (" \n Construction Parameters:\n " );
445+ log (" ----------------------------------------\n " );
446+ log (" R (Max Degree) : %u\n " , build_params.R );
447+ log (" L (Build List Size): %u\n " , build_params.L );
448+ log (" Alpha : %.2f\n " , build_params.alpha );
449+ log (" PQ Chunks : %u\n " , build_params.build_PQ_bytes );
450+ log (" OPQ : %s\n " , build_params.use_opq ? " Yes" : " No" );
451+ log (" ----------------------------------------\n " );
381452
382- StopW scenario_timer ;
383- log ( " \n --- Dynamic updates --- \n " ) ;
453+ const size_t max_elements = data_num ;
454+ const size_t half_elements = max_elements / 2 ;
384455
385- if (scenario == DynamicScenario::AddHalf)
386- {
387- StopW add_timer;
388- for (size_t i = 0 ; i < half_elements; ++i)
389- {
390- index->insert_point (&data[i * data_dim], tags[i]);
391- }
392- log (" Add time: %.2f s\n " , (add_timer.getElapsedTimeMicro () / 1e6 ));
393- }
394- else if (scenario == DynamicScenario::AddAllRemoveHalf)
395- {
396- StopW add_timer;
397- for (size_t i = 0 ; i < max_elements; ++i)
456+ StopW scenario_timer;
457+ log (" \n --- Dynamic updates ---\n " );
458+
459+ if (scenario == DynamicScenario::AddHalf)
398460 {
399- index->insert_point (&data[i * data_dim], tags[i]);
461+ StopW add_timer;
462+ for (size_t i = 0 ; i < half_elements; ++i)
463+ {
464+ index->insert_point (&data[i * data_dim], tags[i]);
465+ }
466+ log (" Add time: %.2f s\n " , (add_timer.getElapsedTimeMicro () / 1e6 ));
400467 }
401- log (" Add time: %.2f s\n " , (add_timer.getElapsedTimeMicro () / 1e6 ));
402-
403- StopW del_stopw;
404- for (size_t i = half_elements; i < max_elements; ++i)
468+ else if (scenario == DynamicScenario::AddAllRemoveHalf)
405469 {
406- index->lazy_delete (tags[i]);
470+ StopW add_timer;
471+ for (size_t i = 0 ; i < max_elements; ++i)
472+ {
473+ index->insert_point (&data[i * data_dim], tags[i]);
474+ }
475+ log (" Add time: %.2f s\n " , (add_timer.getElapsedTimeMicro () / 1e6 ));
476+
477+ StopW del_stopw;
478+ for (size_t i = half_elements; i < max_elements; ++i)
479+ {
480+ index->lazy_delete (tags[i]);
481+ }
482+ log (" Delete time: %.2f s\n " , (del_stopw.getElapsedTimeMicro () / 1e6 ));
407483 }
408- log (" Delete time: %.2f s\n " , (del_stopw.getElapsedTimeMicro () / 1e6 ));
409- }
410- else if (scenario == DynamicScenario::AddHalfRemoveAndAddOneAtATime)
411- {
412- // IMPORTANT: The half-dataset ground truth files correspond to the first half of labels [0..half-1].
413- // For this scenario we want to end up with exactly that active set.
414- // Therefore: start with the SECOND half in the index, then swap it out one-by-one.
415- StopW add_timer;
416- for (size_t i = half_elements; i < max_elements; ++i)
484+ else if (scenario == DynamicScenario::AddHalfRemoveAndAddOneAtATime)
417485 {
418- index->insert_point (&data[i * data_dim], tags[i]);
486+ // IMPORTANT: The half-dataset ground truth files correspond to the first half of labels
487+ // [0..half-1]. For this scenario we want to end up with exactly that active set. Therefore:
488+ // start with the SECOND half in the index, then swap it out one-by-one.
489+ StopW add_timer;
490+ for (size_t i = half_elements; i < max_elements; ++i)
491+ {
492+ index->insert_point (&data[i * data_dim], tags[i]);
493+ }
494+ log (" Add (second half) time: %.2f s\n " , (add_timer.getElapsedTimeMicro () / 1e6 ));
495+
496+ StopW update_stopw;
497+ for (size_t i = 0 ; i < half_elements; ++i)
498+ {
499+ index->lazy_delete (tags[i + half_elements]);
500+ index->insert_point (&data[i * data_dim], tags[i]);
501+ }
502+ log (" Update (Delete + Add) time: %.2f s\n " , (update_stopw.getElapsedTimeMicro () / 1e6 ));
419503 }
420- log (" Add (second half) time: %.2f s\n " , (add_timer.getElapsedTimeMicro () / 1e6 ));
421504
422- StopW update_stopw;
423- for (size_t i = 0 ; i < half_elements; ++i)
505+ if (scenario != DynamicScenario::AddHalf)
424506 {
425- index->lazy_delete (tags[i + half_elements]);
426- index->insert_point (&data[i * data_dim], tags[i]);
507+ StopW cons_stopw;
508+ index->consolidate_deletes (index_build_params);
509+ log (" Consolidate time: %.2f s\n " , (cons_stopw.getElapsedTimeMicro () / 1e6 ));
427510 }
428- log (" Update (Delete + Add) time: %.2f s\n " , (update_stopw.getElapsedTimeMicro () / 1e6 ));
429- }
430511
431- if (scenario != DynamicScenario::AddHalf)
432- {
433- StopW cons_stopw;
434- index->consolidate_deletes (index_build_params);
435- log (" Consolidate time: %.2f s\n " , (cons_stopw.getElapsedTimeMicro () / 1e6 ));
436- }
512+ log (" Gesamt Zeit (Dynamic Graph Construction): %.2f s\n " , (scenario_timer.getElapsedTimeMicro () / 1e6 ));
437513
438- log (" Gesamt Zeit (Dynamic Graph Construction): %.2f s\n " , (scenario_timer.getElapsedTimeMicro () / 1e6 ));
514+ index->save (index_path.c_str (), true );
515+ }
439516
440- index->save (index_path.c_str (), true );
517+ // Generate Graph Statistics (after index object is destroyed)
518+ generate_graph_stats (index_path);
441519
442- // Test the index in memory directly without reload
443- auto typed_index = dynamic_cast <diskann::Index<float , uint32_t , uint32_t > *>(index.get ());
444- if (typed_index)
445- {
446- if (ds.info ().metric == diskann::FAST_L2)
447- {
448- typed_index->optimize_index_layout ();
449- }
450- log (" ----------------------------------------\n " );
451- log (" Running ANNS Tests (k=%u)\n " , conf.anns_k );
452- log (" ----------------------------------------\n " );
453- test_diskann_anns<float , uint32_t , uint32_t >(typed_index, query_data.data , query_num, query_dim,
454- query_dim, ground_truth, conf.anns_k , conf.Lvec ,
455- num_threads);
456- }
520+ // Test the index by loading it from disk (out-of-context testing)
521+ run_anns_test (index_path, ds, conf, num_threads);
457522
458523 log (" %s: Log written to: %s\n " , scenario_name.c_str (), log_file.c_str ());
459524 }
@@ -500,10 +565,13 @@ void run_test_suite(const Dataset &ds, const DatasetConfig &conf, bool force_tes
500565 !diskann::benchmark::file_exists (index_path + " .data" ))
501566 {
502567 run_create_index (index_path, ds, conf, num_threads);
568+ // Generate Graph Statistics after build (out-of-context)
569+ generate_graph_stats (index_path);
503570 }
504571 else if (!only_test)
505572 {
506573 log (" Index files already exist at %s. Skipping build.\n " , index_path.c_str ());
574+ generate_graph_stats (index_path);
507575 }
508576
509577 run_anns_test (index_path, ds, conf, num_threads);
0 commit comments