Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
60 changes: 34 additions & 26 deletions cpp/bench/ann/src/cuvs/cuvs_ann_bench_param_parser.h
Original file line number Diff line number Diff line change
Expand Up @@ -265,51 +265,59 @@ void parse_build_param(const nlohmann::json& conf, cuvs::neighbors::cagra::index
params.guarantee_connectivity = conf.at("guarantee_connectivity");
}

// Override the graph_build_algo if requested explicitly
if (conf.contains("variable_graph_degree_fraction")) {
params.variable_graph_degree_fraction = conf.at("variable_graph_degree_fraction");
}

// Extract build-algo-specific parameters
nlohmann::json ivf_pq_build_conf = collect_conf_with_prefix(conf, "ivf_pq_build_");
nlohmann::json ivf_pq_search_conf = collect_conf_with_prefix(conf, "ivf_pq_search_");
nlohmann::json nn_descent_conf = collect_conf_with_prefix(conf, "nn_descent_");
nlohmann::json ace_conf = collect_conf_with_prefix(conf, "ace_");

// Determine and initialize graph build algorithm.
// Priority 1: explicit "graph_build_algo" config key.
// Priority 2: infer from algorithm-specific prefixed config keys (only when monostate).
// Priority 3: leave as-is (from prior heuristics or monostate for AUTO at build time).
std::string graph_build_algo;
if (conf.contains("graph_build_algo")) {
if (conf.at("graph_build_algo") == "IVF_PQ") {
graph_build_algo = conf.at("graph_build_algo");
} else if (std::holds_alternative<std::monostate>(params.graph_build_params)) {
if (!ivf_pq_build_conf.empty() || !ivf_pq_search_conf.empty()) {
graph_build_algo = "IVF_PQ";
} else if (!nn_descent_conf.empty()) {
graph_build_algo = "NN_DESCENT";
} else if (!ace_conf.empty()) {
graph_build_algo = "ACE";
}
// else: leave as monostate → AUTO in cagra_build.cuh
}

if (!graph_build_algo.empty()) {
if (graph_build_algo == "IVF_PQ") {
if (!std::holds_alternative<cuvs::neighbors::graph_build_params::ivf_pq_params>(
params.graph_build_params)) {
params.graph_build_params = cuvs::neighbors::graph_build_params::ivf_pq_params{};
}
} else if (conf.at("graph_build_algo") == "NN_DESCENT") {
} else if (graph_build_algo == "NN_DESCENT") {
if (!std::holds_alternative<cuvs::neighbors::graph_build_params::nn_descent_params>(
params.graph_build_params)) {
params.graph_build_params = cuvs::neighbors::graph_build_params::nn_descent_params{};
params.graph_build_params = cuvs::neighbors::graph_build_params::nn_descent_params(
params.intermediate_graph_degree, params.metric);
}
} else if (conf.at("graph_build_algo") == "ACE") {
} else if (graph_build_algo == "ACE") {
if (!std::holds_alternative<cuvs::neighbors::graph_build_params::ace_params>(
params.graph_build_params)) {
params.graph_build_params = cuvs::neighbors::graph_build_params::ace_params{};
}
} else if (conf.at("graph_build_algo") == "ITERATIVE_SEARCH") {
} else if (graph_build_algo == "ITERATIVE_SEARCH") {
if (!std::holds_alternative<cuvs::neighbors::graph_build_params::iterative_search_params>(
params.graph_build_params)) {
params.graph_build_params = cuvs::neighbors::graph_build_params::iterative_search_params{};
}
}
}

// Parse build-algo-specific parameters and use them to decide on the algo type
nlohmann::json ivf_pq_build_conf = collect_conf_with_prefix(conf, "ivf_pq_build_");
nlohmann::json ivf_pq_search_conf = collect_conf_with_prefix(conf, "ivf_pq_search_");
nlohmann::json nn_descent_conf = collect_conf_with_prefix(conf, "nn_descent_");
nlohmann::json ace_conf = collect_conf_with_prefix(conf, "ace_");

// When graph_build_algo is not specified, leave graph_build_params as monostate so the
// CAGRA build uses AUTO selection (NN_DESCENT or IVF_PQ based on dataset/heuristics).
// Only infer from algo-specific config keys when present.
if (std::holds_alternative<std::monostate>(params.graph_build_params)) {
if (!ivf_pq_build_conf.empty() || !ivf_pq_search_conf.empty()) {
params.graph_build_params = cuvs::neighbors::graph_build_params::ivf_pq_params{};
} else if (!nn_descent_conf.empty()) {
params.graph_build_params = cuvs::neighbors::graph_build_params::nn_descent_params{};
} else if (!ace_conf.empty()) {
params.graph_build_params = cuvs::neighbors::graph_build_params::ace_params{};
}
// else: leave as monostate → AUTO in cagra_build.cuh
}

// Apply build-algo-specific parameters
std::visit(
[&](auto& arg) {
Expand Down
18 changes: 18 additions & 0 deletions cpp/include/cuvs/neighbors/cagra.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,24 @@ struct index_params : cuvs::neighbors::index_params {
size_t intermediate_graph_degree = 128;
/** Degree of output graph. */
size_t graph_degree = 64;
/**
* Fraction of the output graph_degree that defines the minimum per-node output degree,
* allowing variable-degree neighbor graphs.
*
* This fraction is used as the target for low-detour edges
* during the pruning step. Must be in (0, 1]. The default value of 1.0
* disables variable-degree logic (normal CAGRA behavior). Values < 1.0
* enable variable-degree graphs: the optimize step finds the minimum detour
* threshold that covers at least ceil(graph_degree * fraction) edges per node,
* then lets reverse edges expand the degree further. Unused slots are filled
* with a sentinel value (IdxT(-1)).
*
* This is intended for the CAGRA-to-HNSW conversion pipeline: the resulting
* graph, when imported into hnswlib, produces variable-degree neighbor lists
* similar to natively-built HNSW graphs. Do not use this with CAGRA's native
* GPU search.
*/
double variable_graph_degree_fraction = 1.0;
/**
* Specify compression parameters if compression is desired. If set, overrides the
* attach_dataset_on_build (and the compressed dataset is always added to the index).
Expand Down
12 changes: 7 additions & 5 deletions cpp/src/neighbors/cagra.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION.
* SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION.
* SPDX-License-Identifier: Apache-2.0
*/

Expand Down Expand Up @@ -40,13 +40,15 @@ cagra::index_params index_params::from_hnsw_params(raft::matrix_extent<int64_t>
cagra::index_params params;
switch (heuristic) {
case hnsw_heuristic_type::SAME_GRAPH_FOOTPRINT:
params.graph_degree = M * 2;
params.intermediate_graph_degree = M * 3;
params.graph_degree = M * 2;
params.intermediate_graph_degree = M * 3;
params.variable_graph_degree_fraction = 0.35;
break;
case hnsw_heuristic_type::SIMILAR_SEARCH_PERFORMANCE:
default:
params.graph_degree = 2 + M * 2 / 3;
params.intermediate_graph_degree = M + M * ef_construction / 256;
params.graph_degree = M;
params.intermediate_graph_degree = M + M * ef_construction / 256;
params.variable_graph_degree_fraction = 0.7;
break;
}
params.graph_build_params =
Expand Down
6 changes: 4 additions & 2 deletions cpp/src/neighbors/cagra.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -257,9 +257,11 @@ void optimize(
raft::resources const& res,
raft::mdspan<IdxT, raft::matrix_extent<int64_t>, raft::row_major, g_accessor> knn_graph,
raft::host_matrix_view<IdxT, int64_t, raft::row_major> new_graph,
const bool guarantee_connectivity = false)
const bool guarantee_connectivity = false,
const double variable_graph_degree_fraction = 1.0)
{
detail::optimize(res, knn_graph, new_graph, guarantee_connectivity);
detail::optimize(
res, knn_graph, new_graph, guarantee_connectivity, variable_graph_degree_fraction);
}

template <typename T,
Expand Down
24 changes: 18 additions & 6 deletions cpp/src/neighbors/detail/cagra/cagra_build.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -1904,7 +1904,8 @@ void optimize(
raft::resources const& res,
raft::mdspan<IdxT, raft::matrix_extent<int64_t>, raft::row_major, g_accessor> knn_graph,
raft::host_matrix_view<IdxT, int64_t, raft::row_major> new_graph,
const bool guarantee_connectivity = false)
const bool guarantee_connectivity = false,
const double variable_graph_degree_fraction = 1.0)
{
using internal_IdxT = typename std::make_unsigned<IdxT>::type;

Expand All @@ -1921,8 +1922,12 @@ void optimize(
knn_graph.extent(0),
knn_graph.extent(1));

cagra::detail::graph::optimize(
res, knn_graph_internal, new_graph_internal, guarantee_connectivity);
cagra::detail::graph::optimize(res,
knn_graph_internal,
new_graph_internal,
guarantee_connectivity,
true,
variable_graph_degree_fraction);
}

// RAII wrapper for allocating memory with Transparent HugePage
Expand Down Expand Up @@ -2141,8 +2146,11 @@ auto iterative_build_graph(
auto next_graph_size = curr_query_size;
cagra_graph = raft::make_host_matrix<IdxT, int64_t>(0, 0); // delete existing graph
cagra_graph = raft::make_host_matrix<IdxT, int64_t>(next_graph_size, next_graph_degree);
optimize<IdxT>(
res, neighbors_view, cagra_graph.view(), flag_last ? params.guarantee_connectivity : 0);
optimize<IdxT>(res,
neighbors_view,
cagra_graph.view(),
flag_last ? params.guarantee_connectivity : false,
flag_last ? params.variable_graph_degree_fraction : 1.0);

auto end = std::chrono::high_resolution_clock::now();
auto elapsed_ms = std::chrono::duration_cast<std::chrono::milliseconds>(end - start).count();
Expand Down Expand Up @@ -2262,7 +2270,11 @@ index<T, IdxT> build(
cagra_graph = raft::make_host_matrix<IdxT, int64_t>(dataset.extent(0), graph_degree);

RAFT_LOG_TRACE("optimizing graph");
optimize<IdxT>(res, knn_graph->view(), cagra_graph.view(), params.guarantee_connectivity);
optimize<IdxT>(res,
knn_graph->view(),
cagra_graph.view(),
params.guarantee_connectivity,
params.variable_graph_degree_fraction);

// free intermediate graph before trying to create the index
knn_graph.reset();
Expand Down
10 changes: 8 additions & 2 deletions cpp/src/neighbors/detail/cagra/cagra_serialize.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -196,8 +196,14 @@ void serialize_to_hnswlib(
size_t bytes_written = 0;
float GiB = 1 << 30;
for (std::size_t i = 0; i < index_.size(); i++) {
auto graph_degree = static_cast<int>(index_.graph_degree());
os.write(reinterpret_cast<char*>(&graph_degree), sizeof(int));
int actual_degree = static_cast<int>(index_.graph_degree());
for (int j = 0; j < actual_degree; j++) {
if (host_graph(i, j) == static_cast<IdxT>(-1)) {
actual_degree = j;
break;
}
}
os.write(reinterpret_cast<char*>(&actual_degree), sizeof(int));

IdxT* graph_row = &host_graph(i, 0);
os.write(reinterpret_cast<char*>(graph_row), sizeof(IdxT) * index_.graph_degree());
Expand Down
67 changes: 52 additions & 15 deletions cpp/src/neighbors/detail/cagra/graph_core.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -1151,26 +1151,34 @@ void optimize(
raft::resources const& res,
raft::mdspan<IdxT, raft::matrix_extent<int64_t>, raft::row_major, g_accessor> knn_graph,
raft::host_matrix_view<IdxT, int64_t, raft::row_major> new_graph,
const bool guarantee_connectivity = true,
const bool use_gpu = true)
const bool guarantee_connectivity = true,
const bool use_gpu = true,
const double variable_graph_degree_fraction = 1.0)
{
RAFT_LOG_DEBUG(
"# Pruning kNN graph (size=%lu, degree=%lu)\n", knn_graph.extent(0), knn_graph.extent(1));
auto large_tmp_mr = raft::resource::get_large_workspace_resource(res);

RAFT_EXPECTS(knn_graph.extent(0) == new_graph.extent(0),
"Each input array is expected to have the same number of rows");
RAFT_EXPECTS(new_graph.extent(1) <= knn_graph.extent(1),
"output graph cannot have more columns than input graph");
// const uint64_t input_graph_degree = knn_graph.extent(1);
const uint64_t knn_graph_degree = knn_graph.extent(1);
const uint64_t output_graph_degree = new_graph.extent(1);
const uint64_t graph_size = new_graph.extent(0);
const uint64_t knn_graph_degree = knn_graph.extent(1);
const uint64_t output_graph_degree = new_graph.extent(1);
const uint64_t graph_size = new_graph.extent(0);
const uint64_t target_pruned_degree = std::max<uint64_t>(
1, static_cast<uint64_t>(std::ceil(output_graph_degree * variable_graph_degree_fraction)));
// auto input_graph_ptr = knn_graph.data_handle();
auto output_graph_ptr = new_graph.data_handle();
RAFT_LOG_INFO("# Pruning kNN graph (size=%lu, degree=%lu, target_pruned_degree=%lu)\n",
graph_size,
knn_graph_degree,
target_pruned_degree);
raft::common::nvtx::range<cuvs::common::nvtx::domain::cuvs> fun_scope(
"cagra::graph::optimize(%zu, %zu, %u)", graph_size, knn_graph_degree, output_graph_degree);

const bool variable_graph_degree = (target_pruned_degree < output_graph_degree);
auto natural_degree_vec = raft::make_host_vector<uint32_t, int64_t>(graph_size);

// MST optimization
auto mst_graph = raft::make_host_matrix<IdxT, int64_t, raft::row_major>(0, 0);
auto mst_graph_num_edges = raft::make_host_vector<uint32_t, int64_t>(graph_size);
Expand Down Expand Up @@ -1333,8 +1341,9 @@ void optimize(
for (uint64_t i = 0; i < graph_size; i++) {
// Find the `output_graph_degree` neighbors with the smallest detourable counts by checking the
// detourable count of each neighbor while increasing the target detourable count from zero.
uint64_t pk = 0;
uint32_t num_detour = 0;
uint64_t pk = 0;
uint32_t num_detour = 0;
uint64_t num_low_detour = 0;
for (uint32_t l = 0; l < knn_graph_degree && pk < output_graph_degree; l++) {
uint32_t next_num_detour = std::numeric_limits<uint32_t>::max();
for (uint64_t k = 0; k < knn_graph_degree; k++) {
Expand Down Expand Up @@ -1362,6 +1371,7 @@ void optimize(
}
if (pk >= output_graph_degree) break;
}
if (num_low_detour < target_pruned_degree) { num_low_detour = pk; }
if (pk >= output_graph_degree) break;

if (next_num_detour == std::numeric_limits<uint32_t>::max()) {
Expand All @@ -1379,6 +1389,9 @@ void optimize(
i);
invalid_neighbor_list = true;
}
natural_degree_vec(i) = variable_graph_degree
? std::min<uint64_t>(num_low_detour, output_graph_degree)
: output_graph_degree;
}
RAFT_EXPECTS(
!invalid_neighbor_list,
Expand Down Expand Up @@ -1418,8 +1431,9 @@ void optimize(
for (uint64_t k = 0; k < output_graph_degree; k++) {
#pragma omp parallel for
for (uint64_t i = 0; i < graph_size; i++) {
// dest_nodes.data_handle()[i] = output_graph_ptr[k + (output_graph_degree * i)];
dest_nodes(i) = output_graph_ptr[k + (output_graph_degree * i)];
dest_nodes(i) = (k < natural_degree_vec(i))
? output_graph_ptr[k + (output_graph_degree * i)]
: static_cast<IdxT>(graph_size);
}
raft::resource::sync_stream(res);

Expand Down Expand Up @@ -1460,6 +1474,8 @@ void optimize(
bool check_num_protected_edges = true;
#pragma omp parallel for
for (uint64_t i = 0; i < graph_size; i++) {
auto effective_degree = variable_graph_degree ? natural_degree_vec(i) : output_graph_degree;

auto my_rev_graph = rev_graph.data_handle() + (output_graph_degree * i);
auto my_out_graph = output_graph_ptr + (output_graph_degree * i);

Expand Down Expand Up @@ -1498,10 +1514,9 @@ void optimize(
}
}

const auto num_protected_edges =
std::max<uint64_t>(mst_graph_num_edges_ptr[i], output_graph_degree / 2);
if (num_protected_edges > output_graph_degree) { check_num_protected_edges = false; }
if (num_protected_edges == output_graph_degree) continue;
const auto num_protected_edges = std::max<uint64_t>(
mst_graph_num_edges_ptr[i], std::min<uint64_t>(effective_degree, output_graph_degree / 2));
if (num_protected_edges > effective_degree) { check_num_protected_edges = false; }

// Replace some edges of the output graph with edges of the reverse graph.
auto kr = std::min<uint32_t>(rev_graph_count.data_handle()[i], output_graph_degree);
Expand All @@ -1510,13 +1525,22 @@ void optimize(
if (my_rev_graph[kr] < graph_size) {
uint64_t pos = pos_in_array<IdxT>(my_rev_graph[kr], my_out_graph, output_graph_degree);
if (pos < num_protected_edges) { continue; }

uint64_t num_shift = pos - num_protected_edges;
if (pos >= output_graph_degree) {
num_shift = output_graph_degree - num_protected_edges - 1;
}
shift_array<IdxT>(my_out_graph + num_protected_edges, num_shift);
my_out_graph[num_protected_edges] = my_rev_graph[kr];
if (effective_degree < output_graph_degree) { effective_degree++; }
}
}

if (variable_graph_degree) {
for (uint32_t j = effective_degree; j < output_graph_degree; j++) {
my_out_graph[j] = static_cast<IdxT>(-1);
}
natural_degree_vec(i) = effective_degree;
}

// If guarantee_connectivity == true, move the output neighbor list from the temporal list to
Expand All @@ -1532,6 +1556,17 @@ void optimize(
"Failed to merge the MST, pruned, and reverse edge graphs. Some nodes have too "
"many MST optimization edges.");

if (variable_graph_degree) {
uint64_t total_natural = 0;
#pragma omp parallel for reduction(+ : total_natural)
for (uint64_t i = 0; i < graph_size; i++) {
total_natural += natural_degree_vec(i);
}
RAFT_LOG_INFO("# Variable graph degree: avg natural degree = %.2f / %u",
static_cast<double>(total_natural) / graph_size,
output_graph_degree);
}

const double time_replace_end = cur_time();
RAFT_LOG_DEBUG("# Replacing edges time: %.1lf ms",
(time_replace_end - time_replace_start) * 1000.0);
Expand Down Expand Up @@ -1607,6 +1642,8 @@ void optimize(
for (uint32_t j = 0; j < output_graph_degree; j++) {
const auto neighbor_a = my_out_graph[j];

if (neighbor_a == static_cast<IdxT>(-1)) { continue; }

// Check oor
if (neighbor_a > graph_size) {
num_oor++;
Expand Down
Loading
Loading