From 7fe04abe1efd4653dca8047c6b024eccfa2d05a5 Mon Sep 17 00:00:00 2001 From: "min.tian" Date: Fri, 6 Jun 2025 16:45:52 +0800 Subject: [PATCH] use iterator-based range search when ann_iterator is supported Signed-off-by: min.tian --- src/index/hnsw/faiss_hnsw.cc | 41 +++++++++++++++++++++++++++++------- 1 file changed, 33 insertions(+), 8 deletions(-) diff --git a/src/index/hnsw/faiss_hnsw.cc b/src/index/hnsw/faiss_hnsw.cc index 35729fa9d..a27c9876f 100644 --- a/src/index/hnsw/faiss_hnsw.cc +++ b/src/index/hnsw/faiss_hnsw.cc @@ -748,8 +748,13 @@ class FaissHnswIterator : public IndexIterator { FaissHnswIterator(const std::shared_ptr& index_in, const std::shared_ptr>& labels_in, std::unique_ptr&& query_in, const BitsetView& bitset_in, const int32_t ef_in, bool larger_is_closer, - const float refine_ratio = 0.5f, bool use_knowhere_search_pool = true) - : IndexIterator(larger_is_closer, use_knowhere_search_pool, refine_ratio), index{index_in}, labels{labels_in} { + const float refine_ratio = 0.5f, const std::vector& label_to_internal_offset_in = {}, + const uint32_t mv_base_offset_in = 0, bool use_knowhere_search_pool = true) + : IndexIterator(larger_is_closer, use_knowhere_search_pool, refine_ratio), + index{index_in}, + labels{labels_in}, + label_to_internal_offset(label_to_internal_offset_in), + mv_base_offset(mv_base_offset_in) { workspace.accumulated_alpha = (bitset_in.count() >= (index->ntotal * HnswSearchThresholds::kHnswSearchKnnBFFilterThreshold)) ? 
std::numeric_limits::max() @@ -989,13 +994,18 @@ class FaissHnswIterator : public IndexIterator { float raw_distance(int64_t id) override { - const float refined_distance = workspace.qdis_refine->operator()(id); - return refined_distance; + if (label_to_internal_offset.empty()) { + return workspace.qdis_refine->operator()(id); + } + auto mv_internal_offset = label_to_internal_offset[id] - mv_base_offset; + return workspace.qdis_refine->operator()(mv_internal_offset); } private: std::shared_ptr index; std::shared_ptr> labels; + const std::vector& label_to_internal_offset; // internal_offset = label_to_internal_offset[label_id]; + const uint32_t mv_base_offset; // mv_internal_offset = internal_offset - mv_base_offset; FaissHnswIteratorWorkspace workspace; }; @@ -1328,6 +1338,10 @@ class BaseFaissRegularIndexHNSWNode : public BaseFaissRegularIndexNode { expected RangeSearch(const DataSetPtr dataset, std::unique_ptr cfg, const BitsetView& bitset) const override { + // if support ann_iterator, use iterator-based range_search (IndexNode::RangeSearch) + if (is_ann_iterator_supported()) { + return IndexNode::RangeSearch(dataset, std::move(cfg), bitset); + } if (this->indexes.empty()) { return expected::Err(Status::empty_index, "index not loaded"); } @@ -1637,7 +1651,15 @@ class BaseFaissRegularIndexHNSWNode : public BaseFaissRegularIndexNode { } public: - // + bool + is_ann_iterator_supported() const { + if (data_format != DataFormatEnum::fp32 && data_format != DataFormatEnum::fp16 && + data_format != DataFormatEnum::bf16) { + return false; + } + return true; + } + expected> AnnIterator(const DataSetPtr dataset, std::unique_ptr cfg, const BitsetView& bitset, bool use_knowhere_search_pool) const override { @@ -1646,8 +1668,7 @@ class BaseFaissRegularIndexHNSWNode : public BaseFaissRegularIndexNode { return expected>::Err(Status::empty_index, "index not loaded"); } - if (data_format != DataFormatEnum::fp32 && data_format != DataFormatEnum::fp16 && - data_format != 
DataFormatEnum::bf16) { + if (!is_ann_iterator_supported()) { LOG_KNOWHERE_ERROR_ << "Unsupported data format"; return expected>::Err(Status::invalid_args, "unsupported data format"); } @@ -1698,9 +1719,13 @@ class BaseFaissRegularIndexHNSWNode : public BaseFaissRegularIndexNode { // create an iterator and initialize it // refine is not needed for flat // hnsw_cfg.iterator_refine_ratio.value_or(0.5f) + + uint32_t mv_base_offset = index_rows_sum.size() > index_id ? index_rows_sum[index_id] : 0; + auto it = std::make_shared( indexes[index_id], labels.empty() ? nullptr : labels[index_id], std::move(cur_query), bitset, ef, - larger_is_closer, iterator_refine_ratio, use_knowhere_search_pool); + larger_is_closer, iterator_refine_ratio, label_to_internal_offset, mv_base_offset, + use_knowhere_search_pool); // store vec[i] = it; }