Skip to content

Commit c21d72a

Browse files
authored
fix: handle empty bitset in IndexBruteForceWrapper::range_search (#1562)
IndexBruteForceWrapper::range_search passed a BitsetViewIDSelector verbatim to brute_force_range_search_impl regardless of whether the underlying BitsetView was empty. When empty, BitsetView::test short-circuits to true (out_id >= num_bits_=0), so is_member returns false for every id and the brute force loop emits zero results. Mirror the empty-bitset guard already present in IndexBruteForceWrapper::search (topk variant): if sel is null, or is a BitsetViewIDSelector wrapping an empty bitset, fall back to IDSelectorAll. This latent bug has existed since #853 but was unreachable for HNSW until #1535 routed the BF range_search path. Now any HNSW range_search with ef >= ntotal * kHnswSearchBFTopkThreshold (= 0.5) and no scalar filter returns 0 results. issue: #1561 Signed-off-by: xianliang.li <xianliang.li@zilliz.com>
1 parent cb02903 commit c21d72a

2 files changed

Lines changed: 120 additions & 2 deletions

File tree

src/index/hnsw/impl/IndexBruteForceWrapper.cc

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -126,11 +126,20 @@ IndexBruteForceWrapper::range_search(faiss::idx_t n, const float* x, float radiu
126126
// set up a filter
127127
faiss::IDSelector* __restrict sel = (params == nullptr) ? nullptr : params->sel;
128128

129+
// If `sel` is a knowhere BitsetViewIDSelector wrapping an empty bitset, BitsetView::test
130+
// returns true for every id (out_id >= num_bits_=0 short-circuits), so is_member returns
131+
// false for every id and the BF range_search would emit zero results. Fall back to
132+
// IDSelectorAll in that case, mirroring the guard already present in
133+
// IndexBruteForceWrapper::search above.
134+
const knowhere::BitsetViewIDSelector* __restrict bw_idselector =
135+
dynamic_cast<const knowhere::BitsetViewIDSelector*>(sel);
136+
const bool sel_accepts_all = (sel == nullptr) || (bw_idselector && bw_idselector->bitset_view.empty());
137+
129138
if (faiss::cppcontrib::knowhere::is_similarity_metric(index->metric_type)) {
130139
typename RH_max::SingleResultHandler res_max(bres_max);
131140
res_max.begin(i);
132141

133-
if (sel == nullptr) {
142+
if (sel_accepts_all) {
134143
// Compiler is expected to de-virtualize virtual method calls
135144
faiss::IDSelectorAll sel_all;
136145

@@ -148,7 +157,7 @@ IndexBruteForceWrapper::range_search(faiss::idx_t n, const float* x, float radiu
148157
typename RH_min::SingleResultHandler res_min(bres_min);
149158
res_min.begin(i);
150159

151-
if (sel == nullptr) {
160+
if (sel_accepts_all) {
152161
// Compiler is expected to de-virtualize virtual method calls
153162
faiss::IDSelectorAll sel_all;
154163

tests/ut/test_faiss_hnsw.cc

Lines changed: 109 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2437,3 +2437,112 @@ TEST_CASE("External ID Map for 1-hop Bitset Check", "[external_id_map]") {
24372437
}
24382438
}
24392439
}
2440+
2441+
// Regression test for knowhere#1561 / milvus-io/milvus#44854.
2442+
//
2443+
// Before the fix, IndexBruteForceWrapper::range_search unconditionally passed
2444+
// the caller's IDSelector (a BitsetViewIDSelector wrapping an empty BitsetView
2445+
// when no scalar filter is present) to brute_force_range_search_impl.
2446+
// BitsetView::test(idx) short-circuits to true when num_bits_ == 0 (see
2447+
// bitsetview.h), so BitsetViewIDSelector::is_member returned false for every
2448+
// id and the BF loop emitted zero results.
2449+
//
2450+
// The fix mirrors the empty-bitset fallback already present in the topk
2451+
// IndexBruteForceWrapper::search: if sel is null or wraps an empty bitset,
2452+
// fall back to IDSelectorAll.
2453+
//
2454+
// The BF range_search path is only reachable when
2455+
// `ef >= ntotal * kHnswSearchBFTopkThreshold` (= 0.5), added by #1535 on main
2456+
// and #1536 on 2.6. Setting ef == nb guarantees the BF path is taken.
2457+
TEST_CASE("HNSW RangeSearch BF path with empty bitset", "[faiss_hnsw][range_search][regression]") {
2458+
const int32_t nb = 256;
2459+
const int32_t dim = 16;
2460+
const int32_t nq = 1;
2461+
// ef >= nb * kHnswSearchBFTopkThreshold (= 0.5) forces the BF path.
2462+
const int32_t ef = nb;
2463+
// Pick a distance band inside the sorted topk list (positions [5, 50])
2464+
// so the band is known to contain a non-trivial number of vectors.
2465+
const int32_t band_near_pos = 5;
2466+
const int32_t band_far_pos = 50;
2467+
2468+
// Covers both BF range_search branches: similarity (RH_max) for IP/COSINE
2469+
// and non-similarity (RH_min) for L2. Both had the same latent bug and
2470+
// share the same fix.
2471+
const std::vector<std::string> metrics = {
2472+
knowhere::metric::IP,
2473+
knowhere::metric::L2,
2474+
knowhere::metric::COSINE,
2475+
};
2476+
2477+
const auto version = knowhere::Version::GetCurrentVersion().VersionNumber();
2478+
2479+
for (const auto& metric : metrics) {
2480+
CAPTURE(metric);
2481+
2482+
auto train_ds = GenDataSet(nb, dim, /*seed=*/42);
2483+
auto query_ds = GenDataSet(nq, dim, /*seed=*/43);
2484+
2485+
knowhere::Json base_conf;
2486+
base_conf[knowhere::meta::METRIC_TYPE] = metric;
2487+
base_conf[knowhere::meta::DIM] = dim;
2488+
base_conf[knowhere::meta::ROWS] = nb;
2489+
2490+
// FLAT oracle index
2491+
auto flat_index = knowhere::IndexFactory::Instance()
2492+
.Create<knowhere::fp32>(knowhere::IndexEnum::INDEX_FAISS_IDMAP, version)
2493+
.value();
2494+
REQUIRE(flat_index.Build(train_ds, base_conf) == knowhere::Status::success);
2495+
2496+
// Probe distance distribution with a topk=nb FLAT search to pick a
2497+
// data-driven band; this avoids hard-coding radius/range_filter values.
2498+
// For every supported metric, FLAT topk returns results in "closest
2499+
// first" order, so position 5 is closer than position 50 regardless
2500+
// of whether larger_is_closer holds.
2501+
knowhere::Json topk_conf = base_conf;
2502+
topk_conf[knowhere::meta::TOPK] = nb;
2503+
auto topk_res = flat_index.Search(query_ds, topk_conf, nullptr);
2504+
REQUIRE(topk_res.has_value());
2505+
const float* dists = topk_res.value()->GetDistance();
2506+
const float range_filter = dists[band_near_pos];
2507+
const float radius = dists[band_far_pos];
2508+
2509+
// Build HNSW with ef large enough to trigger the faiss BF range_search path.
2510+
knowhere::Json range_conf = base_conf;
2511+
range_conf[knowhere::meta::INDEX_TYPE] = knowhere::IndexEnum::INDEX_HNSW;
2512+
range_conf[knowhere::indexparam::HNSW_M] = 16;
2513+
range_conf[knowhere::indexparam::EFCONSTRUCTION] = 96;
2514+
range_conf[knowhere::indexparam::EF] = ef;
2515+
range_conf[knowhere::meta::RADIUS] = radius;
2516+
range_conf[knowhere::meta::RANGE_FILTER] = range_filter;
2517+
range_conf[knowhere::meta::RANGE_SEARCH_K] = -1;
2518+
2519+
auto hnsw_index =
2520+
knowhere::IndexFactory::Instance().Create<knowhere::fp32>(knowhere::IndexEnum::INDEX_HNSW, version).value();
2521+
REQUIRE(hnsw_index.Build(train_ds, range_conf) == knowhere::Status::success);
2522+
2523+
// FLAT oracle for the band.
2524+
knowhere::Json flat_range_conf = range_conf;
2525+
flat_range_conf[knowhere::meta::INDEX_TYPE] = knowhere::IndexEnum::INDEX_FAISS_IDMAP;
2526+
auto flat_range_res = flat_index.RangeSearch(query_ds, flat_range_conf, nullptr);
2527+
REQUIRE(flat_range_res.has_value());
2528+
const size_t flat_total = flat_range_res.value()->GetLims()[nq];
2529+
REQUIRE(flat_total > 0); // sanity: oracle must see vectors inside the band
2530+
2531+
// HNSW range_search with empty bitset (the regression target). A
2532+
// nullptr-initialized BitsetView is empty() and triggers the bug path.
2533+
knowhere::BitsetView empty_bitset_view = nullptr;
2534+
REQUIRE(empty_bitset_view.empty());
2535+
2536+
auto hnsw_range_res = hnsw_index.RangeSearch(query_ds, range_conf, empty_bitset_view);
2537+
REQUIRE(hnsw_range_res.has_value());
2538+
const size_t hnsw_total = hnsw_range_res.value()->GetLims()[nq];
2539+
2540+
INFO("flat_total=" << flat_total << " hnsw_total=" << hnsw_total);
2541+
2542+
// Critical regression assertion: without the fix, range_search returned
2543+
// zero here because BitsetViewIDSelector::is_member rejected every id.
2544+
// After the fix, the BF path is exhaustive and returns a result set
2545+
// that covers the FLAT oracle.
2546+
REQUIRE(hnsw_total > 0);
2547+
}
2548+
}

0 commit comments

Comments
 (0)