@@ -2437,3 +2437,112 @@ TEST_CASE("External ID Map for 1-hop Bitset Check", "[external_id_map]") {
24372437 }
24382438 }
24392439}
2440+
2441+ // Regression test for knowhere#1561 / milvus-io/milvus#44854.
2442+ //
2443+ // Before the fix, IndexBruteForceWrapper::range_search unconditionally passed
2444+ // the caller's IDSelector (a BitsetViewIDSelector wrapping an empty BitsetView
2445+ // when no scalar filter is present) to brute_force_range_search_impl.
2446+ // BitsetView::test(idx) short-circuits to true when num_bits_ == 0 (see
2447+ // bitsetview.h), so BitsetViewIDSelector::is_member returned false for every
2448+ // id and the BF loop emitted zero results.
2449+ //
2450+ // The fix mirrors the empty-bitset fallback already present in the topk
2451+ // IndexBruteForceWrapper::search: if sel is null or wraps an empty bitset,
2452+ // fall back to IDSelectorAll.
2453+ //
2454+ // The BF range_search path is only reachable when
2455+ // `ef >= ntotal * kHnswSearchBFTopkThreshold` (= 0.5), added by #1535 on main
2456+ // and #1536 on 2.6. Setting ef == nb guarantees the BF path is taken.
2457+ TEST_CASE (" HNSW RangeSearch BF path with empty bitset" , " [faiss_hnsw][range_search][regression]" ) {
2458+ const int32_t nb = 256 ;
2459+ const int32_t dim = 16 ;
2460+ const int32_t nq = 1 ;
2461+ // ef >= nb * kHnswSearchBFTopkThreshold (= 0.5) forces the BF path.
2462+ const int32_t ef = nb;
2463+ // Pick a distance band inside the sorted topk list (positions [5, 50])
2464+ // so the band is known to contain a non-trivial number of vectors.
2465+ const int32_t band_near_pos = 5 ;
2466+ const int32_t band_far_pos = 50 ;
2467+
2468+ // Covers both BF range_search branches: similarity (RH_max) for IP/COSINE
2469+ // and non-similarity (RH_min) for L2. Both had the same latent bug and
2470+ // share the same fix.
2471+ const std::vector<std::string> metrics = {
2472+ knowhere::metric::IP,
2473+ knowhere::metric::L2,
2474+ knowhere::metric::COSINE,
2475+ };
2476+
2477+ const auto version = knowhere::Version::GetCurrentVersion ().VersionNumber ();
2478+
2479+ for (const auto & metric : metrics) {
2480+ CAPTURE (metric);
2481+
2482+ auto train_ds = GenDataSet (nb, dim, /* seed=*/ 42 );
2483+ auto query_ds = GenDataSet (nq, dim, /* seed=*/ 43 );
2484+
2485+ knowhere::Json base_conf;
2486+ base_conf[knowhere::meta::METRIC_TYPE] = metric;
2487+ base_conf[knowhere::meta::DIM] = dim;
2488+ base_conf[knowhere::meta::ROWS] = nb;
2489+
2490+ // FLAT oracle index
2491+ auto flat_index = knowhere::IndexFactory::Instance ()
2492+ .Create <knowhere::fp32>(knowhere::IndexEnum::INDEX_FAISS_IDMAP, version)
2493+ .value ();
2494+ REQUIRE (flat_index.Build (train_ds, base_conf) == knowhere::Status::success);
2495+
2496+ // Probe distance distribution with a topk=nb FLAT search to pick a
2497+ // data-driven band; this avoids hard-coding radius/range_filter values.
2498+ // For every supported metric, FLAT topk returns results in "closest
2499+ // first" order, so position 5 is closer than position 50 regardless
2500+ // of whether larger_is_closer holds.
2501+ knowhere::Json topk_conf = base_conf;
2502+ topk_conf[knowhere::meta::TOPK] = nb;
2503+ auto topk_res = flat_index.Search (query_ds, topk_conf, nullptr );
2504+ REQUIRE (topk_res.has_value ());
2505+ const float * dists = topk_res.value ()->GetDistance ();
2506+ const float range_filter = dists[band_near_pos];
2507+ const float radius = dists[band_far_pos];
2508+
2509+ // Build HNSW with ef large enough to trigger the faiss BF range_search path.
2510+ knowhere::Json range_conf = base_conf;
2511+ range_conf[knowhere::meta::INDEX_TYPE] = knowhere::IndexEnum::INDEX_HNSW;
2512+ range_conf[knowhere::indexparam::HNSW_M] = 16 ;
2513+ range_conf[knowhere::indexparam::EFCONSTRUCTION] = 96 ;
2514+ range_conf[knowhere::indexparam::EF] = ef;
2515+ range_conf[knowhere::meta::RADIUS] = radius;
2516+ range_conf[knowhere::meta::RANGE_FILTER] = range_filter;
2517+ range_conf[knowhere::meta::RANGE_SEARCH_K] = -1 ;
2518+
2519+ auto hnsw_index =
2520+ knowhere::IndexFactory::Instance ().Create <knowhere::fp32>(knowhere::IndexEnum::INDEX_HNSW, version).value ();
2521+ REQUIRE (hnsw_index.Build (train_ds, range_conf) == knowhere::Status::success);
2522+
2523+ // FLAT oracle for the band.
2524+ knowhere::Json flat_range_conf = range_conf;
2525+ flat_range_conf[knowhere::meta::INDEX_TYPE] = knowhere::IndexEnum::INDEX_FAISS_IDMAP;
2526+ auto flat_range_res = flat_index.RangeSearch (query_ds, flat_range_conf, nullptr );
2527+ REQUIRE (flat_range_res.has_value ());
2528+ const size_t flat_total = flat_range_res.value ()->GetLims ()[nq];
2529+ REQUIRE (flat_total > 0 ); // sanity: oracle must see vectors inside the band
2530+
2531+ // HNSW range_search with empty bitset (the regression target). A
2532+ // nullptr-initialized BitsetView is empty() and triggers the bug path.
2533+ knowhere::BitsetView empty_bitset_view = nullptr ;
2534+ REQUIRE (empty_bitset_view.empty ());
2535+
2536+ auto hnsw_range_res = hnsw_index.RangeSearch (query_ds, range_conf, empty_bitset_view);
2537+ REQUIRE (hnsw_range_res.has_value ());
2538+ const size_t hnsw_total = hnsw_range_res.value ()->GetLims ()[nq];
2539+
2540+ INFO (" flat_total=" << flat_total << " hnsw_total=" << hnsw_total);
2541+
2542+ // Critical regression assertion: without the fix, range_search returned
2543+ // zero here because BitsetViewIDSelector::is_member rejected every id.
2544+ // After the fix, the BF path is exhaustive and returns a result set
2545+ // that covers the FLAT oracle.
2546+ REQUIRE (hnsw_total > 0 );
2547+ }
2548+ }
0 commit comments