Skip to content

Commit 4746db1

Browse files
authored
Fix scann filter search performance and cleanup ensure_topk_full config (#1179)
Signed-off-by: chasingegg <chao.gao@zilliz.com>
1 parent 0191ec6 commit 4746db1

5 files changed

Lines changed: 39 additions & 17 deletions

File tree

src/index/ivf/ivf.cc

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -810,7 +810,7 @@ IvfIndexNode<DataType, IndexType>::Search(const DataSetPtr dataset, std::unique_
810810
faiss::IVFSearchParameters base_search_params;
811811
base_search_params.sel = id_selector;
812812
base_search_params.nprobe = nprobe;
813-
base_search_params.ensure_topk_full = ivf_cfg.ensure_topk_full.value();
813+
base_search_params.ensure_topk_full = scann_cfg.ensure_topk_full.value();
814814
if (base_search_params.ensure_topk_full) {
815815
if (auto base_index_ptr = reinterpret_cast<faiss::IndexIVFPQFastScan*>(index_->base_index)) {
816816
auto nlist = base_index_ptr->nlist;

src/index/ivf/ivf_config.h

Lines changed: 6 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -27,7 +27,7 @@ class IvfConfig : public BaseConfig {
2727
CFG_INT nlist;
2828
CFG_INT nprobe;
2929
CFG_BOOL use_elkan;
30-
CFG_BOOL ensure_topk_full; // only take affect on temp index(IVF_FLAT_CC) now
30+
CFG_BOOL ensure_topk_full; // internal config, used for temp index
3131
CFG_INT max_empty_result_buckets;
3232
KNOHWERE_DECLARE_CONFIG(IvfConfig) {
3333
KNOWHERE_CONFIG_DECLARE_FIELD(nlist)
@@ -109,6 +109,7 @@ class ScannConfig : public IvfFlatConfig {
109109
CFG_INT reorder_k;
110110
CFG_BOOL with_raw_data;
111111
CFG_INT sub_dim;
112+
CFG_BOOL ensure_topk_full;
112113
KNOHWERE_DECLARE_CONFIG(ScannConfig) {
113114
KNOWHERE_CONFIG_DECLARE_FIELD(reorder_k)
114115
.description("reorder k used for refining")
@@ -125,6 +126,10 @@ class ScannConfig : public IvfFlatConfig {
125126
.set_default(2)
126127
.for_train()
127128
.set_range(1, 65536);
129+
KNOWHERE_CONFIG_DECLARE_FIELD(ensure_topk_full)
130+
.set_default(false)
131+
.description("whether to make sure topk results full")
132+
.for_search();
128133
}
129134

130135
Status

thirdparty/faiss/faiss/impl/pq4_fast_scan_search_1.cpp

Lines changed: 20 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -125,12 +125,29 @@ void accumulate_fixed_blocks(
125125
ResultHandler& res,
126126
const Scaler& scaler) {
127127
constexpr int bbs = 32 * BB;
128-
for (size_t j0 = 0; j0 < nb; j0 += bbs) {
128+
for (size_t j0 = 0; j0 < nb; j0 += bbs, codes += bbs * nsq / 2) {
129+
res.set_block_origin(0, j0);
130+
// skip computing distances if all vectors inside a block are filtered out
131+
if constexpr(has_sel_member_v<ResultHandler>) {
132+
if (res.sel != nullptr) { // we have filter here
133+
bool skip_flag = true;
134+
for (size_t jj = 0; jj < std::min<size_t>(bbs, res.ntotal - j0);
135+
jj++) {
136+
auto real_idx = res.adjust_id(0, jj);
137+
if (res.sel->is_member(real_idx)) { // id is not filtered out, can not skip computing
138+
skip_flag = false;
139+
break;
140+
}
141+
}
142+
143+
if (skip_flag) {
144+
continue;
145+
}
146+
}
147+
}
129148
FixedStorageHandler<NQ, 2 * BB> res2;
130149
kernel_accumulate_block<NQ, BB>(nsq, codes, LUT, res2, scaler);
131-
res.set_block_origin(0, j0);
132150
res2.to_other_handler(res);
133-
codes += bbs * nsq / 2;
134151
}
135152
}
136153

thirdparty/faiss/faiss/impl/pq4_fast_scan_search_qbs.cpp

Lines changed: 0 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -113,18 +113,6 @@ void kernel_accumulate_block(
113113
}
114114
}
115115

116-
namespace {
117-
118-
// a helper that checks whether a ResultHandler has a .sel member
119-
template<typename T, typename = void>
120-
struct has_sel_member : std::false_type {};
121-
template<typename T>
122-
struct has_sel_member<T, std::void_t<decltype(T::sel)>> : std::true_type {};
123-
template<typename T>
124-
inline constexpr bool has_sel_member_v = has_sel_member<T>::value;
125-
126-
}
127-
128116
// handle at most 4 blocks of queries
129117
template <int QBS, class ResultHandler, class Scaler>
130118
void accumulate_q_4step(

thirdparty/faiss/faiss/impl/simd_result_handlers.h

Lines changed: 12 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -26,6 +26,18 @@
2626

2727
namespace faiss {
2828

29+
namespace {
30+
31+
// a helper that checks whether a ResultHandler has a .sel member
32+
template<typename T, typename = void>
33+
struct has_sel_member : std::false_type {};
34+
template<typename T>
35+
struct has_sel_member<T, std::void_t<decltype(T::sel)>> : std::true_type {};
36+
template<typename T>
37+
inline constexpr bool has_sel_member_v = has_sel_member<T>::value;
38+
39+
}
40+
2941
struct SIMDResultHandler {
3042
// used to dispatch templates
3143
bool is_CMax = false;

0 commit comments

Comments
 (0)