Skip to content

Commit c884fa6

Browse files
committed
Fix scann filter search performance and clean up ensure_topk_full config
Signed-off-by: chasingegg <chao.gao@zilliz.com>
1 parent f8c4269 commit c884fa6

5 files changed

Lines changed: 39 additions & 17 deletions

File tree

src/index/ivf/ivf.cc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -805,7 +805,7 @@ IvfIndexNode<DataType, IndexType>::Search(const DataSetPtr dataset, std::unique_
805805
faiss::IVFSearchParameters base_search_params;
806806
base_search_params.sel = id_selector;
807807
base_search_params.nprobe = nprobe;
808-
base_search_params.ensure_topk_full = ivf_cfg.ensure_topk_full.value();
808+
base_search_params.ensure_topk_full = scann_cfg.ensure_topk_full.value();
809809
if (base_search_params.ensure_topk_full) {
810810
if (auto base_index_ptr = reinterpret_cast<faiss::IndexIVFPQFastScan*>(index_->base_index)) {
811811
auto nlist = base_index_ptr->nlist;

src/index/ivf/ivf_config.h

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ class IvfConfig : public BaseConfig {
2121
CFG_INT nlist;
2222
CFG_INT nprobe;
2323
CFG_BOOL use_elkan;
24-
CFG_BOOL ensure_topk_full; // only take affect on temp index(IVF_FLAT_CC) now
24+
CFG_BOOL ensure_topk_full; // internal config, used for temp index
2525
CFG_INT max_empty_result_buckets;
2626
KNOHWERE_DECLARE_CONFIG(IvfConfig) {
2727
KNOWHERE_CONFIG_DECLARE_FIELD(nlist)
@@ -103,6 +103,7 @@ class ScannConfig : public IvfFlatConfig {
103103
CFG_INT reorder_k;
104104
CFG_BOOL with_raw_data;
105105
CFG_INT sub_dim;
106+
CFG_BOOL ensure_topk_full;
106107
KNOHWERE_DECLARE_CONFIG(ScannConfig) {
107108
KNOWHERE_CONFIG_DECLARE_FIELD(reorder_k)
108109
.description("reorder k used for refining")
@@ -119,6 +120,10 @@ class ScannConfig : public IvfFlatConfig {
119120
.set_default(2)
120121
.for_train()
121122
.set_range(1, 65536);
123+
KNOWHERE_CONFIG_DECLARE_FIELD(ensure_topk_full)
124+
.set_default(false)
125+
.description("whether to make sure topk results full")
126+
.for_search();
122127
}
123128

124129
Status

thirdparty/faiss/faiss/impl/pq4_fast_scan_search_1.cpp

Lines changed: 20 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -125,12 +125,29 @@ void accumulate_fixed_blocks(
125125
ResultHandler& res,
126126
const Scaler& scaler) {
127127
constexpr int bbs = 32 * BB;
128-
for (size_t j0 = 0; j0 < nb; j0 += bbs) {
128+
for (size_t j0 = 0; j0 < nb; j0 += bbs, codes += bbs * nsq / 2) {
129+
res.set_block_origin(0, j0);
130+
// skip computing distances if all vectors inside a block are filtered out
131+
if constexpr(has_sel_member_v<ResultHandler>) {
132+
if (res.sel != nullptr) { // we have filter here
133+
bool skip_flag = true;
134+
for (size_t jj = 0; jj < std::min<size_t>(bbs, res.ntotal - j0);
135+
jj++) {
136+
auto real_idx = res.adjust_id(0, jj);
137+
if (res.sel->is_member(real_idx)) { // id is not filtered out, cannot skip computing
138+
skip_flag = false;
139+
break;
140+
}
141+
}
142+
143+
if (skip_flag) {
144+
continue;
145+
}
146+
}
147+
}
129148
FixedStorageHandler<NQ, 2 * BB> res2;
130149
kernel_accumulate_block<NQ, BB>(nsq, codes, LUT, res2, scaler);
131-
res.set_block_origin(0, j0);
132150
res2.to_other_handler(res);
133-
codes += bbs * nsq / 2;
134151
}
135152
}
136153

thirdparty/faiss/faiss/impl/pq4_fast_scan_search_qbs.cpp

Lines changed: 0 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -113,18 +113,6 @@ void kernel_accumulate_block(
113113
}
114114
}
115115

116-
namespace {
117-
118-
// a helper that checks whether a ResultHandler has a .sel member
119-
template<typename T, typename = void>
120-
struct has_sel_member : std::false_type {};
121-
template<typename T>
122-
struct has_sel_member<T, std::void_t<decltype(T::sel)>> : std::true_type {};
123-
template<typename T>
124-
inline constexpr bool has_sel_member_v = has_sel_member<T>::value;
125-
126-
}
127-
128116
// handle at most 4 blocks of queries
129117
template <int QBS, class ResultHandler, class Scaler>
130118
void accumulate_q_4step(

thirdparty/faiss/faiss/impl/simd_result_handlers.h

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,18 @@
2626

2727
namespace faiss {
2828

29+
namespace {
30+
31+
// a helper that checks whether a ResultHandler has a .sel member
32+
template<typename T, typename = void>
33+
struct has_sel_member : std::false_type {};
34+
template<typename T>
35+
struct has_sel_member<T, std::void_t<decltype(T::sel)>> : std::true_type {};
36+
template<typename T>
37+
inline constexpr bool has_sel_member_v = has_sel_member<T>::value;
38+
39+
}
40+
2941
struct SIMDResultHandler {
3042
// used to dispatch templates
3143
bool is_CMax = false;

0 commit comments

Comments
 (0)