Skip to content

Commit 3fcc1db

Browse files
upgrade faiss baseline f9f116d30 -> 9d567497e (22 upstream commits) (#1606)
Signed-off-by: Alexandr Guzhva <alexanderguzhva@gmail.com>
1 parent 2d9d473 commit 3fcc1db

47 files changed

Lines changed: 1991 additions & 661 deletions

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

thirdparty/faiss/benchs/bench_hnsw.py

Lines changed: 60 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -32,7 +32,7 @@
3232
nq, d = xq.shape
3333

3434
if todo == []:
35-
todo = 'hnsw hnsw_sq ivf ivf_hnsw_quantizer kmeans kmeans_hnsw nsg'.split()
35+
todo = "hnsw hnsw_sq ivf ivf_hnsw_quantizer kmeans kmeans_hnsw nsg".split()
3636

3737

3838
def evaluate(index):
@@ -45,11 +45,13 @@ def evaluate(index):
4545

4646
missing_rate = (I == -1).sum() / float(k * nq)
4747
recall_at_1 = (I == gt[:, :1]).sum() / float(nq)
48-
print("\t %7.3f ms per query, R@1 %.4f, missing rate %.4f" % (
49-
(t1 - t0) * 1000.0 / nq, recall_at_1, missing_rate))
48+
print(
49+
"\t %7.3f ms per query, R@1 %.4f, missing rate %.4f"
50+
% ((t1 - t0) * 1000.0 / nq, recall_at_1, missing_rate)
51+
)
5052

5153

52-
if 'hnsw' in todo:
54+
if "hnsw" in todo:
5355

5456
print("Testing HNSW Flat")
5557

@@ -69,12 +71,12 @@ def evaluate(index):
6971
print("search")
7072
for efSearch in 16, 32, 64, 128, 256:
7173
for bounded_queue in [True, False]:
72-
print("efSearch", efSearch, "bounded queue", bounded_queue, end=' ')
74+
print("efSearch", efSearch, "bounded queue", bounded_queue, end=" ")
7375
index.hnsw.search_bounded_queue = bounded_queue
7476
index.hnsw.efSearch = efSearch
7577
evaluate(index)
7678

77-
if 'hnsw_sq' in todo:
79+
if "hnsw_sq" in todo:
7880

7981
print("Testing HNSW with a scalar quantizer")
8082
# also set M so that the vectors and links both use 128 bytes per
@@ -96,16 +98,16 @@ def evaluate(index):
9698

9799
print("search")
98100
for efSearch in 16, 32, 64, 128, 256:
99-
print("efSearch", efSearch, end=' ')
101+
print("efSearch", efSearch, end=" ")
100102
index.hnsw.efSearch = efSearch
101103
evaluate(index)
102104

103-
if 'ivf' in todo:
105+
if "ivf" in todo:
104106

105107
print("Testing IVF Flat (baseline)")
106108
quantizer = faiss.IndexFlatL2(d)
107109
index = faiss.IndexIVFFlat(quantizer, d, 16384)
108-
index.cp.min_points_per_centroid = 5 # quiet warning
110+
index.cp.min_points_per_centroid = 5 # quiet warning
109111

110112
# to see progress
111113
index.verbose = True
@@ -118,16 +120,16 @@ def evaluate(index):
118120

119121
print("search")
120122
for nprobe in 1, 4, 16, 64, 256:
121-
print("nprobe", nprobe, end=' ')
123+
print("nprobe", nprobe, end=" ")
122124
index.nprobe = nprobe
123125
evaluate(index)
124126

125-
if 'ivf_hnsw_quantizer' in todo:
127+
if "ivf_hnsw_quantizer" in todo:
126128

127129
print("Testing IVF Flat with HNSW quantizer")
128130
quantizer = faiss.IndexHNSWFlat(d, 32)
129131
index = faiss.IndexIVFFlat(quantizer, d, 16384)
130-
index.cp.min_points_per_centroid = 5 # quiet warning
132+
index.cp.min_points_per_centroid = 5 # quiet warning
131133
index.quantizer_trains_alone = 2
132134

133135
# to see progress
@@ -142,13 +144,13 @@ def evaluate(index):
142144
print("search")
143145
quantizer.hnsw.efSearch = 64
144146
for nprobe in 1, 4, 16, 64, 256:
145-
print("nprobe", nprobe, end=' ')
147+
print("nprobe", nprobe, end=" ")
146148
index.nprobe = nprobe
147149
evaluate(index)
148150

149151
# Bonus: 2 kmeans tests
150152

151-
if 'kmeans' in todo:
153+
if "kmeans" in todo:
152154
print("Performing kmeans on sift1M database vectors (baseline)")
153155
clus = faiss.Clustering(d, 16384)
154156
clus.verbose = True
@@ -157,7 +159,7 @@ def evaluate(index):
157159
clus.train(xb, index)
158160

159161

160-
if 'kmeans_hnsw' in todo:
162+
if "kmeans_hnsw" in todo:
161163
print("Performing kmeans on sift1M using HNSW assignment")
162164
clus = faiss.Clustering(d, 16384)
163165
clus.verbose = True
@@ -168,7 +170,7 @@ def evaluate(index):
168170
index.hnsw.efSearch = 128
169171
clus.train(xb, index)
170172

171-
if 'nsg' in todo:
173+
if "nsg" in todo:
172174

173175
print("Testing NSG Flat")
174176

@@ -186,6 +188,47 @@ def evaluate(index):
186188

187189
print("search")
188190
for search_L in -1, 16, 32, 64, 128, 256:
189-
print("search_L", search_L, end=' ')
191+
print("search_L", search_L, end=" ")
190192
index.nsg.search_L = search_L
191193
evaluate(index)
194+
195+
196+
if "hnsw_locks" in todo:
197+
198+
ntotal, _ = xb.shape
199+
batch_size = ntotal // 100
200+
print(
201+
f"Testing HNSW Flat: add with {batch_size=}, "
202+
"with and without retaining locks"
203+
)
204+
205+
# Unbatched
206+
t0 = time.time()
207+
index = faiss.IndexHNSWFlat(d, 32)
208+
index.add(xb)
209+
t1 = time.time()
210+
print(
211+
f"\t single bulk add(): {index.ntotal} added in {t1 - t0:6.3f}s"
212+
f" = {index.ntotal / (t1 - t0):.0f}/s"
213+
)
214+
215+
for retain_locks in [False, True]:
216+
index = faiss.IndexHNSWFlat(d, 32)
217+
index.retain_locks = retain_locks
218+
219+
t0 = time.time()
220+
t1 = None
221+
t2 = None
222+
for i in range(0, len(xb), batch_size):
223+
t1 = time.time()
224+
index.add(xb[i: i + batch_size])
225+
t2 = time.time()
226+
if i > 2 and t2 - t0 > 2:
227+
break
228+
229+
assert t1 and t2
230+
dt = t2 - t0
231+
print(
232+
f"\t {retain_locks=:1}: {index.ntotal} added in {t2 - t0:6.3f}s"
233+
f" = {index.ntotal / (t2 - t0):.0f}/s"
234+
)

thirdparty/faiss/conda/faiss-gpu/meta.yaml

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -49,7 +49,7 @@ outputs:
4949
- FAISS_FLATTEN_CONDA_INCLUDES
5050
requirements:
5151
build:
52-
- {{ compiler('cxx') }} =12.4
52+
- {{ compiler('cxx') }} >=13.4,<14
5353
- sysroot_linux-64 =2.17 # [linux64]
5454
- llvm-openmp # [osx]
5555
- cmake >=3.24.0
@@ -85,7 +85,7 @@ outputs:
8585
string: "py{{ PY_VER }}_h{{ PKG_HASH }}_{{ number }}_cuda{{ cudatoolkit }}{{ suffix }}"
8686
requirements:
8787
build:
88-
- {{ compiler('cxx') }} =12.4
88+
- {{ compiler('cxx') }} >=13.4,<14
8989
- sysroot_linux-64 =2.17 # [linux64]
9090
- swig =4.0
9191
- cmake >=3.24.0

thirdparty/faiss/faiss/CMakeLists.txt

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -117,6 +117,7 @@ set(FAISS_SRC
117117
impl/IDSelector.cpp
118118
impl/FaissException.cpp
119119
impl/HNSW.cpp
120+
impl/hnsw/LockVector.cpp
120121
impl/hnsw/MinimaxHeap.cpp
121122
impl/NSG.cpp
122123
impl/PolysemousTraining.cpp
@@ -242,6 +243,7 @@ set(FAISS_HEADERS
242243
impl/FaissAssert.h
243244
impl/FaissException.h
244245
impl/HNSW.h
246+
impl/hnsw/LockVector.h
245247
impl/hnsw/MinimaxHeap.h
246248
impl/LocalSearchQuantizer.h
247249
impl/ProductAdditiveQuantizer.h
@@ -280,6 +282,7 @@ set(FAISS_HEADERS
280282
impl/lattice_Zn.h
281283
impl/platform_macros.h
282284
impl/fast_scan/accumulate_loops.h
285+
impl/fast_scan/accumulate_loops_512.h
283286
impl/fast_scan/dispatching.h
284287
impl/fast_scan/fast_scan.h
285288
impl/fast_scan/decompose_qbs.h

thirdparty/faiss/faiss/Clustering.cpp

Lines changed: 31 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -16,6 +16,7 @@
1616
#include <cmath>
1717
#include <cstdio>
1818
#include <cstring>
19+
#include <limits>
1920

2021
#include <omp.h>
2122

@@ -84,20 +85,27 @@ idx_t subsample_training_set(
8485

8586
const uint64_t actual_seed = get_actual_rng_seed(clus.seed);
8687

87-
std::vector<int> perm;
88+
std::vector<idx_t> perm;
8889
if (clus.use_faster_subsampling) {
8990
// use subsampling with splitmix64 rng
9091
SplitMix64RandomGenerator rng(actual_seed);
9192

9293
const idx_t new_nx = clus.k * clus.max_points_per_centroid;
9394
perm.resize(new_nx);
9495
for (idx_t i = 0; i < new_nx; i++) {
95-
perm[i] = rng.rand_int(nx);
96+
perm[i] = rng.rand_int64() % nx;
9697
}
9798
} else {
9899
// use subsampling with a default std rng
99-
perm.resize(nx);
100-
rand_perm(perm.data(), nx, actual_seed);
100+
FAISS_THROW_IF_NOT_FMT(
101+
nx <= static_cast<idx_t>(std::numeric_limits<int>::max()),
102+
"Dataset too large (%" PRId64
103+
") for standard subsampling; "
104+
"set use_faster_subsampling=true",
105+
nx);
106+
std::vector<int> int_perm(nx);
107+
rand_perm(int_perm.data(), nx, actual_seed);
108+
perm.assign(int_perm.begin(), int_perm.end());
101109
}
102110

103111
nx = clus.k * clus.max_points_per_centroid;
@@ -232,12 +240,27 @@ int split_clusters(
232240
for (size_t ci = 0; ci < k; ci++) {
233241
if (hassign[ci] == 0) { /* need to redefine a centroid */
234242
size_t cj;
235-
for (cj = 0; true; cj = (cj + 1) % k) {
236-
/* probability to pick this cluster for split */
243+
// Try probabilistic selection, with a deterministic fallback
244+
// to the largest cluster if too many iterations pass.
245+
size_t max_tries = 10 * k;
246+
size_t n_tries = 0;
247+
bool found = false;
248+
for (cj = 0; n_tries < max_tries; cj = (cj + 1) % k) {
237249
float p = (hassign[cj] - 1.0) / (float)(n - k);
238250
float r = rng.rand_float();
239251
if (r < p) {
240-
break; /* found our cluster to be split */
252+
found = true;
253+
break;
254+
}
255+
n_tries++;
256+
}
257+
if (!found) {
258+
// Deterministic fallback: split the largest cluster.
259+
cj = 0;
260+
for (size_t j = 1; j < k; j++) {
261+
if (hassign[j] > hassign[cj]) {
262+
cj = j;
263+
}
241264
}
242265
}
243266
memcpy(centroids + ci * d,
@@ -510,7 +533,7 @@ void Clustering::train_encoded(
510533

511534
// accumulate objective
512535
obj = 0;
513-
for (int j = 0; j < nx; j++) {
536+
for (idx_t j = 0; j < nx; j++) {
514537
obj += dis[j];
515538
}
516539

0 commit comments

Comments
 (0)