3232nq , d = xq .shape
3333
3434if todo == []:
35- todo = ' hnsw hnsw_sq ivf ivf_hnsw_quantizer kmeans kmeans_hnsw nsg' .split ()
35+ todo = " hnsw hnsw_sq ivf ivf_hnsw_quantizer kmeans kmeans_hnsw nsg" .split ()
3636
3737
3838def evaluate (index ):
@@ -45,11 +45,13 @@ def evaluate(index):
4545
4646 missing_rate = (I == - 1 ).sum () / float (k * nq )
4747 recall_at_1 = (I == gt [:, :1 ]).sum () / float (nq )
48- print ("\t %7.3f ms per query, R@1 %.4f, missing rate %.4f" % (
49- (t1 - t0 ) * 1000.0 / nq , recall_at_1 , missing_rate ))
48+ print (
49+ "\t %7.3f ms per query, R@1 %.4f, missing rate %.4f"
50+ % ((t1 - t0 ) * 1000.0 / nq , recall_at_1 , missing_rate )
51+ )
5052
5153
52- if ' hnsw' in todo :
54+ if " hnsw" in todo :
5355
5456 print ("Testing HNSW Flat" )
5557
@@ -69,12 +71,12 @@ def evaluate(index):
6971 print ("search" )
7072 for efSearch in 16 , 32 , 64 , 128 , 256 :
7173 for bounded_queue in [True , False ]:
72- print ("efSearch" , efSearch , "bounded queue" , bounded_queue , end = ' ' )
74+ print ("efSearch" , efSearch , "bounded queue" , bounded_queue , end = " " )
7375 index .hnsw .search_bounded_queue = bounded_queue
7476 index .hnsw .efSearch = efSearch
7577 evaluate (index )
7678
77- if ' hnsw_sq' in todo :
79+ if " hnsw_sq" in todo :
7880
7981 print ("Testing HNSW with a scalar quantizer" )
8082 # also set M so that the vectors and links both use 128 bytes per
@@ -96,16 +98,16 @@ def evaluate(index):
9698
9799 print ("search" )
98100 for efSearch in 16 , 32 , 64 , 128 , 256 :
99- print ("efSearch" , efSearch , end = ' ' )
101+ print ("efSearch" , efSearch , end = " " )
100102 index .hnsw .efSearch = efSearch
101103 evaluate (index )
102104
103- if ' ivf' in todo :
105+ if " ivf" in todo :
104106
105107 print ("Testing IVF Flat (baseline)" )
106108 quantizer = faiss .IndexFlatL2 (d )
107109 index = faiss .IndexIVFFlat (quantizer , d , 16384 )
108- index .cp .min_points_per_centroid = 5 # quiet warning
110+ index .cp .min_points_per_centroid = 5 # quiet warning
109111
110112 # to see progress
111113 index .verbose = True
@@ -118,16 +120,16 @@ def evaluate(index):
118120
119121 print ("search" )
120122 for nprobe in 1 , 4 , 16 , 64 , 256 :
121- print ("nprobe" , nprobe , end = ' ' )
123+ print ("nprobe" , nprobe , end = " " )
122124 index .nprobe = nprobe
123125 evaluate (index )
124126
125- if ' ivf_hnsw_quantizer' in todo :
127+ if " ivf_hnsw_quantizer" in todo :
126128
127129 print ("Testing IVF Flat with HNSW quantizer" )
128130 quantizer = faiss .IndexHNSWFlat (d , 32 )
129131 index = faiss .IndexIVFFlat (quantizer , d , 16384 )
130- index .cp .min_points_per_centroid = 5 # quiet warning
132+ index .cp .min_points_per_centroid = 5 # quiet warning
131133 index .quantizer_trains_alone = 2
132134
133135 # to see progress
@@ -142,13 +144,13 @@ def evaluate(index):
142144 print ("search" )
143145 quantizer .hnsw .efSearch = 64
144146 for nprobe in 1 , 4 , 16 , 64 , 256 :
145- print ("nprobe" , nprobe , end = ' ' )
147+ print ("nprobe" , nprobe , end = " " )
146148 index .nprobe = nprobe
147149 evaluate (index )
148150
149151# Bonus: 2 kmeans tests
150152
151- if ' kmeans' in todo :
153+ if " kmeans" in todo :
152154 print ("Performing kmeans on sift1M database vectors (baseline)" )
153155 clus = faiss .Clustering (d , 16384 )
154156 clus .verbose = True
@@ -157,7 +159,7 @@ def evaluate(index):
157159 clus .train (xb , index )
158160
159161
160- if ' kmeans_hnsw' in todo :
162+ if " kmeans_hnsw" in todo :
161163 print ("Performing kmeans on sift1M using HNSW assignment" )
162164 clus = faiss .Clustering (d , 16384 )
163165 clus .verbose = True
@@ -168,7 +170,7 @@ def evaluate(index):
168170 index .hnsw .efSearch = 128
169171 clus .train (xb , index )
170172
171- if ' nsg' in todo :
173+ if " nsg" in todo :
172174
173175 print ("Testing NSG Flat" )
174176
@@ -186,6 +188,47 @@ def evaluate(index):
186188
187189 print ("search" )
188190 for search_L in - 1 , 16 , 32 , 64 , 128 , 256 :
189- print ("search_L" , search_L , end = ' ' )
191+ print ("search_L" , search_L , end = " " )
190192 index .nsg .search_L = search_L
191193 evaluate (index )
194+
195+
if "hnsw_locks" in todo:
    # Benchmark IndexHNSWFlat.add() throughput: one bulk call over the whole
    # dataset, then fixed-size batches with the index's `retain_locks`
    # attribute switched off and on.

    ntotal, _ = xb.shape
    # Aim for roughly 100 batches.  Clamp to 1 so that a dataset with fewer
    # than 100 vectors does not produce a zero step, which range() rejects.
    batch_size = max(1, ntotal // 100)
    print(
        f"Testing HNSW Flat: add with {batch_size=}, "
        "with and without retaining locks"
    )

    # Unbatched.  The index is built before the clock starts so that this
    # measurement covers the same work as the batched runs below.
    index = faiss.IndexHNSWFlat(d, 32)
    t0 = time.time()
    index.add(xb)
    dt = time.time() - t0
    print(
        f"\tsingle bulk add(): {index.ntotal} added in {dt:6.3f}s"
        f" = {index.ntotal / dt:.0f}/s"
    )

    for retain_locks in [False, True]:
        index = faiss.IndexHNSWFlat(d, 32)
        index.retain_locks = retain_locks

        t0 = time.time()
        t2 = None  # timestamp taken after the most recent batch
        for i in range(0, ntotal, batch_size):
            index.add(xb[i : i + batch_size])
            t2 = time.time()
            # Stop once more than 2 seconds have elapsed.  `i` is a row
            # offset, so `i > 2` skips the check for the first batch only.
            # NOTE(review): confirm a batch-count threshold was not intended.
            if i > 2 and t2 - t0 > 2:
                break

        # The loop body runs at least once unless xb is empty.
        assert t2 is not None, "no batch was added (xb is empty)"
        dt = t2 - t0
        print(
            f"\t{retain_locks=:1}: {index.ntotal} added in {dt:6.3f}s"
            f" = {index.ntotal / dt:.0f}/s"
        )
0 commit comments