Skip to content

Commit 2d9b9c7

Browse files
committed
Run all tuned submission slots in search.py (task1 15 slots, task2 mode5+mode7)
1 parent 0e427a0 commit 2d9b9c7

1 file changed

Lines changed: 156 additions & 114 deletions

File tree

submission/search.py

Lines changed: 156 additions & 114 deletions
Original file line numberDiff line numberDiff line change
@@ -37,35 +37,68 @@
3737
THREADS = os.environ.get("DEGLIB_THREADS", "8")
3838

3939
# --- per-dataset parameter profiles ------------------------------------------
40-
# The graph is built ONCE per profile; max_dist is swept to yield several
41-
# operating points (build/recall trade-off) from that single build. Unknown
40+
# Each dataset maps to a LIST of configs; every config is one binary invocation
41+
# (one graph build) that emits one or more operating points via its max_dist (and,
42+
# for task 2, eps_search) sweep. Configs that share build params are grouped into a
43+
# single sweep; configs with distinct build params build a separate graph. Unknown
4244
# datasets fail fast (see _profile_or_die) rather than silently using bad params.
45+
#
46+
# Task 1 config keys: mode, non_zeros, k_graph, k_ext, eps_ext, prune_worst, evpK,
47+
# max_dist (list). The submission slots are the tuned (mode4 + mode7) configs from
48+
# the old python/submission_task1_*.py — 15 operating points per submission dataset (gooaq-small is a 3-point smoke test).
4349
TASK1_PROFILES = {
44-
# 1024-dim BGE-M3 (normalised, inner product)
45-
"wikipedia-small": dict(mode="mode4", non_zeros=576, k_graph=22, k_ext=29,
46-
eps_ext=0.001, prune_worst=10, evpK=113,
47-
max_dist=[200, 300, 400, 500, 700, 900]),
48-
"wikipedia": dict(mode="mode4", non_zeros=608, k_graph=26, k_ext=32,
49-
eps_ext=0.002, prune_worst=9, evpK=50,
50-
max_dist=[500, 700, 900, 1200, 1400]),
50+
# 1024-dim BGE-M3 (normalized, inner product) — 200K dev set; 15 tuned slots.
51+
"wikipedia-small": [
52+
dict(mode="mode4", non_zeros=768, k_graph=16, k_ext=19, eps_ext=0.001, prune_worst=2, evpK=28, max_dist=[576]), # slot 1 ~0.757
53+
dict(mode="mode4", non_zeros=704, k_graph=14, k_ext=29, eps_ext=0.001, prune_worst=3, evpK=31, max_dist=[707]), # slot 2 ~0.787
54+
dict(mode="mode4", non_zeros=704, k_graph=18, k_ext=26, eps_ext=0.001, prune_worst=4, evpK=37, max_dist=[404]), # slot 3 ~0.806
55+
dict(mode="mode4", non_zeros=768, k_graph=14, k_ext=38, eps_ext=0.001, prune_worst=3, evpK=37, max_dist=[666]), # slot 4 ~0.815
56+
dict(mode="mode4", non_zeros=640, k_graph=16, k_ext=37, eps_ext=0.001, prune_worst=8, evpK=38, max_dist=[620]), # slot 5 ~0.826
57+
dict(mode="mode4", non_zeros=704, k_graph=16, k_ext=31, eps_ext=0.001, prune_worst=3, evpK=86, max_dist=[431]), # slot 6 ~0.831
58+
dict(mode="mode4", non_zeros=384, k_graph=12, k_ext=16, eps_ext=0.001, prune_worst=6, evpK=95, max_dist=[742]), # slot 7 ~0.838
59+
dict(mode="mode4", non_zeros=768, k_graph=18, k_ext=41, eps_ext=0.001, prune_worst=9, evpK=78, max_dist=[586]), # slot 8 ~0.881
60+
dict(mode="mode4", non_zeros=576, k_graph=16, k_ext=74, eps_ext=0.001, prune_worst=6, evpK=48, max_dist=[561]), # slot 9 ~0.888
61+
dict(mode="mode4", non_zeros=576, k_graph=22, k_ext=29, eps_ext=0.001, prune_worst=10, evpK=113, max_dist=[533]), # slot 10 ~0.894
62+
dict(mode="mode7", non_zeros=768, k_graph=20, k_ext=18, eps_ext=0.001, prune_worst=10, evpK=41, max_dist=[220]), # slot 11 ~0.776
63+
dict(mode="mode7", non_zeros=704, k_graph=22, k_ext=20, eps_ext=0.001, prune_worst=11, evpK=35, max_dist=[192]), # slot 12 ~0.783
64+
dict(mode="mode7", non_zeros=640, k_graph=18, k_ext=48, eps_ext=0.001, prune_worst=9, evpK=33, max_dist=[221]), # slot 13 ~0.815
65+
dict(mode="mode7", non_zeros=640, k_graph=26, k_ext=27, eps_ext=0.001, prune_worst=12, evpK=33, max_dist=[226]), # slot 14 ~0.840
66+
dict(mode="mode7", non_zeros=768, k_graph=20, k_ext=39, eps_ext=0.001, prune_worst=10, evpK=77, max_dist=[384]), # slot 15 ~0.859
67+
],
68+
# 6.35M BGE-M3 — submission slots (eps_ext=0.002). Slots 1-8 share one build
69+
# (max_dist sweep), as do slots 12-14; slots 9, 10, 11 and 15 are tuned individually.
70+
"wikipedia": [
71+
dict(mode="mode4", non_zeros=608, k_graph=26, k_ext=32, eps_ext=0.002, prune_worst=9, evpK=50, max_dist=[500, 600, 700, 800, 900, 1000, 1200, 1400]), # slots 1-8
72+
dict(mode="mode4", non_zeros=512, k_graph=32, k_ext=24, eps_ext=0.002, prune_worst=11, evpK=50, max_dist=[900]), # slot 9
73+
dict(mode="mode4", non_zeros=512, k_graph=32, k_ext=24, eps_ext=0.002, prune_worst=11, evpK=100, max_dist=[800]), # slot 10
74+
dict(mode="mode7", non_zeros=576, k_graph=28, k_ext=34, eps_ext=0.002, prune_worst=10, evpK=50, max_dist=[400]), # slot 11
75+
dict(mode="mode7", non_zeros=512, k_graph=32, k_ext=24, eps_ext=0.002, prune_worst=11, evpK=50, max_dist=[400, 500, 600]), # slots 12-14
76+
dict(mode="mode7", non_zeros=576, k_graph=28, k_ext=34, eps_ext=0.002, prune_worst=10, evpK=75, max_dist=[800]), # slot 15
77+
],
5178
# 384-dim gooaq spot-check (different family; smoke test only, non_zeros<dim)
52-
"gooaq-small": dict(mode="mode4", non_zeros=300, k_graph=24, k_ext=24,
53-
eps_ext=0.001, prune_worst=8, evpK=50,
54-
max_dist=[200, 400, 800]),
79+
"gooaq-small": [
80+
dict(mode="mode4", non_zeros=300, k_graph=24, k_ext=24, eps_ext=0.001, prune_worst=8, evpK=50, max_dist=[200, 400, 800]),
81+
],
5582
}
5683

57-
# Task 2 (MIPS): graph built once (single-threaded per the rules), then a
58-
# (eps_search x max_dist) sweep yields the operating points. mode5 = L2-build +
59-
# FP16 inner-product search; FLAS pre-sort improves the build.
84+
# Task 2 (MIPS): each config builds the graph once (single-threaded per the rules),
85+
# then sweeps (eps_search x max_dist). The submission candidates are mode5 (L2-build +
86+
# FP16 inner-product search) and mode7 (L2 d+2 build + FP16 L2 search), both with FLAS.
87+
# Task 2 config keys: mode, k_graph, k_ext, eps_ext, build_threads, use_flas, num_runs,
88+
# max_dist (list), eps_search (list).
6089
TASK2_PROFILES = {
61-
# 128-dim Llama-3 attention (unnormalised inner product)
62-
"llama-dev": dict(mode="mode5", k_graph=32, k_ext=64, eps_ext=0.001, build_threads=1,
63-
use_flas=True, num_runs=3,
64-
max_dist=[5000, 6000, 7000, 8000], eps_search=[0.18, 0.19, 0.2]),
90+
# 128-dim Llama-3 attention (unnormalized inner product) — submission candidates.
91+
"llama-dev": [
92+
dict(mode="mode5", k_graph=32, k_ext=64, eps_ext=0.001, build_threads=1, use_flas=True,
93+
num_runs=10, max_dist=[5000, 6000, 7000, 8000], eps_search=[0.18]),
94+
dict(mode="mode7", k_graph=32, k_ext=64, eps_ext=0.001, build_threads=1, use_flas=True,
95+
num_runs=10, max_dist=[5000, 5500, 6000, 6200, 6300, 6500, 7000], eps_search=[0.007]),
96+
],
6597
# spot-check (14k vectors); smoke test only
66-
"llama-small": dict(mode="mode5", k_graph=32, k_ext=64, eps_ext=0.001, build_threads=1,
67-
use_flas=True, num_runs=1,
68-
max_dist=[2000, 4000, 8000], eps_search=[0.2, 0.3]),
98+
"llama-small": [
99+
dict(mode="mode5", k_graph=32, k_ext=64, eps_ext=0.001, build_threads=1, use_flas=True,
100+
num_runs=1, max_dist=[2000, 4000, 8000], eps_search=[0.2, 0.3]),
101+
],
69102
}
70103

71104

@@ -190,61 +223,66 @@ def _require_binary():
190223
def run_task1(input_path, cfg, output_dir):
191224
dataset = cfg["dataset_name"]
192225
k = int(cfg.get("k", 15))
193-
profile = _profile_or_die(TASK1_PROFILES, dataset, "task1")
226+
configs = _profile_or_die(TASK1_PROFILES, dataset, "task1")
194227
_require_binary()
195-
print(f"[task1] dataset={dataset} mode={profile['mode']} "
196-
f"non_zeros={profile['non_zeros']} max_dist={profile['max_dist']}")
228+
print(f"[task1] dataset={dataset}: {len(configs)} config(s) / build(s)")
197229

198230
bin_input, tmp = maybe_decompress(input_path, ["train"])
199-
op_dir = None
231+
op_root = None
200232
try:
201-
op_dir = tempfile.mkdtemp(prefix="deglib_op_")
202-
cmd = [
203-
DEGLIB_BIN, "task1", bin_input, profile["mode"],
204-
"--threads", THREADS, "--k-top", str(k),
205-
"--non-zeros", str(profile["non_zeros"]),
206-
"--k-graph", str(profile["k_graph"]),
207-
"--k-ext", str(profile["k_ext"]),
208-
"--eps-ext", str(profile["eps_ext"]),
209-
"--evpK", str(profile["evpK"]),
210-
"--max-dist", ",".join(str(m) for m in profile["max_dist"]),
211-
"--prune-worst", str(profile["prune_worst"]),
212-
"--no-recall", "--output", op_dir,
213-
]
214-
print("[task1] running:", " ".join(cmd))
215-
subprocess.run(cmd, check=True)
216-
217-
ops = sorted(Path(op_dir).glob("op_*.bin"))
218-
if not ops:
219-
sys.exit("Error: deglib produced no operating-point files for task1.")
233+
op_root = tempfile.mkdtemp(prefix="deglib_op_")
220234
sentinel = np.iinfo(np.uint32).max
221-
for op in ops:
222-
n, kk, t_build, t_explore, ids, dists = read_op_file(op)
223-
ids = ids.astype(np.int64)
224-
# Padding slots (fewer than k candidates) -> map to the node's own
225-
# 0-based id; after +1 that is a harmless duplicate of the self column,
226-
# keeping every id a valid 1-based label (and never overflowing int32).
227-
row0 = np.arange(n, dtype=np.int64)[:, None]
228-
ids = np.where(ids == sentinel, row0, ids)
229-
# 0-based ids -> 1-based, then prepend the self-reference at column 0
230-
# (k+1 columns) to match the ground-truth layout the evaluator uses.
231-
self_ids = np.arange(1, n + 1, dtype=np.int64)[:, None]
232-
knns = np.concatenate([self_ids, ids + 1], axis=1).astype(np.int32)
233-
self_d = np.zeros((n, 1), dtype=np.float32)
234-
out_d = np.concatenate([self_d, dists], axis=1)
235-
mobj = re.search(r"op_evpK(\d+)_md(\d+)", op.name)
236-
evpK, md = mobj.group(1), mobj.group(2)
237-
params = (f"mode={profile['mode']},non_zeros={profile['non_zeros']},"
238-
f"k_graph={profile['k_graph']},k_ext={profile['k_ext']},"
239-
f"prune_worst={profile['prune_worst']},evpK={evpK},max_dist={md}")
240-
# Task 1 is scored on construction time: buildtime = build + explore.
241-
buildtime = t_build + t_explore
242-
fn = os.path.join(output_dir, f"deglib_evpK{evpK}_md{md}.h5")
243-
store_results(fn, ALGO, dataset, "task1", out_d, knns, buildtime, 0.0, params)
244-
print(f" wrote {fn} buildtime={buildtime:.3f}s knns={knns.shape}")
235+
for ci, c in enumerate(configs):
236+
# One binary invocation per config = one graph build + its own sweep.
237+
op_dir = os.path.join(op_root, f"c{ci}")
238+
os.makedirs(op_dir, exist_ok=True)
239+
cmd = [
240+
DEGLIB_BIN, "task1", bin_input, c["mode"],
241+
"--threads", THREADS, "--k-top", str(k),
242+
"--non-zeros", str(c["non_zeros"]),
243+
"--k-graph", str(c["k_graph"]),
244+
"--k-ext", str(c["k_ext"]),
245+
"--eps-ext", str(c["eps_ext"]),
246+
"--evpK", str(c["evpK"]),
247+
"--max-dist", ",".join(str(m) for m in c["max_dist"]),
248+
"--prune-worst", str(c["prune_worst"]),
249+
"--no-recall", "--output", op_dir,
250+
]
251+
print(f"[task1] config {ci + 1}/{len(configs)} ({c['mode']}):", " ".join(cmd))
252+
subprocess.run(cmd, check=True)
253+
254+
ops = sorted(Path(op_dir).glob("op_*.bin"))
255+
if not ops:
256+
sys.exit(f"Error: deglib produced no operating-point files for task1 config {ci}.")
257+
for op in ops:
258+
n, kk, t_build, t_explore, ids, dists = read_op_file(op)
259+
ids = ids.astype(np.int64)
260+
# Padding slots (fewer than k candidates) -> map to the node's own
261+
# 0-based id; after +1 that is a harmless duplicate of the self column,
262+
# keeping every id a valid 1-based label (and never overflowing int32).
263+
row0 = np.arange(n, dtype=np.int64)[:, None]
264+
ids = np.where(ids == sentinel, row0, ids)
265+
# 0-based ids -> 1-based, then prepend the self-reference at column 0
266+
# (k+1 columns) to match the ground-truth layout the evaluator uses.
267+
self_ids = np.arange(1, n + 1, dtype=np.int64)[:, None]
268+
knns = np.concatenate([self_ids, ids + 1], axis=1).astype(np.int32)
269+
self_d = np.zeros((n, 1), dtype=np.float32)
270+
out_d = np.concatenate([self_d, dists], axis=1)
271+
mobj = re.search(r"op_evpK(\d+)_md(\d+)", op.name)
272+
evpK, md = mobj.group(1), mobj.group(2)
273+
params = (f"mode={c['mode']},non_zeros={c['non_zeros']},"
274+
f"k_graph={c['k_graph']},k_ext={c['k_ext']},"
275+
f"prune_worst={c['prune_worst']},evpK={evpK},max_dist={md}")
276+
# Task 1 is scored on construction time: buildtime = build + explore.
277+
buildtime = t_build + t_explore
278+
# Config index keeps filenames unique across builds that happen to
279+
# share an (evpK, max_dist) pair (e.g. mode4 vs mode7 at the same md).
280+
fn = os.path.join(output_dir, f"deglib_c{ci}_evpK{evpK}_md{md}.h5")
281+
store_results(fn, ALGO, dataset, "task1", out_d, knns, buildtime, 0.0, params)
282+
print(f" wrote {fn} buildtime={buildtime:.3f}s knns={knns.shape}")
245283
finally:
246-
if op_dir is not None:
247-
shutil.rmtree(op_dir, ignore_errors=True)
284+
if op_root is not None:
285+
shutil.rmtree(op_root, ignore_errors=True)
248286
if tmp:
249287
try:
250288
os.unlink(tmp)
@@ -256,55 +294,59 @@ def run_task2(input_path, cfg, output_dir):
256294
dataset = cfg["dataset_name"]
257295
k = int(cfg.get("k", 30))
258296
queries_key = cfg.get("queries", "test/queries")
259-
profile = _profile_or_die(TASK2_PROFILES, dataset, "task2")
297+
configs = _profile_or_die(TASK2_PROFILES, dataset, "task2")
260298
_require_binary()
261-
print(f"[task2] dataset={dataset} mode={profile['mode']} flas={profile.get('use_flas')} "
262-
f"eps_search={profile['eps_search']} max_dist={profile['max_dist']}")
299+
print(f"[task2] dataset={dataset}: {len(configs)} config(s) / build(s)")
263300

264301
bin_input, tmp = maybe_decompress(input_path, ["train", queries_key])
265-
op_dir = None
302+
op_root = None
266303
try:
267-
op_dir = tempfile.mkdtemp(prefix="deglib_op_")
268-
cmd = [
269-
DEGLIB_BIN, "task2", bin_input, profile["mode"],
270-
"--threads", THREADS, "--build-threads", str(profile["build_threads"]),
271-
"--k-top", str(k), "--k-graph", str(profile["k_graph"]),
272-
"--k-ext", str(profile["k_ext"]), "--eps-ext", str(profile["eps_ext"]),
273-
"--max-dist", ",".join(str(m) for m in profile["max_dist"]),
274-
"--eps-search", ",".join(str(e) for e in profile["eps_search"]),
275-
"--num-runs", str(profile["num_runs"]),
276-
"--no-recall", "--output", op_dir,
277-
]
278-
if profile.get("use_flas"):
279-
cmd.append("--flas")
280-
print("[task2] running:", " ".join(cmd))
281-
subprocess.run(cmd, check=True)
282-
283-
ops = sorted(Path(op_dir).glob("op_*.bin"))
284-
if not ops:
285-
sys.exit("Error: deglib produced no operating-point files for task2.")
304+
op_root = tempfile.mkdtemp(prefix="deglib_op_")
286305
sentinel = np.iinfo(np.uint32).max
287-
for op in ops:
288-
n, kk, t_build, t_search, ids, dists = read_op_file(op)
289-
# task2 ids are ALREADY 1-based (the binary adds +1 to match test/knns).
290-
# No self column (queries are separate from the database). Padding slots
291-
# -> 0, the baseline's "missing" marker (never matches a 1-based id).
292-
ids = ids.astype(np.int64)
293-
ids = np.where(ids == sentinel, 0, ids)
294-
knns = ids.astype(np.int32)
295-
mobj = re.search(r"op_eps(\d+)_md(\d+)", op.name)
296-
eps_i, md = mobj.group(1), mobj.group(2)
297-
eps = int(eps_i) / 1000.0
298-
params = (f"mode={profile['mode']},k_graph={profile['k_graph']},k_ext={profile['k_ext']},"
299-
f"flas={int(bool(profile.get('use_flas')))},num_runs={profile['num_runs']},"
300-
f"eps_search={eps},max_dist={md}")
301-
# Task 2 is scored on search time: querytime = search, buildtime = one-time build.
302-
fn = os.path.join(output_dir, f"deglib_eps{eps_i}_md{md}.h5")
303-
store_results(fn, ALGO, dataset, "task2", dists, knns, t_build, t_search, params)
304-
print(f" wrote {fn} buildtime={t_build:.3f}s querytime={t_search:.4f}s knns={knns.shape}")
306+
for ci, c in enumerate(configs):
307+
# One binary invocation per config = one graph build + its own sweep.
308+
op_dir = os.path.join(op_root, f"c{ci}")
309+
os.makedirs(op_dir, exist_ok=True)
310+
cmd = [
311+
DEGLIB_BIN, "task2", bin_input, c["mode"],
312+
"--threads", THREADS, "--build-threads", str(c["build_threads"]),
313+
"--k-top", str(k), "--k-graph", str(c["k_graph"]),
314+
"--k-ext", str(c["k_ext"]), "--eps-ext", str(c["eps_ext"]),
315+
"--max-dist", ",".join(str(m) for m in c["max_dist"]),
316+
"--eps-search", ",".join(str(e) for e in c["eps_search"]),
317+
"--num-runs", str(c["num_runs"]),
318+
"--no-recall", "--output", op_dir,
319+
]
320+
if c.get("use_flas"):
321+
cmd.append("--flas")
322+
print(f"[task2] config {ci + 1}/{len(configs)} ({c['mode']}):", " ".join(cmd))
323+
subprocess.run(cmd, check=True)
324+
325+
ops = sorted(Path(op_dir).glob("op_*.bin"))
326+
if not ops:
327+
sys.exit(f"Error: deglib produced no operating-point files for task2 config {ci}.")
328+
for op in ops:
329+
n, kk, t_build, t_search, ids, dists = read_op_file(op)
330+
# task2 ids are ALREADY 1-based (the binary adds +1 to match test/knns).
331+
# No self column (queries are separate from the database). Padding slots
332+
# -> 0, the baseline's "missing" marker (never matches a 1-based id).
333+
ids = ids.astype(np.int64)
334+
ids = np.where(ids == sentinel, 0, ids)
335+
knns = ids.astype(np.int32)
336+
mobj = re.search(r"op_eps(\d+)_md(\d+)", op.name)
337+
eps_i, md = mobj.group(1), mobj.group(2)
338+
eps = int(eps_i) / 1000.0
339+
params = (f"mode={c['mode']},k_graph={c['k_graph']},k_ext={c['k_ext']},"
340+
f"flas={int(bool(c.get('use_flas')))},num_runs={c['num_runs']},"
341+
f"eps_search={eps},max_dist={md}")
342+
# Task 2 is scored on search time: querytime = search, buildtime = one-time build.
343+
# Config index keeps filenames unique across builds (e.g. mode5 vs mode7).
344+
fn = os.path.join(output_dir, f"deglib_c{ci}_eps{eps_i}_md{md}.h5")
345+
store_results(fn, ALGO, dataset, "task2", dists, knns, t_build, t_search, params)
346+
print(f" wrote {fn} buildtime={t_build:.3f}s querytime={t_search:.4f}s knns={knns.shape}")
305347
finally:
306-
if op_dir is not None:
307-
shutil.rmtree(op_dir, ignore_errors=True)
348+
if op_root is not None:
349+
shutil.rmtree(op_root, ignore_errors=True)
308350
if tmp:
309351
try:
310352
os.unlink(tmp)

0 commit comments

Comments
 (0)