Skip to content

Commit 1c5b6c6

Browse files
committed
Enhance download functionality and server readiness logging in examples
- Added support for downloading files using wget, curl, and Python with progress reporting.
- Improved logging for server readiness checks in vector search and collection load functions.
- Updated benchmark results documentation for clarity and completeness.
- Adjusted dataset parameters in run scripts for better performance.
1 parent f9dc975 commit 1c5b6c6

7 files changed

Lines changed: 262 additions & 54 deletions

File tree

bindings/python/examples/12_vector_search.py

Lines changed: 49 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -1358,17 +1358,29 @@ def wait_for_milvus_ready(host: str, port: int, timeout_sec: int = 300) -> None:
13581358
logging.getLogger("pymilvus").setLevel(logging.CRITICAL)
13591359

13601360
start = time.perf_counter()
1361+
last_report = 0.0
1362+
print(
1363+
f"[milvus] Waiting for server readiness at {host}:{port} "
1364+
f"(timeout={timeout_sec}s)"
1365+
)
13611366
while True:
13621367
try:
13631368
connections.connect(alias="_bootstrap", host=host, port=str(port))
13641369
_ = utility.get_server_version(using="_bootstrap")
13651370
connections.disconnect(alias="_bootstrap")
1371+
elapsed = time.perf_counter() - start
1372+
print(f"[milvus] Server ready after {elapsed:.1f}s")
13661373
return
13671374
except Exception:
1368-
if time.perf_counter() - start > timeout_sec:
1375+
elapsed = time.perf_counter() - start
1376+
if elapsed > timeout_sec:
13691377
raise SystemExit(
1370-
f"Milvus did not become ready within {timeout_sec}s at {host}:{port}"
1378+
"Milvus did not become ready within "
1379+
f"{timeout_sec}s at {host}:{port}"
13711380
)
1381+
if elapsed - last_report >= 5.0:
1382+
print(f"[milvus] Still waiting for server readiness... {elapsed:.1f}s")
1383+
last_report = elapsed
13721384
time.sleep(1)
13731385

13741386

@@ -1382,9 +1394,17 @@ def wait_for_milvus_collection_load(
13821394

13831395
logging.getLogger("pymilvus").setLevel(logging.CRITICAL)
13841396
start = time.perf_counter()
1397+
last_report = 0.0
1398+
collection_name = getattr(collection, "name", "<unknown>")
1399+
print(
1400+
f"[milvus] Waiting for collection load: {collection_name} "
1401+
f"(timeout={timeout_sec}s)"
1402+
)
13851403
while True:
13861404
try:
13871405
collection.load()
1406+
elapsed = time.perf_counter() - start
1407+
print(f"[milvus] Collection loaded after {elapsed:.1f}s: {collection_name}")
13881408
return
13891409
except Exception as exc:
13901410
msg = str(exc)
@@ -1396,11 +1416,15 @@ def wait_for_milvus_collection_load(
13961416
if not transient:
13971417
raise
13981418

1399-
if time.perf_counter() - start > timeout_sec:
1419+
elapsed = time.perf_counter() - start
1420+
if elapsed > timeout_sec:
14001421
raise SystemExit(
14011422
"Milvus collection load did not become ready within "
14021423
f"{timeout_sec}s"
14031424
) from exc
1425+
if elapsed - last_report >= 5.0:
1426+
print(f"[milvus] Still waiting for collection load... {elapsed:.1f}s")
1427+
last_report = elapsed
14041428
time.sleep(poll_sec)
14051429

14061430

@@ -2096,7 +2120,7 @@ def main() -> None:
20962120
if not qids:
20972121
raise SystemExit("No valid query IDs with ground truth found")
20982122

2099-
(queries, _dur, _r0, _r1) = timed_section(
2123+
queries, _dur, _r0, _r1 = timed_section(
21002124
"load_queries",
21012125
lambda: materialize_queries(sources, qids, dim=dim),
21022126
)
@@ -2139,13 +2163,13 @@ def record_phase(
21392163

21402164
phases: List[dict] = []
21412165

2142-
(db, dur, r0, r1) = timed_section(
2166+
db, dur, r0, r1 = timed_section(
21432167
"open_db",
21442168
lambda: arcadedb.open_database(str(db_path), jvm_kwargs=jvm_kwargs),
21452169
)
21462170
phases.append(record_phase("open_db", {}, dur, r0, r1))
21472171

2148-
(stats, dur, r0, r1) = timed_section(
2172+
stats, dur, r0, r1 = timed_section(
21492173
"search",
21502174
lambda: run_repeated_search(
21512175
lambda run_queries, run_qids: search_arcadedb(
@@ -2166,7 +2190,7 @@ def record_phase(
21662190
)
21672191
phases.append(record_phase("search", stats, dur, r0, r1))
21682192

2169-
(_, dur, r0, r1) = timed_section("close_db", lambda: db.close())
2193+
_, dur, r0, r1 = timed_section("close_db", lambda: db.close())
21702194
phases.append(record_phase("close_db", {}, dur, r0, r1))
21712195

21722196
sweeps.append(
@@ -2213,13 +2237,13 @@ def record_phase(
22132237

22142238
phases: List[dict] = []
22152239

2216-
(index, dur, r0, r1) = timed_section(
2240+
index, dur, r0, r1 = timed_section(
22172241
"open_db",
22182242
lambda: faiss.read_index(str(index_path)),
22192243
)
22202244
phases.append(record_phase("open_db", {}, dur, r0, r1))
22212245

2222-
(stats, dur, r0, r1) = timed_section(
2246+
stats, dur, r0, r1 = timed_section(
22232247
"search",
22242248
lambda: run_repeated_search(
22252249
lambda run_queries, run_qids: search_faiss(
@@ -2239,7 +2263,7 @@ def record_phase(
22392263
)
22402264
phases.append(record_phase("search", stats, dur, r0, r1))
22412265

2242-
(_, dur, r0, r1) = timed_section("close_db", lambda: None)
2266+
_, dur, r0, r1 = timed_section("close_db", lambda: None)
22432267
phases.append(record_phase("close_db", {}, dur, r0, r1))
22442268

22452269
sweeps.append(
@@ -2285,13 +2309,13 @@ def record_phase(
22852309

22862310
phases: List[dict] = []
22872311

2288-
((db, table), dur, r0, r1) = timed_section(
2312+
(db, table), dur, r0, r1 = timed_section(
22892313
"open_db",
22902314
lambda: open_lancedb_table(lancedb_dir, table_name),
22912315
)
22922316
phases.append(record_phase("open_db", {}, dur, r0, r1))
22932317

2294-
(stats, dur, r0, r1) = timed_section(
2318+
stats, dur, r0, r1 = timed_section(
22952319
"search",
22962320
lambda: run_repeated_search(
22972321
lambda run_queries, run_qids: search_lancedb(
@@ -2313,7 +2337,7 @@ def record_phase(
23132337
phases.append(record_phase("search", stats, dur, r0, r1))
23142338

23152339
close_db_fn = getattr(db, "close", None)
2316-
(_, dur, r0, r1) = timed_section(
2340+
_, dur, r0, r1 = timed_section(
23172341
"close_db",
23182342
lambda: close_db_fn() if callable(close_db_fn) else None,
23192343
)
@@ -2353,13 +2377,13 @@ def record_phase(
23532377

23542378
phases: List[dict] = []
23552379

2356-
(corpus_vectors_normalized, dur, r0, r1) = timed_section(
2380+
corpus_vectors_normalized, dur, r0, r1 = timed_section(
23572381
"open_db",
23582382
lambda: normalize_rows(materialize_corpus_vectors(sources, dim)),
23592383
)
23602384
phases.append(record_phase("open_db", {}, dur, r0, r1))
23612385

2362-
(stats, dur, r0, r1) = timed_section(
2386+
stats, dur, r0, r1 = timed_section(
23632387
"search",
23642388
lambda: run_repeated_search(
23652389
lambda run_queries, run_qids: search_bruteforce(
@@ -2378,7 +2402,7 @@ def record_phase(
23782402
)
23792403
phases.append(record_phase("search", stats, dur, r0, r1))
23802404

2381-
(_, dur, r0, r1) = timed_section("close_db", lambda: None)
2405+
_, dur, r0, r1 = timed_section("close_db", lambda: None)
23822406
phases.append(record_phase("close_db", {}, dur, r0, r1))
23832407

23842408
sweeps.append(
@@ -2440,7 +2464,7 @@ def qdrant_pid_provider() -> int | None:
24402464

24412465
phases: List[dict] = []
24422466

2443-
(client, dur, r0, r1) = timed_section(
2467+
client, dur, r0, r1 = timed_section(
24442468
"open_db",
24452469
lambda: QdrantClient(
24462470
host=args.qdrant_host,
@@ -2453,7 +2477,7 @@ def qdrant_pid_provider() -> int | None:
24532477
phases.append(record_phase("open_db", {}, dur, r0, r1))
24542478

24552479
try:
2456-
(stats, dur, r0, r1) = timed_section(
2480+
stats, dur, r0, r1 = timed_section(
24572481
"search",
24582482
lambda: run_repeated_search(
24592483
lambda run_queries, run_qids: search_qdrant(
@@ -2475,7 +2499,7 @@ def qdrant_pid_provider() -> int | None:
24752499
)
24762500
phases.append(record_phase("search", stats, dur, r0, r1))
24772501
finally:
2478-
(_, dur, r0, r1) = timed_section(
2502+
_, dur, r0, r1 = timed_section(
24792503
"close_db",
24802504
lambda: client.close(),
24812505
rss_provider=rss_provider,
@@ -2551,7 +2575,7 @@ def milvus_pids_provider() -> List[int]:
25512575

25522576
phases: List[dict] = []
25532577

2554-
(_, dur, r0, r1) = timed_section(
2578+
_, dur, r0, r1 = timed_section(
25552579
"open_db",
25562580
lambda: connections.connect(
25572581
alias=alias,
@@ -2566,7 +2590,7 @@ def milvus_pids_provider() -> List[int]:
25662590
collection = Collection(args.milvus_collection, using=alias)
25672591
wait_for_milvus_collection_load(collection)
25682592

2569-
(stats, dur, r0, r1) = timed_section(
2593+
stats, dur, r0, r1 = timed_section(
25702594
"search",
25712595
lambda: run_repeated_search(
25722596
lambda run_queries, run_qids: search_milvus(
@@ -2587,7 +2611,7 @@ def milvus_pids_provider() -> List[int]:
25872611
)
25882612
phases.append(record_phase("search", stats, dur, r0, r1))
25892613
finally:
2590-
(_, dur, r0, r1) = timed_section(
2614+
_, dur, r0, r1 = timed_section(
25912615
"close_db",
25922616
lambda: connections.disconnect(alias=alias),
25932617
rss_provider=rss_provider,
@@ -2650,7 +2674,7 @@ def server_pid_provider() -> int | None:
26502674

26512675
phases: List[dict] = []
26522676

2653-
(conn, dur, r0, r1) = timed_section(
2677+
conn, dur, r0, r1 = timed_section(
26542678
"open_db",
26552679
lambda: psycopg.connect(
26562680
host=args.pg_host,
@@ -2665,7 +2689,7 @@ def server_pid_provider() -> int | None:
26652689
phases.append(record_phase("open_db", {}, dur, r0, r1))
26662690

26672691
try:
2668-
(stats, dur, r0, r1) = timed_section(
2692+
stats, dur, r0, r1 = timed_section(
26692693
"search",
26702694
lambda: run_repeated_search(
26712695
lambda run_queries, run_qids: search_pgvector(
@@ -2686,7 +2710,7 @@ def server_pid_provider() -> int | None:
26862710
)
26872711
phases.append(record_phase("search", stats, dur, r0, r1))
26882712
finally:
2689-
(_, dur, r0, r1) = timed_section(
2713+
_, dur, r0, r1 = timed_section(
26902714
"close_db",
26912715
lambda: conn.close(),
26922716
rss_provider=rss_provider,
Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
# 11 Vector Index Build Matrix Summary — All Dataset Sizes
2+
3+
- Generated (UTC): 2026-03-14T10:21:05Z
4+
- Dataset: all
5+
- Dataset size profile: all
6+
- Label prefix: sweep11
7+
- Total runs: 6
8+
- Versions/digest observed:
9+
- arcadedb: 26.4.1.dev0
10+
- arcadedb_docker_digest: arcadedata/arcadedb@sha256:f4dfd7e19a88145e67d3fd0852d59504374247f59b9e91ab30b9d0d726f4e46f
11+
- arcadedb_docker_tag: 26.4.1-SNAPSHOT
12+
- arcadedb_embedded: auto
13+
- faiss: 1.13.2
14+
- faiss_cpu: auto
15+
- lancedb: 0.29.2
16+
- milvus: 2.6.10
17+
- milvus_compose_version: v2.6.10
18+
- pgvector_image: pgvector/pgvector:pg18-trixie
19+
- postgres: 18.3 (Debian 18.3-1.pgdg13+1)
20+
- qdrant: 1.11.3
21+
- qdrant_image: qdrant/qdrant:v1.11.3
22+
- wheel_file: arcadedb_embedded-26.4.1.dev0-cp312-cp312-manylinux_2_35_x86_64.whl
23+
- wheel_source: local_bindings_source
24+
- wheel_version: 26.4.1.dev0
25+
- Run status files: total=6, success=6, failed=0
26+
- Note: LanceDB prefers pure `HNSW` when supported by the installed version; otherwise it falls back to single-partition `IVF_HNSW_SQ`.
27+
- Note: the following HNSW-similarity figures are heuristic estimates, not a formal metric: Faiss `HNSWFlat` ~= 100%; pgvector/Qdrant/Milvus HNSW ~= 85-95%; LanceDB pure `HNSW` ~= 90-95%; LanceDB single-partition `IVF_HNSW_SQ` ~= 75%; bruteforce is exact search, not HNSW.
28+
- Note: times are phase-level benchmark timings from each run result.
29+
- Note: `du_mib` is measured filesystem usage from `disk_usage_du.json`.
30+
31+
## Dataset: stackoverflow-medium
32+
33+
| backend | run_label | lancedb_index_type | lancedb_num_partitions | seed | mem_limit | threads | batch_size | count | rows | run_total_s | create_db_s | create_index_s | ingest_s | close_db_s | peak_rss_mib | db_size_mib | du_mib | status | exit_code |
34+
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
35+
| arcadedb_sql | sweep11_r01_arcadedb_sql_s00005_mem4g | | | 5 | 4g | 8 | 5,000 | 1,242,391 | 1,242,391 | 40,471.9 | 0.411 | 40,429.629 | 41.761 | 0.067 | 3,531.727 | 2,780.691 | 2,780.738 | success | 0 |
36+
| faiss | sweep11_r01_faiss_s00002_mem4g | | | 2 | 4g | 8 | 5,000 | 1,242,391 | 1,242,391 | 272.291 | 0 | 0 | 268.91 | 3.288 | 2,183.59 | 2,000.328 | 2,000.352 | success | 0 |
37+
| lancedb | sweep11_r01_lancedb_s00000_mem4g | IVF_HNSW_SQ | 1 | 0 | 4g | 8 | 5,000 | 1,242,391 | 1,242,391 | 102.742 | 0.001 | 65.878 | 35.92 | 0 | 1,820.965 | 2,534.132 | 2,535.824 | success | 0 |
38+
| milvus | sweep11_r01_milvus_s00004_mem4g | | | 4 | 4g | 8 | 5,000 | 1,242,391 | 1,242,391 | 177.853 | 0.21 | 0.783 | 164.208 | 0.297 | 1,875.211 | 9,370.606 | 9,387.688 | success | 0 |
39+
| pgvector | sweep11_r01_pgvector_s00003_mem4g | | | 3 | 4g | 8 | 5,000 | 1,242,391 | 1,242,391 | 3,071.21 | 0.007 | 2,867.267 | 200.276 | 0.003 | 3,913.195 | 5,439.027 | 5,439.277 | success | 0 |
40+
| qdrant | sweep11_r01_qdrant_s00001_mem4g | | | 1 | 4g | 8 | 5,000 | 1,242,391 | 1,242,391 | 403.535 | 0.612 | 0.248 | 400.614 | 0.066 | 3,530.176 | 2,669.249 | 2,608.531 | success | 0 |

0 commit comments

Comments
 (0)