Skip to content

Commit 99c3115

Browse files
authored
enhance: Migrate PyMilvus orm to MilvusClient (#738)
* enhance: Migrate PyMilvus orm to MilvusClient * chore: add .worktrees/ to .gitignore Signed-off-by: yangxuan <xuan.yang@zilliz.com>
1 parent ea9d3ec commit 99c3115

7 files changed

Lines changed: 147 additions & 151 deletions

File tree

.gitignore

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,9 @@ venv/
1313
results/
1414
logs/
1515

16+
# Worktrees
17+
.worktrees/
18+
1619
# AI rules
1720
CLAUDE.md
1821
AGENTS.md

README.md

Lines changed: 3 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -27,11 +27,6 @@ python >= 3.11
2727
pip install vectordb-bench
2828
```
2929

30-
**Install all database clients**
31-
32-
``` shell
33-
pip install 'vectordb-bench[all]'
34-
```
3530
**Install the specific database client**
3631

3732
```shell
@@ -42,7 +37,6 @@ All the database client supported
4237
| Optional database client | install command |
4338
|--------------------------|---------------------------------------------|
4439
| pymilvus, zilliz_cloud (*default*) | `pip install vectordb-bench` |
45-
| all (*client requirements might conflict with each other*) | `pip install vectordb-bench[all]` |
4640
| qdrant | `pip install vectordb-bench[qdrant]` |
4741
| pinecone | `pip install vectordb-bench[pinecone]` |
4842
| weaviate | `pip install vectordb-bench[weaviate]` |
@@ -225,7 +219,6 @@ Options:
225219
226220
--ondisk Ondisk mode with binary quantization (32x compression)
227221
--oversample-factor Controls the degree of oversampling applied to minority classes in imbalanced datasets to improve model performance by balancing class distributions. (default 1.0)
228-
229222
230223
# Quantization Type
231224
--quantization-type TEXT Which type of quantization to use; valid values: [fp32, fp16, bq]
@@ -294,13 +287,13 @@ Options:
294287
# Connection
295288
--cloud-id TEXT Elastic Cloud ID [required]
296289
--password TEXT Elastic Cloud password [required]
297-
290+
298291
# HNSW Index Parameters
299292
--m INTEGER HNSW M parameter [default: 16]
300293
--ef-construction INTEGER HNSW efConstruction parameter [default: 100]
301294
--num-candidates INTEGER Number of candidates for search [default: 100]
302295
--element-type [float|byte] Element type for vectors (float: 4 bytes, byte: 1 byte) [default: float]
303-
296+
304297
# Index Configuration
305298
--number-of-shards INTEGER Number of shards [default: 1]
306299
--number-of-replicas INTEGER Number of replicas [default: 0]
@@ -311,7 +304,7 @@ Options:
311304
--use-routing BOOLEAN Whether to use routing [default: False]
312305
--use-rescore BOOLEAN Whether to use rescore [default: False]
313306
--oversample-ratio FLOAT Oversample ratio for rescore [default: 2.0]
314-
307+
315308
# Common Options
316309
--case-type [CapacityDim128|CapacityDim960|Performance768D100M|...]
317310
Case type

pyproject.toml

Lines changed: 1 addition & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,6 @@ dependencies = [
4040
"pydantic<v2",
4141
"scikit-learn",
4242
"pymilvus", # with pandas, numpy, ujson
43-
"ujson",
4443
"hdrhistogram>=0.10.1",
4544
]
4645
dynamic = ["version"]
@@ -51,37 +50,7 @@ test = [
5150
"ruff",
5251
"pytest",
5352
]
54-
restful = [ "flask" ]
55-
56-
all = [
57-
"grpcio==1.53.0", # for qdrant-client and pymilvus
58-
"grpcio-tools==1.53.0", # for qdrant-client and pymilvus
59-
"qdrant-client",
60-
"pinecone",
61-
"weaviate-client",
62-
"elasticsearch",
63-
"sqlalchemy",
64-
"redis",
65-
"chromadb",
66-
"pgvector",
67-
"psycopg",
68-
"psycopg-binary",
69-
"pgvecto_rs[psycopg3]>=0.2.2",
70-
"opensearch-dsl",
71-
"opensearch-py",
72-
"memorydb",
73-
"alibabacloud_ha3engine_vector",
74-
"mariadb",
75-
"PyMySQL",
76-
"clickhouse-connect",
77-
"pyvespa",
78-
"lancedb",
79-
"mysql-connector-python",
80-
"turbopuffer[fast]",
81-
'zvec',
82-
"endee==0.1.10", # compatible with pydantic<2
83-
]
84-
53+
restful = [ "flask" ]
8554
qdrant = [ "qdrant-client" ]
8655
pinecone = [ "pinecone" ]
8756
weaviate = [ "weaviate-client" ]

tests/pytest.ini

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
[pytest]
22

3-
filterwarnings =
3+
filterwarnings =
44
ignore::UserWarning
5+
ignore::DeprecationWarning

tests/test_bench_runner.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
import time
22
import logging
3+
4+
import ujson
35
from vectordb_bench.interface import BenchMarkRunner
46
from vectordb_bench.models import (
57
DB, IndexType, CaseType, TaskConfig, CaseConfig,
@@ -55,6 +57,5 @@ def test_performance_case_no_error(self):
5557
d = t.json(exclude={'db_config': {'password', 'api_key'}})
5658
log.info(f"{d}")
5759

58-
import ujson
5960
loads = ujson.loads(d)
6061
log.info(f"{loads}")

tests/test_milvus.py

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
"""E2E test for Milvus client using MilvusClient API.
2+
3+
Requires a running Milvus instance at localhost:19530.
4+
"""
5+
6+
import logging
7+
8+
from pydantic import SecretStr
9+
10+
from vectordb_bench.backend.clients import DB
11+
from vectordb_bench.backend.clients.api import IndexType
12+
from vectordb_bench.backend.clients.milvus.config import MilvusConfig
13+
from vectordb_bench.backend.cases import CaseType
14+
from vectordb_bench.interface import BenchMarkRunner
15+
from vectordb_bench.models import CaseConfig, TaskConfig
16+
17+
18+
log = logging.getLogger(__name__)
19+
20+
21+
class TestMilvus:
22+
"""E2E test for Milvus using Performance1536D50K (OpenAI 50K dataset)."""
23+
24+
def test_performance_1536d_50k(self):
25+
"""Full benchmark: download dataset, insert, optimize (force merge), search."""
26+
runner = BenchMarkRunner()
27+
28+
task_config = TaskConfig(
29+
db=DB.Milvus,
30+
db_config=MilvusConfig(uri=SecretStr("http://localhost:19530")),
31+
db_case_config=DB.Milvus.case_config_cls(index_type=IndexType.Flat)(),
32+
case_config=CaseConfig(case_id=CaseType.Performance1536D50K),
33+
)
34+
35+
runner.run([task_config])
36+
runner._sync_running_task()
37+
result = runner.get_results()
38+
log.info(f"test result: {result}")
39+
assert len(result) > 0

0 commit comments

Comments
 (0)