Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,9 @@ venv/
results/
logs/

# Worktrees
.worktrees/

# AI rules
CLAUDE.md
AGENTS.md
13 changes: 3 additions & 10 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -27,11 +27,6 @@ python >= 3.11
pip install vectordb-bench
```

**Install all database clients**

``` shell
pip install 'vectordb-bench[all]'
```
**Install the specific database client**

```shell
Expand All @@ -42,7 +37,6 @@ All the database client supported
| Optional database client | install command |
|--------------------------|---------------------------------------------|
| pymilvus, zilliz_cloud (*default*) | `pip install vectordb-bench` |
| all (*client requirements might conflict with each other*) | `pip install vectordb-bench[all]` |
| qdrant | `pip install vectordb-bench[qdrant]` |
| pinecone | `pip install vectordb-bench[pinecone]` |
| weaviate | `pip install vectordb-bench[weaviate]` |
Expand Down Expand Up @@ -225,7 +219,6 @@ Options:

--ondisk Ondisk mode with binary quantization (32x compression)
--oversample-factor Controls the degree of oversampling applied to minority classes in imbalanced datasets to improve model performance by balancing class distributions. (default 1.0)


# Quantization Type
--quantization-type TEXT Which type of quantization to use; valid values: [fp32, fp16, bq]
Expand Down Expand Up @@ -294,13 +287,13 @@ Options:
# Connection
--cloud-id TEXT Elastic Cloud ID [required]
--password TEXT Elastic Cloud password [required]

# HNSW Index Parameters
--m INTEGER HNSW M parameter [default: 16]
--ef-construction INTEGER HNSW efConstruction parameter [default: 100]
--num-candidates INTEGER Number of candidates for search [default: 100]
--element-type [float|byte] Element type for vectors (float: 4 bytes, byte: 1 byte) [default: float]

# Index Configuration
--number-of-shards INTEGER Number of shards [default: 1]
--number-of-replicas INTEGER Number of replicas [default: 0]
Expand All @@ -311,7 +304,7 @@ Options:
--use-routing BOOLEAN Whether to use routing [default: False]
--use-rescore BOOLEAN Whether to use rescore [default: False]
--oversample-ratio FLOAT Oversample ratio for rescore [default: 2.0]

# Common Options
--case-type [CapacityDim128|CapacityDim960|Performance768D100M|...]
Case type
Expand Down
33 changes: 1 addition & 32 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,6 @@ dependencies = [
"pydantic<v2",
"scikit-learn",
"pymilvus", # with pandas, numpy, ujson
"ujson",
"hdrhistogram>=0.10.1",
]
dynamic = ["version"]
Expand All @@ -51,37 +50,7 @@ test = [
"ruff",
"pytest",
]
restful = [ "flask" ]

all = [
"grpcio==1.53.0", # for qdrant-client and pymilvus
"grpcio-tools==1.53.0", # for qdrant-client and pymilvus
"qdrant-client",
"pinecone",
"weaviate-client",
"elasticsearch",
"sqlalchemy",
"redis",
"chromadb",
"pgvector",
"psycopg",
"psycopg-binary",
"pgvecto_rs[psycopg3]>=0.2.2",
"opensearch-dsl",
"opensearch-py",
"memorydb",
"alibabacloud_ha3engine_vector",
"mariadb",
"PyMySQL",
"clickhouse-connect",
"pyvespa",
"lancedb",
"mysql-connector-python",
"turbopuffer[fast]",
'zvec',
"endee==0.1.10", # compatible with pydantic<2
]

restful = [ "flask" ]
qdrant = [ "qdrant-client" ]
pinecone = [ "pinecone" ]
weaviate = [ "weaviate-client" ]
Expand Down
3 changes: 2 additions & 1 deletion tests/pytest.ini
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
[pytest]

filterwarnings =
filterwarnings =
ignore::UserWarning
ignore::DeprecationWarning
3 changes: 2 additions & 1 deletion tests/test_bench_runner.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
import time
import logging

import ujson
from vectordb_bench.interface import BenchMarkRunner
from vectordb_bench.models import (
DB, IndexType, CaseType, TaskConfig, CaseConfig,
Expand Down Expand Up @@ -55,6 +57,5 @@ def test_performance_case_no_error(self):
d = t.json(exclude={'db_config': {'password', 'api_key'}})
log.info(f"{d}")

import ujson
loads = ujson.loads(d)
log.info(f"{loads}")
39 changes: 39 additions & 0 deletions tests/test_milvus.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
"""E2E test for Milvus client using MilvusClient API.

Requires a running Milvus instance at localhost:19530.
"""

import logging

from pydantic import SecretStr

from vectordb_bench.backend.clients import DB
from vectordb_bench.backend.clients.api import IndexType
from vectordb_bench.backend.clients.milvus.config import MilvusConfig
from vectordb_bench.backend.cases import CaseType
from vectordb_bench.interface import BenchMarkRunner
from vectordb_bench.models import CaseConfig, TaskConfig


# Module-level logger named after this test module; used to report the benchmark result.
log = logging.getLogger(__name__)


class TestMilvus:
    """End-to-end check of the Milvus client on the Performance1536D50K case.

    The test talks to a Milvus server at ``http://localhost:19530``; it is
    not a unit test and will not pass without that service available.
    """

    def test_performance_1536d_50k(self):
        """Run Performance1536D50K on Milvus with a Flat index and expect results.

        Builds a single ``TaskConfig``, submits it to a ``BenchMarkRunner``,
        and asserts that the runner reports at least one result afterwards.
        """
        runner = BenchMarkRunner()

        # Connection settings for the local Milvus instance.
        milvus_config = MilvusConfig(uri=SecretStr("http://localhost:19530"))

        # Look up the case-config class registered for the Flat index type,
        # then instantiate it with its defaults.
        flat_case_config_cls = DB.Milvus.case_config_cls(index_type=IndexType.Flat)
        flat_case_config = flat_case_config_cls()

        benchmark_case = CaseConfig(case_id=CaseType.Performance1536D50K)

        task_config = TaskConfig(
            db=DB.Milvus,
            db_config=milvus_config,
            db_case_config=flat_case_config,
            case_config=benchmark_case,
        )

        tasks = [task_config]
        runner.run(tasks)

        # NOTE(review): presumably blocks until the submitted task has
        # finished so results are available below — confirm against
        # BenchMarkRunner.
        runner._sync_running_task()

        result = runner.get_results()
        log.info(f"test result: {result}")
        assert len(result) > 0
Loading
Loading