Skip to content

Commit 24ba987

Browse files
committed
feat: add support for PolarDB
- add PolarDB vector search client with FAISS_HNSW_FLAT, FAISS_HNSW_PQ, and FAISS_HNSW_SQ index types - add CLI integration with hnswflat, hnswpq, and hnswsq benchmark commands - add frontend (Streamlit) UI support with index type selection, HNSW/PQ/SQ parameter configuration
1 parent cf536a9 commit 24ba987

12 files changed

Lines changed: 900 additions & 1 deletion

File tree

README.md

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,7 @@ All the database client supported
6262
| hologres | `pip install vectordb-bench[hologres]` |
6363
| tencent_es | `pip install vectordb-bench[tencent_es]` |
6464
| alisql | `pip install 'vectordb-bench[alisql]'` |
65+
| polardb | `pip install vectordb-bench[polardb]` |
6566
| doris | `pip install vectordb-bench[doris]` |
6667
| zvec | `pip install vectordb-bench[zvec]` |
6768
| endee | `pip install vectordb-bench[endee]` |
@@ -527,6 +528,47 @@ To list the options for Lindorm, execute `vectordbbench lindormhnsw --help`, The
527528
--ef-search INTEGER hnsw ef-search [required]
528529
```
529530

531+
### Run PolarDB from command line
532+
533+
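PolarDB supports three index types: faiss_hnsw_flat, faiss_hnsw_pq, and faiss_hnsw_sq. Each has its own command: `polardbhnswflat`, `polardbhnswpq`, and `polardbhnswsq`, respectively.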
534+
535+
**Example: Run faiss_hnsw_flat benchmark**
536+
537+
```shell
538+
vectordbbench polardbhnswflat \
539+
--case-type Performance768D1M \
540+
--username <db_user> \
541+
--password '<db_password>' \
542+
--host <db_host> \
543+
--port 3306 \
544+
--m 16 \
545+
--ef-construction 256 \
546+
--ef-search 256 \
547+
--insert-workers 64 \
548+
--num-concurrency '10,20,40,60,80' \
549+
--concurrency-duration 60 \
550+
--task-label <task_label> \
551+
--db-label <db_label> \
552+
--skip-search-serial \
553+
--post-load-index
554+
```
555+
556+
To list the options for PolarDB, execute `vectordbbench polardbhnswflat --help`. The following are some PolarDB-specific command-line options.
557+
558+
```text
559+
--username TEXT Username [required]
560+
--password TEXT Password
561+
--host TEXT Db host [default: 127.0.0.1]
562+
--port INTEGER Db Port [default: 3306]
563+
--database TEXT Database name [default: vectordbbench]
564+
--m INTEGER M parameter (max_degree) in HNSW
565+
--ef-construction INTEGER ef_construction parameter in HNSW
566+
--ef-search INTEGER polar_vector_index_hnsw_ef_search session variable
567+
--insert-workers INTEGER Number of concurrent threads for data insertion
568+
--post-load-index / --inline-index
569+
Create index after load or inline at table creation
570+
```
571+
530572
#### Using a configuration file.
531573

532574
The vectordbbench command can optionally read some or all the options from a yaml formatted configuration file.

install/requirements_py3.11.txt

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,5 +26,6 @@ pymilvus
2626
clickhouse_connect
2727
pyvespa
2828
mysql-connector-python
29+
PyMySQL
2930
packaging
30-
hdrhistogram>=0.10.1
31+
hdrhistogram>=0.10.1

pyproject.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -106,6 +106,7 @@ vespa = [ "pyvespa" ]
106106
lancedb = [ "lancedb" ]
107107
oceanbase = [ "mysql-connector-python" ]
108108
alisql = [ "mysql-connector-python" ]
109+
polardb = [ "PyMySQL" ]
109110
doris = [ "doris-vector-search" ]
110111
turbopuffer = [ "turbopuffer" ]
111112
zvec = [ "zvec" ]

vectordb_bench/backend/clients/__init__.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,7 @@ class DB(Enum):
5959
Zvec = "Zvec"
6060
Endee = "Endee"
6161
Lindorm = "Lindorm"
62+
PolarDB = "PolarDB"
6263

6364
@property
6465
def init_cls(self) -> type[VectorDB]: # noqa: PLR0911, PLR0912, C901, PLR0915
@@ -246,6 +247,11 @@ def init_cls(self) -> type[VectorDB]: # noqa: PLR0911, PLR0912, C901, PLR0915
246247

247248
return LindormVector
248249

250+
if self == DB.PolarDB:
251+
from .polardb.polardb import PolarDB
252+
253+
return PolarDB
254+
249255
msg = f"Unknown DB: {self.name}"
250256
raise ValueError(msg)
251257

@@ -435,6 +441,11 @@ def config_cls(self) -> type[DBConfig]: # noqa: PLR0911, PLR0912, C901, PLR0915
435441

436442
return LindormConfig
437443

444+
if self == DB.PolarDB:
445+
from .polardb.config import PolarDBConfig
446+
447+
return PolarDBConfig
448+
438449
msg = f"Unknown DB: {self.name}"
439450
raise ValueError(msg)
440451

@@ -581,6 +592,11 @@ def case_config_cls( # noqa: C901, PLR0911, PLR0912, PLR0915
581592

582593
return AliSQLIndexConfig
583594

595+
if self == DB.PolarDB:
596+
from .polardb.config import _polardb_case_config
597+
598+
return _polardb_case_config.get(index_type)
599+
584600
if self == DB.Doris:
585601
from .doris.config import DorisCaseConfig
586602

vectordb_bench/backend/clients/polardb/__init__.py

Whitespace-only changes.
Lines changed: 242 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,242 @@
1+
from typing import Annotated, Unpack
2+
3+
import click
4+
from pydantic import SecretStr
5+
6+
from vectordb_bench.backend.clients import DB
7+
8+
from ....cli.cli import (
9+
CommonTypedDict,
10+
cli,
11+
click_parameter_decorators_from_typed_dict,
12+
run,
13+
)
14+
15+
16+
class PolarDBTypedDict(CommonTypedDict):
    """Connection options shared by the PolarDB benchmark commands.

    Every field pairs its value type with the ``click.option`` that exposes
    it on the command line.
    """

    # Login account; the only connection option without a default.
    # The flag is ``--username``, and ``_build_db_config`` reads the parsed
    # value back under the ``username`` key.
    user_name: Annotated[
        str,
        click.option("--username", type=str, required=True, help="Username"),
    ]

    # Defaults to an empty string, which ``_build_db_config`` maps to None.
    password: Annotated[
        str,
        click.option("--password", type=str, default="", help="Password"),
    ]

    host: Annotated[
        str,
        click.option("--host", type=str, default="127.0.0.1", help="Db host"),
    ]

    port: Annotated[
        int,
        click.option("--port", type=int, help="Db Port", default=3306),
    ]

    database: Annotated[
        str,
        click.option("--database", type=str, default="vectordbbench", help="Database name"),
    ]

    # Defaults to an empty string, which ``_build_db_config`` maps to None.
    unix_socket: Annotated[
        str,
        click.option(
            "--unix-socket",
            type=str,
            default="",
            help="Unix socket path (overrides host/port if set)",
        ),
    ]
75+
76+
77+
class PolarDBHNSWTypedDict(PolarDBTypedDict):
    """Connection options plus the HNSW settings common to all PolarDB commands.

    Covers the graph build parameters, the search-time parameter, the insert
    parallelism, and the choice of when the vector index is created.
    """

    m: Annotated[
        int,
        click.option("--m", type=int, default=16, help="M parameter (max_degree) in HNSW"),
    ]

    ef_construction: Annotated[
        int,
        click.option(
            "--ef-construction",
            type=int,
            default=200,
            help="ef_construction parameter in HNSW",
        ),
    ]

    ef_search: Annotated[
        int,
        click.option(
            "--ef-search",
            type=int,
            default=64,
            help="polar_vector_index_hnsw_ef_search session variable",
        ),
    ]

    insert_workers: Annotated[
        int,
        click.option(
            "--insert-workers",
            type=int,
            default=10,
            help="Number of concurrent threads for data insertion",
        ),
    ]

    # Paired on/off flag: ``--post-load-index`` yields True, ``--inline-index``
    # yields False, and False is used when neither is given.
    post_load_index: Annotated[
        bool,
        click.option(
            "--post-load-index/--inline-index",
            type=bool,
            default=False,
            help="If set, create vector index via ALTER TABLE after data load; otherwise create index inline during table creation",
        ),
    ]
127+
128+
129+
class PolarDBHNSWPQTypedDict(PolarDBHNSWTypedDict):
    """HNSW options plus the product-quantization (PQ) settings.

    Both PQ options are range-checked by click, so an out-of-range value is
    rejected with a usage error while the command line is parsed instead of
    being forwarded to the benchmark run.
    """

    # The help text also requires pq_m to divide the vector dimension. The
    # dimension is not known when options are parsed, so only positivity is
    # enforced here.
    pq_m: Annotated[
        int,
        click.option(
            "--pq-m",
            type=click.IntRange(min=1),
            help="PQ subquantizer count (must divide dimension)",
            default=1,
        ),
    ]

    # Upper bound taken from the documented limit in the help text
    # (inclusive, like click.IntRange's default).
    pq_nbits: Annotated[
        int,
        click.option(
            "--pq-nbits",
            type=click.IntRange(min=1, max=24),
            help="PQ bits per subquantizer (max 24)",
            default=8,
        ),
    ]
149+
150+
151+
class PolarDBHNSWSQTypedDict(PolarDBHNSWTypedDict):
    """HNSW options plus the scalar-quantization (SQ) setting."""

    # Accepted as free-form text; no validation of the value happens here.
    sq_type: Annotated[
        str,
        click.option(
            "--sq-type",
            type=str,
            default="8bit",
            help="SQ quantizer type (8bit, 4bit, fp16, bf16, 6bit, etc.)",
        ),
    ]
161+
162+
163+
def _build_db_config(parameters):
    """Build the PolarDBConfig connection settings from parsed CLI parameters.

    An empty password or unix socket (the CLI defaults) is treated as
    "not provided" and passed to the config as None.
    """
    # Imported lazily, matching the other local imports in this module.
    from .config import PolarDBConfig

    raw_password = parameters["password"]
    socket_path = parameters["unix_socket"]

    settings = {
        "db_label": parameters["db_label"],
        "user_name": parameters["username"],
        # Wrap only a non-empty password so the config never holds an empty secret.
        "password": SecretStr(raw_password) if raw_password else None,
        "host": parameters["host"],
        "port": parameters["port"],
        "database": parameters["database"],
        "unix_socket": socket_path or None,
    }
    return PolarDBConfig(**settings)
177+
178+
179+
@cli.command()
@click_parameter_decorators_from_typed_dict(PolarDBHNSWTypedDict)
def PolarDBHNSWFlat(**parameters: Unpack[PolarDBHNSWTypedDict]):
    # CLI command: run the benchmark against PolarDB using the HNSW flat
    # case config. Documented with comments on purpose: click would publish a
    # docstring as the command's help text.
    from .config import PolarDBHNSWFlatConfig

    connection = _build_db_config(parameters)
    index_settings = PolarDBHNSWFlatConfig(
        M=parameters["m"],
        ef_construction=parameters["ef_construction"],
        ef_search=parameters["ef_search"],
        insert_workers=parameters["insert_workers"],
        post_load_index=parameters["post_load_index"],
    )
    # The full parameter dict is forwarded as well; ``run`` picks up the
    # common benchmark options from it.
    run(db=DB.PolarDB, db_config=connection, db_case_config=index_settings, **parameters)
198+
199+
200+
@cli.command()
@click_parameter_decorators_from_typed_dict(PolarDBHNSWPQTypedDict)
def PolarDBHNSWPQ(**parameters: Unpack[PolarDBHNSWPQTypedDict]):
    # CLI command: run the benchmark against PolarDB using the HNSW + PQ
    # case config. Documented with comments on purpose: click would publish a
    # docstring as the command's help text.
    from .config import PolarDBHNSWPQConfig

    connection = _build_db_config(parameters)
    index_settings = PolarDBHNSWPQConfig(
        M=parameters["m"],
        ef_construction=parameters["ef_construction"],
        ef_search=parameters["ef_search"],
        insert_workers=parameters["insert_workers"],
        post_load_index=parameters["post_load_index"],
        # Product-quantization settings specific to this command.
        pq_m=parameters["pq_m"],
        pq_nbits=parameters["pq_nbits"],
    )
    # The full parameter dict is forwarded as well; ``run`` picks up the
    # common benchmark options from it.
    run(db=DB.PolarDB, db_config=connection, db_case_config=index_settings, **parameters)
221+
222+
223+
@cli.command()
@click_parameter_decorators_from_typed_dict(PolarDBHNSWSQTypedDict)
def PolarDBHNSWSQ(**parameters: Unpack[PolarDBHNSWSQTypedDict]):
    # CLI command: run the benchmark against PolarDB using the HNSW + SQ
    # case config. Documented with comments on purpose: click would publish a
    # docstring as the command's help text.
    from .config import PolarDBHNSWSQConfig

    connection = _build_db_config(parameters)
    index_settings = PolarDBHNSWSQConfig(
        M=parameters["m"],
        ef_construction=parameters["ef_construction"],
        ef_search=parameters["ef_search"],
        insert_workers=parameters["insert_workers"],
        post_load_index=parameters["post_load_index"],
        # Scalar-quantization setting specific to this command.
        sq_type=parameters["sq_type"],
    )
    # The full parameter dict is forwarded as well; ``run`` picks up the
    # common benchmark options from it.
    run(db=DB.PolarDB, db_config=connection, db_case_config=index_settings, **parameters)

0 commit comments

Comments
 (0)