Skip to content

Commit e83a6eb

Browse files
author
ningshuo
committed
feat: add support for Lindorm
1 parent 34e8f25 commit e83a6eb

9 files changed

Lines changed: 980 additions & 0 deletions

File tree

vectordb_bench/backend/clients/__init__.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,7 @@ class DB(Enum):
5858
TurboPuffer = "TurboPuffer"
5959
Zvec = "Zvec"
6060
Endee = "Endee"
61+
Lindorm = "Lindorm"
6162

6263
@property
6364
def init_cls(self) -> type[VectorDB]: # noqa: PLR0911, PLR0912, C901, PLR0915
@@ -240,6 +241,11 @@ def init_cls(self) -> type[VectorDB]: # noqa: PLR0911, PLR0912, C901, PLR0915
240241

241242
return Zvec
242243

244+
if self == DB.Lindorm:
245+
from .lindorm.lindorm_search import LindormVector
246+
247+
return LindormVector
248+
243249
msg = f"Unknown DB: {self.name}"
244250
raise ValueError(msg)
245251

@@ -424,6 +430,11 @@ def config_cls(self) -> type[DBConfig]: # noqa: PLR0911, PLR0912, C901, PLR0915
424430

425431
return ZvecConfig
426432

433+
if self == DB.Lindorm:
434+
from .lindorm.config import LindormConfig
435+
436+
return LindormConfig
437+
427438
msg = f"Unknown DB: {self.name}"
428439
raise ValueError(msg)
429440

@@ -585,6 +596,10 @@ def case_config_cls( # noqa: C901, PLR0911, PLR0912, PLR0915
585596

586597
return ChromaIndexConfig
587598

599+
if self == DB.Lindorm:
600+
from .lindorm.config import _lindorm_vector_case_config
601+
return _lindorm_vector_case_config.get(index_type)
602+
588603
# DB.Pinecone, DB.Redis
589604
return EmptyDBCaseConfig
590605

vectordb_bench/backend/clients/api.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ class IndexType(str, Enum):
2626
STREAMING_DISKANN = "DISKANN"
2727
IVFFlat = "IVF_FLAT"
2828
IVFPQ = "IVF_PQ"
29+
IVFBQ = "IVF_BQ"
2930
IVFSQ8 = "IVF_SQ8"
3031
IVF_RABITQ = "IVF_RABITQ"
3132
Flat = "FLAT"
Lines changed: 141 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,141 @@
1+
from typing import Annotated, Unpack
2+
3+
import click
4+
from pydantic import SecretStr
5+
6+
from vectordb_bench.cli.cli import (
7+
CommonTypedDict,
8+
cli,
9+
click_parameter_decorators_from_typed_dict,
10+
run, HNSWFlavor3, IVFFlatTypedDictN,
11+
)
12+
from vectordb_bench.backend.clients import DB
13+
14+
15+
class LindormTypedDict(CommonTypedDict):
    """Connection and filter options shared by every Lindorm CLI command."""

    # Connection target.
    host: Annotated[
        str,
        click.option("--host", type=str, help="host connection string", required=True),
    ]
    port: Annotated[
        int,
        click.option("--port", type=int, default=30070, help="Db Port"),
    ]

    # Credentials.
    user: Annotated[
        str,
        click.option("--user", type=str, help="Db username", required=True),
    ]
    password: Annotated[
        str,
        click.option("--password", type=str, help="Db password"),
    ]

    # Name of the index the benchmark operates on.
    index_name: Annotated[
        str,
        click.option("--index-name", type=str, help="Db index name", required=True),
    ]

    # Optional filter strategy; the accepted values are listed in the help text.
    filter_type: Annotated[
        str,
        click.option(
            "--filter-type",
            type=str,
            help="post_filter|pre_filter|efficient_filter",
            required=False,
        ),
    ]
33+
34+
class LindormHNSWTypedDict(CommonTypedDict, LindormTypedDict, HNSWFlavor3):
    """CLI options for the Lindorm HNSW command.

    Adds no keys of its own; it only combines the common, Lindorm and
    HNSWFlavor3 option sets.
    """
36+
37+
38+
@cli.command()
@click_parameter_decorators_from_typed_dict(LindormHNSWTypedDict)
def LindormHNSW(**parameters: Unpack[LindormHNSWTypedDict]):
    """Run the benchmark against Lindorm using an HNSW index.

    Builds a ``LindormConfig`` (connection) and an ``HNSWConfig`` (index and
    search settings) from the parsed CLI options and hands both to ``run``
    together with the full option set.

    Args:
        **parameters: Parsed CLI options as described by ``LindormHNSWTypedDict``.
    """
    from .config import HNSWConfig, LindormConfig

    # An omitted optional flag arrives as None. Forwarding that None would
    # override the default declared on HNSWConfig ("efficient_filter"), so
    # only values the user actually supplied are passed through.
    optional_keys = ("filter_type",)
    overrides = {
        key: parameters[key]
        for key in optional_keys
        if parameters.get(key) is not None
    }

    connection = LindormConfig(
        host=parameters["host"],
        port=parameters["port"],
        user=parameters["user"],
        password=SecretStr(parameters["password"]),
        index_name=parameters["index_name"],
    )
    case_config = HNSWConfig(
        M=parameters["m"],
        efConstruction=parameters["ef_construction"],
        efSearch=parameters["ef_search"],
        **overrides,
    )
    run(
        db=DB.Lindorm,
        db_config=connection,
        db_case_config=case_config,
        **parameters,
    )
59+
60+
class LindormIVFBQTypedMinDict(CommonTypedDict, LindormTypedDict, IVFFlatTypedDictN):
    """IVF options plus the ``exbits`` setting required by the IVFBQ index."""

    # The click option parses an integer, so the annotation is ``int`` as well
    # (it was previously annotated ``str``, contradicting ``type=int``).
    exbits: Annotated[
        int,
        click.option("--exbits", type=int, help="Exbits", required=True),
    ]
66+
67+
class LindormIVFPQTypedDict(CommonTypedDict, LindormTypedDict, IVFFlatTypedDictN, HNSWFlavor3):
    """CLI options for the Lindorm IVFPQ command.

    Combines the common, Lindorm, IVF and HNSWFlavor3 option sets and adds
    three optional search-tuning flags.
    """

    # Parsed as an integer to match ``IVFPQConfig.reorder_factor`` (``int | None``);
    # it was previously declared as a string in both the annotation and the option.
    reorder_factor: Annotated[
        int,
        click.option("--reorder-factor", type=int, help="reorder factor", required=False),
    ]

    client_refactor: Annotated[
        bool,
        click.option("--client-refactor", type=bool, help="client refactor", required=False),
    ]

    k_expand_scope: Annotated[
        int,
        click.option("--k-expand-scope", type=int, help="k expand scope", required=False),
    ]
76+
77+
@cli.command()
@click_parameter_decorators_from_typed_dict(LindormIVFPQTypedDict)
def LindormIVFPQ(**parameters: Unpack[LindormIVFPQTypedDict]):
    """Run the benchmark against Lindorm using an IVFPQ index.

    Builds a ``LindormConfig`` (connection) and an ``IVFPQConfig`` (index and
    search settings) from the parsed CLI options and hands both to ``run``
    together with the full option set.

    Args:
        **parameters: Parsed CLI options as described by ``LindormIVFPQTypedDict``.
    """
    from .config import IVFPQConfig, LindormConfig

    # Omitted optional flags arrive as None. Forwarding None would override
    # the defaults declared on IVFPQConfig, and ``client_refactor`` is a plain
    # ``bool`` there, so None is not a valid value for it. Pass only what the
    # user actually supplied.
    optional_keys = ("filter_type", "reorder_factor", "client_refactor", "k_expand_scope")
    overrides = {
        key: parameters[key]
        for key in optional_keys
        if parameters.get(key) is not None
    }

    connection = LindormConfig(
        host=parameters["host"],
        port=parameters["port"],
        user=parameters["user"],
        password=SecretStr(parameters["password"]),
        index_name=parameters["index_name"],
    )
    case_config = IVFPQConfig(
        nlist=parameters["nlist"],
        nprobe=parameters["nprobe"],
        centroids_hnsw_M=parameters["m"],
        centroids_hnsw_efConstruction=parameters["ef_construction"],
        centroids_hnsw_efSearch=parameters["ef_search"],
        **overrides,
    )
    run(
        db=DB.Lindorm,
        db_config=connection,
        db_case_config=case_config,
        **parameters,
    )
103+
104+
class LindormIVFBQTypedDict(CommonTypedDict, LindormTypedDict, LindormIVFBQTypedMinDict, HNSWFlavor3):
    """CLI options for the Lindorm IVFBQ command.

    Combines the common, Lindorm, IVFBQ-minimal and HNSWFlavor3 option sets
    and adds three optional search-tuning flags.
    """

    # Parsed as an integer to match ``IVFBQConfig.reorder_factor`` (``int | None``);
    # it was previously declared as a string in both the annotation and the option.
    reorder_factor: Annotated[
        int,
        click.option("--reorder-factor", type=int, help="reorder factor", required=False),
    ]

    client_refactor: Annotated[
        bool,
        click.option("--client-refactor", type=bool, help="client refactor", required=False),
    ]

    k_expand_scope: Annotated[
        int,
        click.option("--k-expand-scope", type=int, help="k expand scope", required=False),
    ]
113+
114+
@cli.command()
@click_parameter_decorators_from_typed_dict(LindormIVFBQTypedDict)
def LindormIVFBQ(**parameters: Unpack[LindormIVFBQTypedDict]):
    """Run the benchmark against Lindorm using an IVFBQ index.

    Builds a ``LindormConfig`` (connection) and an ``IVFBQConfig`` (index and
    search settings) from the parsed CLI options and hands both to ``run``
    together with the full option set.

    Args:
        **parameters: Parsed CLI options as described by ``LindormIVFBQTypedDict``.
    """
    from .config import IVFBQConfig, LindormConfig

    # Omitted optional flags arrive as None. Forwarding None would override
    # the defaults declared on IVFBQConfig, and ``client_refactor`` is a plain
    # ``bool`` there, so None is not a valid value for it. Pass only what the
    # user actually supplied.
    optional_keys = ("filter_type", "reorder_factor", "client_refactor", "k_expand_scope")
    overrides = {
        key: parameters[key]
        for key in optional_keys
        if parameters.get(key) is not None
    }

    connection = LindormConfig(
        host=parameters["host"],
        port=parameters["port"],
        user=parameters["user"],
        password=SecretStr(parameters["password"]),
        index_name=parameters["index_name"],
    )
    case_config = IVFBQConfig(
        nlist=parameters["nlist"],
        exbits=parameters["exbits"],
        nprobe=parameters["nprobe"],
        centroids_hnsw_M=parameters["m"],
        centroids_hnsw_efConstruction=parameters["ef_construction"],
        centroids_hnsw_efSearch=parameters["ef_search"],
        **overrides,
    )
    run(
        db=DB.Lindorm,
        db_config=connection,
        db_case_config=case_config,
        **parameters,
    )
141+
Lines changed: 166 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,166 @@
1+
import click
2+
from pydantic import SecretStr, validator
3+
from vectordb_bench.base import BaseModel
4+
from ..api import DBConfig, DBCaseConfig, IndexType, MetricType
5+
6+
7+
class LindormConfig(DBConfig):
    """Connection settings for a Lindorm instance."""

    host: str = ""
    port: int = 30070
    user: str = ""
    # NOTE(review): the default is a plain str, not a SecretStr; to_dict() calls
    # get_secret_value() on it — confirm the default is never used unvalidated.
    password: SecretStr = ""
    index_name: str = ""

    def to_dict(self) -> dict:
        """Return the connection settings as a plain dictionary."""
        endpoint = {"host": self.host, "port": self.port}
        credentials = (self.user, self.password.get_secret_value())
        return {
            "hosts": [endpoint],
            "http_auth": credentials,
            # TLS and compression are switched off.
            "use_ssl": False,
            "http_compress": False,
            "verify_certs": False,
            "ssl_assert_hostname": False,
            "ssl_show_warn": False,
            "timeout": 600,
            "index_name": self.index_name,
        }
28+
29+
30+
class LindormIndexConfig(BaseModel):
    """Fields common to every Lindorm index config: index type and metric."""

    index: IndexType
    metric_type: MetricType | None = MetricType.L2

    def parse_metric(self) -> str:
        """Translate ``metric_type`` into the space-type string."""
        metric = self.metric_type
        if metric == MetricType.IP:
            return "innerproduct"
        if metric == MetricType.COSINE:
            return "cosinesimil"
        # Every other value, including None, maps to "l2".
        return "l2"
40+
41+
42+
class HNSWConfig(LindormIndexConfig, DBCaseConfig):
    """Index-build and search settings for a Lindorm HNSW index."""

    index: IndexType = IndexType.HNSW
    M: int | None
    efConstruction: int | None
    efSearch: int | None
    filter_type: str | None = "efficient_filter"
    k_expand_scope: int | None = 1000

    def index_param(self, dim: int | None = None) -> dict:
        """Return the index-creation parameters.

        Args:
            dim: When not None, this value is used for "m" instead of ``self.M``.
        """
        # NOTE(review): substituting ``dim`` for M looks surprising — confirm
        # against the caller that this is intended.
        m_value = self.M if dim is None else dim
        build_params = {
            "m": m_value,
            "ef_construction": self.efConstruction,
        }
        return {
            "engine": "lvector",
            "name": "hnswq",
            "space_type": self.parse_metric(),
            "parameters": build_params,
        }

    def search_param(self, do_filter: bool = False) -> dict:
        """Return the search parameters, nested under the "lvector" key.

        Args:
            do_filter: When true, the filter settings are added to the result.
        """
        lvector = {"ef_search": str(self.efSearch)}
        if do_filter:
            lvector["filter_type"] = self.filter_type
            # k_expand_scope is only sent for the "efficient_filter" strategy.
            if self.filter_type == "efficient_filter":
                lvector["k_expand_scope"] = str(self.k_expand_scope)
        return {"lvector": lvector}
72+
73+
# The first-layer search over the cluster centroids uses HNSW.
class IVFPQConfig(LindormIndexConfig, DBCaseConfig):
    """Index-build and search settings for a Lindorm IVFPQ index."""

    index: IndexType = IndexType.IVFPQ
    nlist: int | None
    nprobe: int | None
    # HNSW settings for the centroid layer.
    centroids_hnsw_M: int | None
    centroids_hnsw_efConstruction: int | None
    centroids_hnsw_efSearch: int | None
    filter_type: str | None = "efficient_filter"

    reorder_factor: int | None = 10
    client_refactor: bool = False
    k_expand_scope: int | None = 1000

    def index_param(self) -> dict:
        """Return the index-creation parameters."""
        build_params = {
            "nlist": self.nlist,
            "centroids_use_hnsw": True,
            "centroids_hnsw_m": self.centroids_hnsw_M,
            "centroids_hnsw_ef_construct": self.centroids_hnsw_efConstruction,
            "centroids_hnsw_ef_search": self.centroids_hnsw_efSearch,
        }
        return {
            "engine": "lvector",
            "name": "ivfpq",
            "space_type": self.parse_metric(),
            "parameters": build_params,
        }

    def search_param(self, do_filter: bool = False) -> dict:
        """Return the search parameters, nested under the "lvector" key.

        Args:
            do_filter: When true, the filter settings are added to the result.
        """
        # Every value is sent as a string.
        lvector = {
            "nprobe": str(self.nprobe),
            "reorder_factor": str(self.reorder_factor),
            "client_refactor": str(self.client_refactor),
            "ef_search": str(self.centroids_hnsw_efSearch),
        }
        if do_filter:
            lvector["filter_type"] = self.filter_type
            # k_expand_scope is only sent for the "efficient_filter" strategy.
            if self.filter_type == "efficient_filter":
                lvector["k_expand_scope"] = str(self.k_expand_scope)
        return {"lvector": lvector}
116+
117+
118+
class IVFBQConfig(LindormIndexConfig, DBCaseConfig):
    """Index-build and search settings for a Lindorm IVFBQ index."""

    index: IndexType = IndexType.IVFBQ
    nlist: int | None
    exbits: int | None
    nprobe: int | None
    # HNSW settings for the centroid layer.
    centroids_hnsw_M: int | None
    centroids_hnsw_efConstruction: int | None
    centroids_hnsw_efSearch: int | None
    filter_type: str | None = "efficient_filter"

    reorder_factor: int | None = 10
    client_refactor: bool = False
    k_expand_scope: int | None = 1000

    def index_param(self) -> dict:
        """Return the index-creation parameters."""
        build_params = {
            "nlist": self.nlist,
            "exbits": self.exbits,
            "centroids_use_hnsw": True,
            "centroids_hnsw_m": self.centroids_hnsw_M,
            "centroids_hnsw_ef_construct": self.centroids_hnsw_efConstruction,
            "centroids_hnsw_ef_search": self.centroids_hnsw_efSearch,
        }
        return {
            "engine": "lvector",
            "name": "ivfbq",
            "space_type": self.parse_metric(),
            "parameters": build_params,
        }

    def search_param(self, do_filter: bool = False) -> dict:
        """Return the search parameters, nested under the "lvector" key.

        Args:
            do_filter: When true, the filter settings are added to the result.
        """
        # Every value is sent as a string.
        lvector = {
            "nprobe": str(self.nprobe),
            "reorder_factor": str(self.reorder_factor),
            "client_refactor": str(self.client_refactor),
        }
        if do_filter:
            lvector["filter_type"] = self.filter_type
            # k_expand_scope is only sent for the "efficient_filter" strategy.
            if self.filter_type == "efficient_filter":
                lvector["k_expand_scope"] = str(self.k_expand_scope)
        return {"lvector": lvector}
161+
162+
# Lookup table from index type to the case-config class that implements it.
_lindorm_vector_case_config = {
    IndexType.HNSW: HNSWConfig,
    IndexType.IVFPQ: IVFPQConfig,
    IndexType.IVFBQ: IVFBQConfig,
}

0 commit comments

Comments
 (0)