Skip to content

Commit 8b9ac96

Browse files
authored
Add nbits parameter to IVF_PQ index and adapt new filter logic (#576)
* support oceanbase
* fix bug
* fix bug
* support IP
* support multiple zone
* support filter case
* support ivf
* support hnsw_bq and extra info
* optimized formatting
* optimized formatting
* Revert dbCaseConfigs.py to match remote main branch
* resolve review comments
* Update __init__.py
* Addressed review comments
* Fixed code style issues
* Add nbits parameter to IVF_PQ index and adapt new filter logic
1 parent e6ce1bb commit 8b9ac96

4 files changed

Lines changed: 33 additions & 5 deletions

File tree

vectordb_bench/backend/clients/oceanbase/cli.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,7 @@ def OceanBaseIVF(**parameters: Unpack[OceanBaseIVFTypedDict]):
9393
m=input_m,
9494
nlist=parameters["nlist"],
9595
sample_per_nlist=parameters["sample_per_nlist"],
96+
nbits=parameters["nbits"],
9697
index=input_index_type,
9798
ivf_nprobes=parameters["ivf_nprobes"],
9899
),

vectordb_bench/backend/clients/oceanbase/config.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,7 @@ def search_param(self) -> dict:
8585
class OceanBaseIVFConfig(OceanBaseIndexConfig, DBCaseConfig):
8686
m: int
8787
sample_per_nlist: int
88+
nbits: int | None = None
8889
nlist: int
8990
index: IndexType
9091
ivf_nprobes: int | None = None
@@ -96,8 +97,9 @@ def index_param(self) -> dict:
9697
"metric_type": self.parse_metric(),
9798
"index_type": self.index.value,
9899
"params": {
99-
"m": self.M,
100+
"m": self.m,
100101
"sample_per_nlist": self.sample_per_nlist,
102+
"nbits": self.nbits,
101103
"nlist": self.nlist,
102104
},
103105
}

vectordb_bench/backend/clients/oceanbase/oceanbase.py

Lines changed: 20 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,8 @@
77

88
import mysql.connector as mysql
99

10+
from vectordb_bench.backend.filter import Filter, FilterOp
11+
1012
from ..api import IndexType, VectorDB
1113
from .config import OceanBaseConfigDict, OceanBaseHNSWConfig
1214

@@ -16,6 +18,12 @@
1618

1719

1820
class OceanBase(VectorDB):
21+
supported_filter_types: list[FilterOp] = [
22+
FilterOp.NonFilter,
23+
FilterOp.NumGE,
24+
FilterOp.StrEqual,
25+
]
26+
1927
def __init__(
2028
self,
2129
dim: int,
@@ -187,22 +195,30 @@ def insert_embeddings(
187195

188196
return insert_count, None
189197

198+
def prepare_filter(self, filters: Filter):
199+
if filters.type == FilterOp.NonFilter:
200+
self.expr = ""
201+
elif filters.type == FilterOp.NumGE:
202+
self.expr = f"WHERE id >= {filters.int_value}"
203+
elif filters.type == FilterOp.StrEqual:
204+
self.expr = f"WHERE id == '{filters.label_value}'"
205+
else:
206+
msg = f"Not support Filter for Oceanbase - {filters}"
207+
raise ValueError(msg)
208+
190209
def search_embedding(
191210
self,
192211
query: list[float],
193212
k: int = 100,
194-
filters: dict[str, Any] | None = None,
195-
timeout: int | None = None,
196213
) -> list[int]:
197214
if not self._cursor:
198215
raise ValueError("Cursor is not initialized")
199216

200217
packed = struct.pack(f"<{len(query)}f", *query)
201218
hex_vec = packed.hex()
202-
filter_clause = f"WHERE id >= {filters['id']}" if filters else ""
203219
query_str = (
204220
f"SELECT id FROM {self.table_name} " # noqa: S608
205-
f"{filter_clause} ORDER BY "
221+
f"{self.expr} ORDER BY "
206222
f"{self.db_case_config.parse_metric_func_str()}(embedding, X'{hex_vec}') "
207223
f"APPROXIMATE LIMIT {k}"
208224
)

vectordb_bench/cli/cli.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -501,6 +501,15 @@ class OceanBaseIVFTypedDict(TypedDict):
501501
int | None,
502502
click.option("--nlist", "nlist", type=int, help="Number of cluster centers", required=True),
503503
]
504+
nbits: Annotated[
505+
int | None,
506+
click.option(
507+
"--nbits",
508+
"nbits",
509+
type=int,
510+
help="Number of bits used to encode the index of a sub-vector's centroid in the compressed representation",
511+
),
512+
]
504513
sample_per_nlist: Annotated[
505514
int | None,
506515
click.option(

0 commit comments

Comments
 (0)