Skip to content

Commit d035c62

Browse files
committed
enhance: fix coding styles
Signed-off-by: yangxuan <xuan.yang@zilliz.com>
1 parent 3e883e1 commit d035c62

10 files changed

Lines changed: 46 additions & 93 deletions

File tree

vectordb_bench/backend/clients/alisql/alisql.py

Lines changed: 4 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -75,14 +75,12 @@ def _create_db_table(self, dim: int):
7575
log.info(f"{self.name} client create table : {self.table_name}")
7676
self.cursor.execute(f'USE {self.db_config["database"]}')
7777

78-
self.cursor.execute(
79-
f"""
78+
self.cursor.execute(f"""
8079
CREATE TABLE {self.table_name} (
8180
id INT PRIMARY KEY,
8281
v VECTOR({self.dim}) NOT NULL
8382
)
84-
"""
85-
)
83+
""")
8684
self.cursor.execute("COMMIT")
8785

8886
except Exception as e:
@@ -143,12 +141,10 @@ def optimize(self, data_size: int) -> None:
143141
if index_param["index_type"] == "HNSW" and index_param["M"] is not None:
144142
index_options += f" M={index_param['M']}"
145143

146-
self.cursor.execute(
147-
f"""
144+
self.cursor.execute(f"""
148145
ALTER TABLE {self.db_config["database"]}.{self.table_name}
149146
ADD VECTOR KEY v(v) {index_options}
150-
"""
151-
)
147+
""")
152148
self.cursor.execute("COMMIT")
153149

154150
except Exception as e:

vectordb_bench/backend/clients/cockroachdb/cockroachdb.py

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -315,14 +315,12 @@ def _create_table(self, cursor: Cursor, conn: Connection, dim: int):
315315
)
316316
else:
317317
cursor.execute(
318-
sql.SQL(
319-
"""
318+
sql.SQL("""
320319
CREATE TABLE IF NOT EXISTS {table_name}
321320
({primary_field} UUID PRIMARY KEY DEFAULT gen_random_uuid(),
322321
{metadata_field} BIGINT NOT NULL,
323322
{vector_field} VECTOR({dim}));
324-
"""
325-
).format(
323+
""").format(
326324
table_name=sql.Identifier(self.table_name),
327325
primary_field=sql.Identifier(self._primary_field),
328326
metadata_field=sql.Identifier(self._metadata_field),

vectordb_bench/backend/clients/hologres/hologres.py

Lines changed: 12 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -170,11 +170,9 @@ def _vacuum(self):
170170
self.conn.autocommit = True
171171
with self.conn.cursor() as cursor:
172172
cursor.execute(
173-
sql.SQL(
174-
"""
173+
sql.SQL("""
175174
VACUUM {table_name};
176-
"""
177-
).format(
175+
""").format(
178176
table_name=sql.Identifier(self.table_name),
179177
)
180178
)
@@ -193,14 +191,12 @@ def _analyze(self):
193191
def _full_compact(self):
194192
log.info(f"{self.name} client full compact table : {self.table_name}")
195193
self.cursor.execute(
196-
sql.SQL(
197-
"""
194+
sql.SQL("""
198195
SELECT hologres.hg_full_compact_table(
199196
'{table_name}',
200197
'max_file_size_mb={full_compact_max_file_size_mb}'
201198
);
202-
"""
203-
).format(
199+
""").format(
204200
table_name=sql.SQL(self.table_name),
205201
full_compact_max_file_size_mb=sql.SQL(str(self.case_config.full_compact_max_file_size_mb)),
206202
)
@@ -211,17 +207,15 @@ def _create_index(self):
211207
assert self.conn is not None, "Connection is not initialized"
212208
assert self.cursor is not None, "Cursor is not initialized"
213209

214-
sql_index = sql.SQL(
215-
"""
210+
sql_index = sql.SQL("""
216211
CALL set_table_property ('{table_name}', 'vectors', '{{
217212
"embedding": {{
218213
"algorithm": "{algorithm}",
219214
"distance_method": "{distance_method}",
220215
"builder_params": {builder_params}
221216
}}
222217
}}');
223-
"""
224-
).format(
218+
""").format(
225219
table_name=sql.Identifier(self.table_name),
226220
algorithm=sql.SQL(self.case_config.algorithm()),
227221
distance_method=sql.SQL(self.case_config.distance_method()),
@@ -256,15 +250,13 @@ def _set_replica_count(self, replica_count: int = 2):
256250
sql_get_warehouse_name = sql.SQL("select current_warehouse();")
257251
log.info(f"get warehouse name with sql: {sql_get_warehouse_name}")
258252
self.cursor.execute(sql_get_warehouse_name)
259-
sql_tg_replica = sql.SQL(
260-
"""
253+
sql_tg_replica = sql.SQL("""
261254
CALL hg_table_group_set_warehouse_replica_count (
262255
'{dbname}.{tg_name}',
263256
{replica_count},
264257
'{warehouse_name}'
265258
);
266-
"""
267-
).format(
259+
""").format(
268260
tg_name=sql.SQL(self._tg_name),
269261
warehouse_name=sql.SQL(self.cursor.fetchone()[0]),
270262
dbname=sql.SQL(self.db_config["dbname"]),
@@ -292,15 +284,13 @@ def _create_table(self, dim: int):
292284

293285
self._set_replica_count(replica_count=2)
294286

295-
sql_table = sql.SQL(
296-
"""
287+
sql_table = sql.SQL("""
297288
CREATE TABLE IF NOT EXISTS {table_name} (
298289
id BIGINT PRIMARY KEY,
299290
embedding FLOAT4[] CHECK (array_ndims(embedding) = 1 AND array_length(embedding, 1) = {dim})
300291
)
301292
WITH (table_group = {tg_name});
302-
"""
303-
).format(
293+
""").format(
304294
table_name=sql.Identifier(self.table_name),
305295
dim=dim,
306296
tg_name=sql.SQL(self._tg_name),
@@ -351,16 +341,14 @@ def _compose_query_and_params(self, vec: list[float], topk: int, ge_id: int | No
351341
params.append(vec_float4)
352342
params.append(topk)
353343

354-
query = sql.SQL(
355-
"""
344+
query = sql.SQL("""
356345
SELECT id
357346
FROM {table_name}
358347
{where_clause}
359348
ORDER BY {distance_function}(embedding, %b)
360349
{order_direction}
361350
LIMIT %s;
362-
"""
363-
).format(
351+
""").format(
364352
table_name=sql.Identifier(self.table_name),
365353
distance_function=sql.SQL(self.case_config.distance_function()),
366354
where_clause=where_clause,

vectordb_bench/backend/clients/mariadb/mariadb.py

Lines changed: 4 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -73,14 +73,12 @@ def _create_db_table(self, dim: int):
7373
log.info(f"{self.name} client create table : {self.table_name}")
7474
self.cursor.execute(f"USE {self.db_name}")
7575

76-
self.cursor.execute(
77-
f"""
76+
self.cursor.execute(f"""
7877
CREATE TABLE {self.table_name} (
7978
id INT PRIMARY KEY,
8079
v VECTOR({self.dim}) NOT NULL
8180
) ENGINE={index_param["storage_engine"]}
82-
"""
83-
)
81+
""")
8482
self.cursor.execute("COMMIT")
8583

8684
except Exception as e:
@@ -142,12 +140,10 @@ def optimize(self) -> None:
142140
if index_param["index_type"] == "HNSW" and index_param["M"] is not None:
143141
index_options += f" M={index_param['M']}"
144142

145-
self.cursor.execute(
146-
f"""
143+
self.cursor.execute(f"""
147144
ALTER TABLE {self.db_name}.{self.table_name}
148145
ADD VECTOR KEY v(v) {index_options}
149-
"""
150-
)
146+
""")
151147
self.cursor.execute("COMMIT")
152148

153149
except Exception as e:

vectordb_bench/backend/clients/pgdiskann/pgdiskann.py

Lines changed: 4 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -105,8 +105,7 @@ def init(self) -> Generator[None, None, None]:
105105

106106
if search_params.get("reranking"):
107107
# Reranking-enabled queries
108-
self._filtered_search = sql.SQL(
109-
"""
108+
self._filtered_search = sql.SQL("""
110109
SELECT i.id
111110
FROM (
112111
SELECT id, embedding
@@ -117,16 +116,14 @@ def init(self) -> Generator[None, None, None]:
117116
) i
118117
ORDER BY i.embedding {reranking_metric_fun_op} %s::vector
119118
LIMIT %s::int
120-
"""
121-
).format(
119+
""").format(
122120
table_name=sql.Identifier(self.table_name),
123121
metric_fun_op=sql.SQL(search_params["metric_fun_op"]),
124122
reranking_metric_fun_op=sql.SQL(search_params["reranking_metric_fun_op"]),
125123
quantized_fetch_limit=sql.Literal(search_params["quantized_fetch_limit"]),
126124
)
127125

128-
self._unfiltered_search = sql.SQL(
129-
"""
126+
self._unfiltered_search = sql.SQL("""
130127
SELECT i.id
131128
FROM (
132129
SELECT id, embedding
@@ -136,8 +133,7 @@ def init(self) -> Generator[None, None, None]:
136133
) i
137134
ORDER BY i.embedding {reranking_metric_fun_op} %s::vector
138135
LIMIT %s::int
139-
"""
140-
).format(
136+
""").format(
141137
table_name=sql.Identifier(self.table_name),
142138
metric_fun_op=sql.SQL(search_params["metric_fun_op"]),
143139
reranking_metric_fun_op=sql.SQL(search_params["reranking_metric_fun_op"]),

vectordb_bench/backend/clients/pgvector/pgvector.py

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -410,12 +410,10 @@ def _create_table(self, dim: int):
410410
)
411411
else:
412412
self.cursor.execute(
413-
sql.SQL(
414-
"""
413+
sql.SQL("""
415414
CREATE TABLE IF NOT EXISTS public.{table_name}
416415
({primary_field} BIGINT PRIMARY KEY, embedding {table_quantization_type}({dim}));
417-
"""
418-
).format(
416+
""").format(
419417
table_name=sql.Identifier(self.table_name),
420418
table_quantization_type=sql.SQL(index_param["table_quantization_type"]),
421419
dim=dim,

vectordb_bench/backend/clients/tidb/tidb.py

Lines changed: 8 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -68,15 +68,13 @@ def _create_table(self):
6868
try:
6969
index_param = self.case_config.index_param()
7070
with self._get_connection() as (conn, cursor):
71-
cursor.execute(
72-
f"""
71+
cursor.execute(f"""
7372
CREATE TABLE {self.table_name} (
7473
id BIGINT PRIMARY KEY,
7574
embedding VECTOR({self.dim}) NOT NULL,
7675
VECTOR INDEX (({index_param["metric_fn"]}(embedding)))
7776
);
78-
"""
79-
)
77+
""")
8078
conn.commit()
8179
except Exception as e:
8280
log.warning("Failed to create table: %s error: %s", self.table_name, e)
@@ -118,12 +116,10 @@ def _optimize_check_tiflash_replica_progress(self):
118116
try:
119117
database = self.db_config["database"]
120118
with self._get_connection() as (_, cursor):
121-
cursor.execute(
122-
f"""
119+
cursor.execute(f"""
123120
SELECT PROGRESS FROM information_schema.tiflash_replica
124121
WHERE TABLE_SCHEMA = "{database}" AND TABLE_NAME = "{self.table_name}"
125-
""" # noqa: S608
126-
)
122+
""") # noqa: S608
127123
result = cursor.fetchone()
128124
return result[0]
129125
except Exception as e:
@@ -155,13 +151,11 @@ def _optimize_get_tiflash_index_pending_rows(self):
155151
try:
156152
database = self.db_config["database"]
157153
with self._get_connection() as (_, cursor):
158-
cursor.execute(
159-
f"""
154+
cursor.execute(f"""
160155
SELECT SUM(ROWS_STABLE_NOT_INDEXED)
161156
FROM information_schema.tiflash_indexes
162157
WHERE TIDB_DATABASE = "{database}" AND TIDB_TABLE = "{self.table_name}"
163-
""" # noqa: S608
164-
)
158+
""") # noqa: S608
165159
result = cursor.fetchone()
166160
return result[0]
167161
except Exception as e:
@@ -223,11 +217,9 @@ def search_embedding(
223217
timeout: int | None = None,
224218
**kwargs: Any,
225219
) -> list[int]:
226-
self.cursor.execute(
227-
f"""
220+
self.cursor.execute(f"""
228221
SELECT id FROM {self.table_name}
229222
ORDER BY {self.search_fn}(embedding, "{query!s}") LIMIT {k};
230-
""" # noqa: S608
231-
)
223+
""") # noqa: S608
232224
result = self.cursor.fetchall()
233225
return [int(i[0]) for i in result]

vectordb_bench/frontend/components/custom/displaypPrams.py

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
def displayParams(st):
2-
st.markdown(
3-
"""
2+
st.markdown("""
43
- `Folder Path` - The path to the folder containing all the files. Please ensure that all files in the folder are in the `Parquet` format.
54
- Vectors data files: The file should have two kinds of columns: `id` as an incrementing `int` and `emb` as an array of `float32`. The name of two columns could be defined on your own.
65
- Query test vectors: The file could be named on your own and should have two kinds of columns: `id` as an incrementing `int` and `emb` as an array of `float32`. The `id` column must be named as `id`, and `emb` column could be defined on your own.
@@ -14,8 +13,7 @@ def displayParams(st):
1413
1514
- `Label percentages` - If you have filter file, please input label percentage you want to real run and `split with ','` when it's `more than one`. If you `don't have` filter file, than `keep the text vacant.`
1615
17-
"""
18-
)
16+
""")
1917
st.caption(
2018
"""We recommend limiting the number of test query vectors, like 1,000.""",
2119
help="""

vectordb_bench/frontend/components/welcome/explainPrams.py

Lines changed: 8 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1,24 +1,20 @@
11
def explainPrams(st):
22
st.markdown("## descriptions")
33
st.markdown("### 1. Overview")
4-
st.markdown(
5-
"""
4+
st.markdown("""
65
- **VectorDBBench(VDBBench)** is an open-source benchmarking tool designed specifically for vector databases. Its main features include:
76
- (1) An easy-to-use **web UI** for configuration of tests and visual analysis of results.
87
- (2) A comprehensive set of **standards for testing and metric collection**.
98
- (3) Support for **various scenarios**, including additional support for **Filter** and **Streaming** based on standard tests.
109
- VDBBench embraces open-source and welcome contributions of code and test result submissions. The testing process and extended scenarios of VDBBench, as well as the intention behind our design will be introduced as follows.
11-
"""
12-
)
10+
""")
1311
st.markdown("### 2. Dataset")
14-
st.markdown(
15-
"""
12+
st.markdown("""
1613
- We provide two embedding datasets:
1714
- (1)*[Cohere 768dim](https://huggingface.co/datasets/Cohere/wikipedia-22-12)*, generated using the **Cohere** model based on the Wikipedia corpus.
1815
- (2)*[Cohere 1024dim](https://huggingface.co/datasets/Cohere/beir-embed-english-v3)*, generated using the **Cohere** embed-english-v3.0 model based on the bioasq corpus.
1916
- (3)*OpenAI 1536dim*, generated using the **OpenAI** model based on the [C4 corpus](https://huggingface.co/datasets/legacy-datasets/c4).
20-
"""
21-
)
17+
""")
2218
st.markdown("### 3. Standard Test")
2319
st.markdown(
2420
"""
@@ -43,15 +39,12 @@ def explainPrams(st):
4339
unsafe_allow_html=True,
4440
)
4541
st.markdown("### 4. Filter Search Test")
46-
st.markdown(
47-
"""
42+
st.markdown("""
4843
- Compared to the Standard Test, the **Filter Search** introduces additional scalar constraints (e.g. **color == red**) during the Search Test. Different **filter_ratios** present varying levels of challenge to the VectorDB's search performance.
4944
- We provide an additional **string column** containing 10 labels with different distribution ratios (50%,20%,10%,5%,2%,1%,0.5%,0.2%,0.1%). For each label, we conduct both a **Serial Test** and a **Concurrency Test** to observe the VectorDB's performance in terms of **QPS, latency, and recall** under different filtering conditions.
50-
"""
51-
)
45+
""")
5246
st.markdown("### 5. Streaming Search Test")
53-
st.markdown(
54-
"""
47+
st.markdown("""
5548
Different from Standard's load and search separation, Streaming Search Test primarily focuses on **search performance during the insertion process**.
5649
Different **base dataset sizes** and varying **insertion rates** set distinct challenges to the VectorDB's search capabilities.
5750
VDBBench will send insert requests at a **fixed rate**, maintaining consistent insertion pressure. The search test consists of three steps as follows:
@@ -62,5 +55,4 @@ def explainPrams(st):
6255
- Note: at this time, the insertion pressure drops to zero since data insertion is complete.
6356
- 3.**Optimized Search (Optional)**
6457
- Users can optionally perform an additional optimization step followed by a Serial Test and a Concurrent Test, recording qps, latency, and recall performance. This step **compares performance in Streaming section with the theoretically optimal performance**.
65-
"""
66-
)
58+
""")

vectordb_bench/frontend/utils.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
import random
22
import string
33

4-
54
passwordKeys = ["password", "api_key"]
65

76

0 commit comments

Comments
 (0)