Skip to content

Commit fb06d68

Browse files
tae898 and Copilot committed
feat(vector-search): update vector query syntax to use parameterized inputs and add tests for parameterized neighbors
Co-authored-by: Copilot <copilot@github.com>
1 parent 9d2d82d commit fb06d68

5 files changed

Lines changed: 59 additions & 25 deletions

File tree

bindings/python/examples/03_vector_search.py

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -257,22 +257,21 @@ def create_mock_embedding(category_seed, doc_seed):
257257

258258
for query_num, cat_num in enumerate(sampled_categories, 1):
259259
category = f"category_{cat_num}"
260+
index_name = "Article[embedding]"
260261

261262
print(f" 🔍 Query {query_num}: Find documents similar to Category {cat_num}")
262263
print()
263264

264265
query_embedding = create_mock_embedding(category, f"query{query_num}")
265-
266-
qvec_literal = (
267-
"[" + ", ".join(str(float(x)) for x in query_embedding.tolist()) + "]"
268-
)
269266
most_similar = db.query(
270267
"sql",
271268
(
272269
"SELECT title, category, distance, (1 - distance) AS score "
273-
"FROM (SELECT expand(vectorNeighbors('Article[embedding]', "
274-
f"{qvec_literal}, 5))) ORDER BY distance"
270+
"FROM (SELECT expand(vectorNeighbors(?, ?, ?))) ORDER BY distance"
275271
),
272+
index_name,
273+
query_embedding,
274+
5,
276275
).to_list()
277276

278277
print(" Top 5 MOST similar documents (smallest distance):")
@@ -288,9 +287,12 @@ def create_mock_embedding(category_seed, doc_seed):
288287
"sql",
289288
(
290289
"SELECT title, category, distance, (1 - distance) AS score "
291-
"FROM (SELECT expand(vectorNeighbors('Article[embedding]', "
292-
f"{qvec_literal}, 50))) WHERE category = ? ORDER BY distance LIMIT 5"
290+
"FROM (SELECT expand(vectorNeighbors(?, ?, ?))) "
291+
"WHERE category = ? ORDER BY distance LIMIT 5"
293292
),
293+
index_name,
294+
query_embedding,
295+
50,
294296
category,
295297
).to_list()
296298

bindings/python/examples/06_vector_search_recommendations.py

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -377,6 +377,7 @@ def vector_based_recommendations(db, model, movie_title, property_suffix="", lim
377377
Query time in seconds
378378
"""
379379
embedding_prop = f"embedding{property_suffix}"
380+
index_name = f"Movie[{embedding_prop}]"
380381

381382
# Find the movie to get its genres
382383
movies = list(
@@ -402,18 +403,16 @@ def vector_based_recommendations(db, model, movie_title, property_suffix="", lim
402403
)
403404

404405
start_time = time.time()
405-
qvec_literal = (
406-
"[" + ", ".join(str(float(x)) for x in query_embedding.tolist()) + "]"
407-
)
408406
rows = db.query(
409407
"sql",
410408
(
411409
"SELECT title, distance, (1 - distance) AS score "
412-
"FROM (SELECT expand(vectorNeighbors('Movie["
413-
f"{embedding_prop}]', "
414-
f"{qvec_literal}, {int(limit + 5)}))) WHERE title <> ? "
410+
"FROM (SELECT expand(vectorNeighbors(?, ?, ?))) WHERE title <> ? "
415411
"ORDER BY distance LIMIT ?"
416412
),
413+
index_name,
414+
query_embedding,
415+
int(limit + 5),
417416
movie_title,
418417
limit,
419418
).to_list()

bindings/python/examples/12_vector_search.py

Lines changed: 5 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -590,19 +590,16 @@ def _extract_result_id(rec) -> int | None:
590590

591591
db = index["db"]
592592
index_name = index["name"]
593-
query_literals = [vector_to_arcadedb_literal(qvec) for qvec in queries]
594593

595594
for q_idx, qid in enumerate(qids):
596-
sql = (
597-
"SELECT vectorNeighbors("
598-
f"'{index_name}', {query_literals[q_idx]}, {int(k)}, {int(ef_search)}"
599-
") as res"
600-
)
601-
602595
start = time.perf_counter()
603596
row = db.query(
604597
"sql",
605-
sql,
598+
"SELECT vectorNeighbors(?, ?, ?, ?) as res",
599+
index_name,
600+
queries[q_idx],
601+
int(k),
602+
int(ef_search),
606603
).first()
607604
neighbors = row.get("res") if row else []
608605
result_ids: List[int] = []

bindings/python/src/arcadedb_embedded/vector.py

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,13 @@
1010
from .exceptions import ArcadeDBError
1111

1212

13+
def _quote_identifier(identifier: str) -> str:
14+
if not identifier:
15+
raise ArcadeDBError("SQL identifier cannot be empty")
16+
17+
return "`" + identifier.replace("`", "``") + "`"
18+
19+
1320
def to_java_float_array(vector):
1421
"""
1522
Convert a Python array-like object to a Java float array.
@@ -216,6 +223,9 @@ def _lookup_query_vector_by_key(self, key):
216223
vector_property = self._get_vector_property_name()
217224
type_name = self._get_type_name()
218225
id_property = self._get_id_property_name()
226+
quoted_vector_property = _quote_identifier(vector_property)
227+
quoted_type_name = _quote_identifier(type_name)
228+
quoted_id_property = _quote_identifier(id_property)
219229

220230
result = None
221231
try:
@@ -227,8 +237,8 @@ def _lookup_query_vector_by_key(self, key):
227237
result = self._database.query(
228238
"sql",
229239
(
230-
f"SELECT {vector_property} FROM {type_name} "
231-
f"WHERE {id_property} = ? LIMIT 1"
240+
f"SELECT {quoted_vector_property} AS `query_vector` FROM {quoted_type_name} "
241+
f"WHERE {quoted_id_property} = ? LIMIT 1"
232242
),
233243
key,
234244
).first()
@@ -238,7 +248,7 @@ def _lookup_query_vector_by_key(self, key):
238248
f"No record found in type '{type_name}' where {id_property} = {key!r}"
239249
)
240250

241-
query_vector = result.get(vector_property)
251+
query_vector = result.get("query_vector")
242252
if query_vector is None:
243253
raise ArcadeDBError(
244254
f"Record found for {id_property} = {key!r} "

bindings/python/tests/test_vector_sql.py

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -520,6 +520,32 @@ def test_vector_neighbors(self, test_db):
520520
# Maybe it expects type name?
521521
pass
522522

523+
def test_vector_neighbors_accepts_parameterized_index_and_vector(self, test_db):
    """Check that vectorNeighbors runs with index name, vector and k all bound.

    Creates a two-dimensional LSM_VECTOR index on ``ParamItem.vec``, inserts
    two vectors, then issues the query with ``?`` placeholders for every
    argument and expects exactly one neighbor back.
    """
    # Schema setup: vertex type, vector property, then the vector index.
    schema_statements = (
        "CREATE VERTEX TYPE ParamItem",
        "CREATE PROPERTY ParamItem.vec ARRAY_OF_FLOATS",
        'CREATE INDEX ON `ParamItem` (vec) LSM_VECTOR METADATA {"dimensions": 2}',
    )
    for statement in schema_statements:
        test_db.command("sql", statement)

    # Both inserts share a single transaction.
    insert_statements = (
        "INSERT INTO `ParamItem` SET vec = [1.0, 0.0]",
        "INSERT INTO `ParamItem` SET vec = [0.0, 1.0]",
    )
    with test_db.transaction():
        for statement in insert_statements:
            test_db.command("sql", statement)

    # Nothing is interpolated into the SQL text; all three arguments are bound.
    result_set = test_db.query(
        "sql",
        "SELECT vectorNeighbors(?, ?, ?) as res",
        "ParamItem[vec]",
        arcadedb.to_java_float_array([0.9, 0.1]),
        1,
    )
    first_row = result_set.first()

    if first_row:
        neighbors = first_row.get("res")
    else:
        neighbors = None

    assert neighbors is not None
    assert len(neighbors) == 1
548+
523549
def test_vector_delete_and_search_others_sql(self, test_db):
524550
"""Test deleting vertices in a larger dataset using SQL."""
525551
import random

0 commit comments

Comments
 (0)