Skip to content

Commit 0496db1

Browse files
author
Zhe Yu
committed
update chroma0 connector to adopt the new API definition.
1 parent 794b09d commit 0496db1

2 files changed

Lines changed: 24 additions & 17 deletions

File tree

src/vectorcode/database/base.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -78,6 +78,7 @@ async def query(
7878
@abstractmethod
7979
async def vectorise(
8080
self,
81+
file_path: str,
8182
chunker: TreeSitterChunker | None = None,
8283
embedding_function: EmbeddingFunction | None = None,
8384
) -> VectoriseStats:

src/vectorcode/database/chroma0.py

Lines changed: 23 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -261,21 +261,21 @@ def __init__(self, configs: Config):
261261
params.update(self._configs.db_params)
262262
self._configs.db_params = params
263263

264-
async def query(self, collection_path, keywords_embeddings, opts):
265-
assert len(opts.keywords), "Keywords cannot be empty"
266-
assert len(keywords_embeddings) == len(opts.keywords), (
264+
async def query(self, keywords_embeddings):
265+
assert self._configs.query is not None
266+
assert len(self._configs.query), "Keywords cannot be empty"
267+
assert len(keywords_embeddings) == len(self._configs.query), (
267268
"Number of embeddings must match number of keywords."
268269
)
270+
collection_path = str(self._configs.project_root)
269271
collection: AsyncCollection = await self._create_or_get_collection(
270272
collection_path=collection_path, allow_create=False
271273
)
272-
query_count = opts.count or (
273-
await self.count(collection_path, ResultType.chunk)
274-
)
274+
query_count = self._configs.n_result or (await self.count(ResultType.chunk))
275275
query_filter = None
276-
if len(opts.excluded_files):
276+
if len(self._configs.query_exclude):
277277
query_filter = cast(
278-
chromadb.Where, {"path": {"$nin": list(opts.excluded_files)}}
278+
chromadb.Where, {"path": {"$nin": list(self._configs.query_exclude)}}
279279
)
280280
if QueryInclude.chunk in self._configs.include:
281281
if query_filter is None:
@@ -295,7 +295,7 @@ async def query(self, collection_path, keywords_embeddings, opts):
295295
n_results=query_count,
296296
where=query_filter,
297297
)
298-
return __convert_chroma_query_results(query_result, opts.keywords)
298+
return __convert_chroma_query_results(query_result, self._configs.query)
299299

300300
async def _create_or_get_collection(
301301
self, collection_path: str, allow_create: bool = False
@@ -345,11 +345,11 @@ async def _create_or_get_collection(
345345

346346
async def vectorise(
347347
self,
348-
collection_path: str,
349348
file_path: str,
350349
chunker: TreeSitterChunker | None = None,
351350
embedding_function: EmbeddingFunction | None = None,
352351
) -> VectoriseStats:
352+
collection_path = str(self._configs.project_root)
353353
collection = await self._create_or_get_collection(
354354
collection_path, allow_create=True
355355
)
@@ -406,7 +406,7 @@ async def list_collections(self):
406406
for col_name in await client.list_collections():
407407
col = await client.get_collection(col_name)
408408
project_root = str(col.metadata.get("path"))
409-
col_counts = await self.list(project_root)
409+
col_counts = await self.list()
410410
result.append(
411411
CollectionInfo(
412412
id=col_name,
@@ -422,11 +422,12 @@ async def list_collections(self):
422422
)
423423
return result
424424

425-
async def list(self, collection_path, what=None) -> CollectionContent:
425+
async def list(self, what=None) -> CollectionContent:
426426
"""
427427
When `what` is None, this method should populate both `CollectionContent.files` and `CollectionContent.chunks`.
428428
Otherwise, this method may populate only one of them to save waiting time.
429429
"""
430+
collection_path = str(self._configs.project_root)
430431
content = CollectionContent()
431432
collection = await self._create_or_get_collection((collection_path))
432433
raw_content = await collection.get(
@@ -469,15 +470,20 @@ async def list(self, collection_path, what=None) -> CollectionContent:
469470

470471
return content
471472

472-
async def delete(self, collection_path: str, file_path: str | Sequence[str]):
473+
async def delete(self):
474+
collection_path = str(self._configs.project_root)
473475
collection = await self._create_or_get_collection(collection_path, False)
474-
if isinstance(file_path, str):
475-
file_path = [file_path]
476+
rm_paths = self._configs.rm_paths
477+
if isinstance(rm_paths, str):
478+
rm_paths = [rm_paths]
476479
await collection.delete(
477-
where={"path": {"$in": [str(expand_path(i, True)) for i in file_path]}}
480+
where={"path": {"$in": [str(expand_path(i, True)) for i in rm_paths]}}
478481
)
479482

480-
async def drop(self, collection_path: str):
483+
async def drop(
484+
self,
485+
):
486+
collection_path = str(self._configs.project_root)
481487
async with _Chroma0ClientManager().get_client(self._configs) as client:
482488
await self._create_or_get_collection(collection_path, False)
483489
await client.delete_collection(get_collection_id(collection_path))

0 commit comments

Comments
 (0)