Skip to content

Commit 5f6db1f

Browse files
author
Zhe Yu
committed
logger for vectorise.
1 parent 213fe89 commit 5f6db1f

1 file changed

Lines changed: 16 additions & 1 deletion

File tree

src/vectorcode/subcommands/vectorise.py

Lines changed: 16 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,7 @@
11
import asyncio
22
import hashlib
33
import json
4+
import logging
45
import os
56
import sys
67
import uuid
@@ -23,6 +24,8 @@
2324
)
2425
from vectorcode.common import get_client, get_collection, verify_ef
2526

27+
logger = logging.getLogger(name=__name__)
28+
2629

2730
def hash_str(string: str) -> str:
2831
"""Return the sha-256 hash of a string."""
@@ -55,6 +58,9 @@ async def chunked_add(
5558
)
5659

5760
if num_existing_chunks:
61+
logger.debug(
62+
"Deleting %s existing chunks for the current file.", num_existing_chunks
63+
)
5864
async with collection_lock:
5965
await collection.delete(where={"path": full_path_str})
6066

@@ -65,8 +71,10 @@ async def chunked_add(
6571
)
6672
if len(chunks) == 0 or (len(chunks) == 1 and chunks[0] == ""):
6773
# empty file
74+
logger.debug(f"Skipping {full_path_str} because it's empty.")
6875
return
6976
chunks.append(str(os.path.relpath(full_path_str, configs.project_root)))
77+
logger.debug(f"Chunked into {len(chunks)} pieces.")
7078
metas = []
7179
for chunk in chunks:
7280
meta: dict[str, str | dict[str, int]] = {"path": full_path_str}
@@ -84,7 +92,7 @@ async def chunked_add(
8492
metadatas=metas,
8593
)
8694
except UnicodeDecodeError: # pragma: nocover
87-
# probably binary. skip it.
95+
logger.warning(f"Failed to decode {full_path_str}.")
8896
return
8997

9098
if num_existing_chunks:
@@ -128,16 +136,19 @@ def load_files_from_include(project_root: str) -> list[str]:
128136
include_file_path = os.path.join(project_root, ".vectorcode", "vectorcode.include")
129137
specs: Optional[pathspec.GitIgnoreSpec] = None
130138
if os.path.isfile(include_file_path):
139+
logger.debug("Loading from local `vectorcode.include`.")
131140
with open(include_file_path) as fin:
132141
specs = pathspec.GitIgnoreSpec.from_lines(
133142
lines=(os.path.expanduser(i) for i in fin.readlines()),
134143
)
135144
elif os.path.isfile(GLOBAL_INCLUDE_SPEC):
145+
logger.debug("Loading from global `vectorcode.include`.")
136146
with open(GLOBAL_INCLUDE_SPEC) as fin:
137147
specs = pathspec.GitIgnoreSpec.from_lines(
138148
lines=(os.path.expanduser(i) for i in fin.readlines()),
139149
)
140150
if specs is not None:
151+
logger.info("Populating included files from loaded specs.")
141152
return [
142153
result.file
143154
for result in specs.check_tree_files(project_root)
@@ -177,9 +188,12 @@ async def vectorise(configs: Config) -> int:
177188
specs.append(GLOBAL_EXCLUDE_SPEC)
178189
for spec_path in specs:
179190
if os.path.isfile(spec_path):
191+
logger.info(f"Loading ignore specs from {spec_path}.")
180192
with open(spec_path) as fin:
181193
spec = pathspec.GitIgnoreSpec.from_lines(fin.readlines())
182194
files = exclude_paths_by_spec((str(i) for i in files), spec)
195+
else:
196+
logger.info("Ignoring exclude specs.")
183197

184198
stats = {"add": 0, "update": 0, "removed": 0}
185199
collection_lock = Lock()
@@ -224,6 +238,7 @@ async def vectorise(configs: Config) -> int:
224238
async with stats_lock:
225239
stats["removed"] = len(orphans)
226240
if len(orphans):
241+
logger.info(f"Removing {len(orphans)} orphaned files from database.")
227242
await collection.delete(where={"path": {"$in": list(orphans)}})
228243

229244
show_stats(configs=configs, stats=stats)

0 commit comments

Comments
 (0)