11import asyncio
22import hashlib
33import json
4+ import logging
45import os
56import sys
67import uuid
2324)
2425from vectorcode .common import get_client , get_collection , verify_ef
2526
27+ logger = logging .getLogger (name = __name__ )
28+
2629
2730def hash_str (string : str ) -> str :
2831 """Return the sha-256 hash of a string."""
@@ -55,6 +58,9 @@ async def chunked_add(
5558 )
5659
5760 if num_existing_chunks :
61+ logger .debug (
62+ "Deleting %s existing chunks for the current file." , num_existing_chunks
63+ )
5864 async with collection_lock :
5965 await collection .delete (where = {"path" : full_path_str })
6066
@@ -65,8 +71,10 @@ async def chunked_add(
6571 )
6672 if len (chunks ) == 0 or (len (chunks ) == 1 and chunks [0 ] == "" ):
6773 # empty file
74+ logger .debug (f"Skipping { full_path_str } because it's empty." )
6875 return
6976 chunks .append (str (os .path .relpath (full_path_str , configs .project_root )))
77+ logger .debug (f"Chunked into { len (chunks )} pieces." )
7078 metas = []
7179 for chunk in chunks :
7280 meta : dict [str , str | dict [str , int ]] = {"path" : full_path_str }
@@ -84,7 +92,7 @@ async def chunked_add(
8492 metadatas = metas ,
8593 )
8694 except UnicodeDecodeError : # pragma: nocover
87- # probably binary. skip it.
95+ logger . warning ( f"Failed to decode { full_path_str } ." )
8896 return
8997
9098 if num_existing_chunks :
@@ -128,16 +136,19 @@ def load_files_from_include(project_root: str) -> list[str]:
128136 include_file_path = os .path .join (project_root , ".vectorcode" , "vectorcode.include" )
129137 specs : Optional [pathspec .GitIgnoreSpec ] = None
130138 if os .path .isfile (include_file_path ):
139+ logger .debug ("Loading from local `vectorcode.include`." )
131140 with open (include_file_path ) as fin :
132141 specs = pathspec .GitIgnoreSpec .from_lines (
133142 lines = (os .path .expanduser (i ) for i in fin .readlines ()),
134143 )
135144 elif os .path .isfile (GLOBAL_INCLUDE_SPEC ):
145+ logger .debug ("Loading from global `vectorcode.include`." )
136146 with open (GLOBAL_INCLUDE_SPEC ) as fin :
137147 specs = pathspec .GitIgnoreSpec .from_lines (
138148 lines = (os .path .expanduser (i ) for i in fin .readlines ()),
139149 )
140150 if specs is not None :
151+ logger .info ("Populating included files from loaded specs." )
141152 return [
142153 result .file
143154 for result in specs .check_tree_files (project_root )
@@ -177,9 +188,12 @@ async def vectorise(configs: Config) -> int:
177188 specs .append (GLOBAL_EXCLUDE_SPEC )
178189 for spec_path in specs :
179190 if os .path .isfile (spec_path ):
191+ logger .info (f"Loading ignore specs from { spec_path } ." )
180192 with open (spec_path ) as fin :
181193 spec = pathspec .GitIgnoreSpec .from_lines (fin .readlines ())
182194 files = exclude_paths_by_spec ((str (i ) for i in files ), spec )
195+ else :
196+ logger .info ("Ignoring exclude specs." )
183197
184198 stats = {"add" : 0 , "update" : 0 , "removed" : 0 }
185199 collection_lock = Lock ()
@@ -224,6 +238,7 @@ async def vectorise(configs: Config) -> int:
224238 async with stats_lock :
225239 stats ["removed" ] = len (orphans )
226240 if len (orphans ):
241+ logger .info (f"Removing { len (orphans )} orphaned files from database." )
227242 await collection .delete (where = {"path" : {"$in" : list (orphans )}})
228243
229244 show_stats (configs = configs , stats = stats )
0 commit comments