Skip to content

Commit 70b749f

Browse files
author
Zhe Yu
committed
logging for common.py
1 parent 5ee764e commit 70b749f

2 files changed

Lines changed: 30 additions & 20 deletions

File tree

src/vectorcode/common.py

Lines changed: 29 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -1,9 +1,11 @@
11
import asyncio
22
import hashlib
3+
import logging
34
import os
45
import socket
56
import subprocess
67
import sys
8+
import traceback
79
from typing import AsyncGenerator
810

911
import chromadb
@@ -15,6 +17,8 @@
1517

1618
from vectorcode.cli_utils import Config, expand_path
1719

20+
logger = logging.getLogger(name=__name__)
21+
1822

1923
async def get_collections(
2024
client: AsyncClientAPI,
@@ -42,6 +46,7 @@ async def try_server(host: str, port: int):
4246
try:
4347
async with httpx.AsyncClient() as client:
4448
response = await client.get(url=url)
49+
logger.debug(f"Chromadb server at {host}:{port} returned {response=}")
4550
return response.status_code == 200
4651
except (httpx.ConnectError, httpx.ConnectTimeout):
4752
return False
@@ -82,6 +87,9 @@ async def start_server(configs: Config):
8287
with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
8388
s.bind(("", 0)) # OS selects a free ephemeral port
8489
configs.port = int(s.getsockname()[1])
90+
logger.warning(
91+
f"Starting bundled ChromaDB server at {configs.host}:{configs.port}."
92+
)
8593
env.update({"ANONYMIZED_TELEMETRY": "False"})
8694
process = await asyncio.create_subprocess_exec(
8795
sys.executable,
@@ -130,10 +138,12 @@ async def get_client(configs: Config) -> AsyncClientAPI:
130138
def get_collection_name(full_path: str) -> str:
131139
full_path = str(expand_path(full_path, absolute=True))
132140
hasher = hashlib.sha256()
133-
hasher.update(
134-
f"{os.environ.get('USER', os.environ.get('USERNAME', 'DEFAULT_USER'))}@{socket.gethostname()}:{full_path}".encode()
135-
)
141+
plain_collection_name = f"{os.environ.get('USER', os.environ.get('USERNAME', 'DEFAULT_USER'))}@{socket.gethostname()}:{full_path}"
142+
hasher.update(plain_collection_name.encode())
136143
collection_id = hasher.hexdigest()[:63]
144+
logger.debug(
145+
f"Hashing {plain_collection_name} as the collection name for {full_path}."
146+
)
137147
return collection_id
138148

139149

@@ -143,20 +153,18 @@ def get_embedding_function(configs: Config) -> chromadb.EmbeddingFunction | None
143153
**configs.embedding_params
144154
)
145155
except AttributeError:
146-
print(
156+
logger.warning(
147157
f"Failed to use {configs.embedding_function}. Falling back to Sentence Transformer.",
148-
file=sys.stderr,
149158
)
150159
return embedding_functions.SentenceTransformerEmbeddingFunction()
151160
except Exception as e:
152-
print(
153-
f"Failed to use {configs.embedding_function} with the following error:",
154-
file=sys.stderr,
155-
)
156161
e.add_note(
157162
"\nFor errors caused by missing dependency, consult the documentation of pipx (or whatever package manager that you installed VectorCode with) for instructions to inject libraries into the virtual environment."
158163
)
159-
164+
logger.error(
165+
f"Failed to use {configs.embedding_function} with the following error:",
166+
)
167+
logger.error(traceback.format_exc())
160168
raise
161169

162170

@@ -175,6 +183,7 @@ async def get_collection(
175183
if __COLLECTION_CACHE.get(full_path) is None:
176184
collection_name = get_collection_name(full_path)
177185
embedding_function = get_embedding_function(configs)
186+
178187
collection_meta: dict[str, str | int] = {
179188
"path": full_path,
180189
"hostname": socket.gethostname(),
@@ -190,7 +199,9 @@ async def get_collection(
190199
if not key.startswith("hnsw:"):
191200
target_key = f"hnsw:{key}"
192201
collection_meta[target_key] = configs.hnsw[key]
193-
202+
logger.debug(
203+
f"Getting/Creating collection with the following metadata: {collection_meta}"
204+
)
194205
if not make_if_missing:
195206
__COLLECTION_CACHE[full_path] = await client.get_collection(
196207
collection_name, embedding_function
@@ -211,6 +222,9 @@ async def get_collection(
211222
)
212223
or not collection.metadata.get("created-by") == "VectorCode"
213224
):
225+
logger.error(
226+
f"Failed to use existing collection due to metadata mismatch: {collection_meta}"
227+
)
214228
raise IndexError(
215229
"Failed to create the collection due to hash collision. Please file a bug report."
216230
)
@@ -222,15 +236,13 @@ def verify_ef(collection: AsyncCollection, configs: Config):
222236
collection_ef = collection.metadata.get("embedding_function")
223237
collection_ep = collection.metadata.get("embedding_params")
224238
if collection_ef and collection_ef != configs.embedding_function:
225-
print(f"The collection was embedded using {collection_ef}.")
226-
print(
239+
logger.error(f"The collection was embedded using {collection_ef}.")
240+
logger.error(
227241
"Embeddings and query must use the same embedding function and parameters. Please double-check your config."
228242
)
229243
return False
230244
elif collection_ep and collection_ep != configs.embedding_params:
231-
print(
232-
f"The collection was embedded with a different set of configurations: {collection_ep}.",
233-
file=sys.stderr,
245+
logger.warning(
246+
f"The collection was embedded with a different set of configurations: {collection_ep}. The result may be inaccurate.",
234247
)
235-
print("The result may be inaccurate.", file=sys.stderr)
236248
return True

src/vectorcode/main.py

Lines changed: 1 addition & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -67,9 +67,6 @@ async def async_main():
6767

6868
server_process = None
6969
if not await try_server(final_configs.host, final_configs.port):
70-
logger.warning(
71-
f"Host at {final_configs.host}:{final_configs.port} is unavailable. VectorCode will start its own Chromadb at a random port.",
72-
)
7370
server_process = await start_server(final_configs)
7471

7572
if final_configs.pipe:
@@ -107,6 +104,7 @@ async def async_main():
107104
except Exception as e:
108105
return_val = 1
109106
traceback.print_exception(e, file=sys.stderr)
107+
logger.error(traceback.format_exc())
110108
finally:
111109
if server_process is not None:
112110
logger.info("Shutting down the bundled Chromadb instance.")

0 commit comments

Comments
 (0)