Skip to content

Commit f1f264c

Browse files
committed
refactor: standardize logging and exception handling
- Add Logger class with configurable LOG_LEVEL setting
- Replace all module loggers with Logger class
- Use specific exceptions from exceptions.py instead of generic ValueError/Exception
- Replace os.getenv with direct api_settings access
- Update all adapters (astradb, chroma, milvus, pgvector) with consistent patterns
1 parent f821d51 commit f1f264c

22 files changed

Lines changed: 2047 additions & 1399 deletions

README.md

Lines changed: 24 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -104,8 +104,8 @@ from crossvector.dbs.astradb import AstraDBAdapter
104104

105105
# Initialize engine
106106
engine = VectorEngine(
107-
embedding_adapter=OpenAIEmbeddingAdapter(model_name="text-embedding-3-small"),
108-
db_adapter=AstraDBAdapter(),
107+
db=AstraDBAdapter(),
108+
embedding=OpenAIEmbeddingAdapter(model_name="text-embedding-3-small"),
109109
collection_name="my_documents",
110110
store_text=True # Optional: Set to False to not store original text
111111
)
@@ -121,9 +121,9 @@ print(f"Inserted {len(docs)} documents")
121121
# Alternative: Upsert with VectorDocument (if you have embeddings already)
122122
vector_docs = [
123123
VectorDocument(
124-
id="doc3",
125-
text="Python programming",
126-
vector=[0.1]*1536,
124+
id="doc3",
125+
text="Python programming",
126+
vector=[0.1]*1536,
127127
metadata={"category": "tech"}
128128
)
129129
]
@@ -269,14 +269,14 @@ from typing import Any, Dict, List, Set, Optional, Union, Sequence, Tuple
269269

270270
class MyCustomDBAdapter(VectorDBAdapter):
271271
"""Custom vector database adapter implementation."""
272-
272+
273273
# Optional: Set to True if your database uses '$vector' instead of 'vector'
274274
use_dollar_vector: bool = False
275-
275+
276276
def initialize(
277-
self,
278-
collection_name: str,
279-
embedding_dimension: int,
277+
self,
278+
collection_name: str,
279+
embedding_dimension: int,
280280
metric: str = "cosine",
281281
**kwargs: Any
282282
) -> None:
@@ -285,9 +285,9 @@ class MyCustomDBAdapter(VectorDBAdapter):
285285
pass
286286

287287
def add_collection(
288-
self,
289-
collection_name: str,
290-
embedding_dimension: int,
288+
self,
289+
collection_name: str,
290+
embedding_dimension: int,
291291
metric: str = "cosine"
292292
) -> Any:
293293
"""Create a new collection."""
@@ -300,9 +300,9 @@ class MyCustomDBAdapter(VectorDBAdapter):
300300
pass
301301

302302
def get_or_create_collection(
303-
self,
304-
collection_name: str,
305-
embedding_dimension: int,
303+
self,
304+
collection_name: str,
305+
embedding_dimension: int,
306306
metric: str = "cosine"
307307
) -> Any:
308308
"""Get existing collection or create if doesn't exist."""
@@ -343,15 +343,8 @@ class MyCustomDBAdapter(VectorDBAdapter):
343343
# Should return VectorDocument instance
344344
pass
345345

346-
def get_or_create(
347-
self,
348-
defaults: Optional[Dict[str, Any]] = None,
349-
**kwargs
350-
) -> Tuple[VectorDocument, bool]:
351-
"""Get document by pk or create if not found."""
352-
# Your implementation
353-
# Should return (VectorDocument, created: bool)
354-
pass
346+
# NOTE: get_or_create has been centralized in VectorEngine.
347+
# Adapters no longer implement this to avoid duplicated logic.
355348

356349
def create(self, **kwargs: Any) -> VectorDocument:
357350
"""Create and persist a single document."""
@@ -384,16 +377,8 @@ class MyCustomDBAdapter(VectorDBAdapter):
384377
# Should return updated VectorDocument instance
385378
pass
386379

387-
def update_or_create(
388-
self,
389-
defaults: Optional[Dict[str, Any]] = None,
390-
create_defaults: Optional[Dict[str, Any]] = None,
391-
**kwargs
392-
) -> Tuple[VectorDocument, bool]:
393-
"""Update document if exists, otherwise create."""
394-
# Your implementation
395-
# Should return (VectorDocument, created: bool)
396-
pass
380+
# NOTE: update_or_create has been centralized in VectorEngine.
381+
# Adapters no longer implement this to avoid duplicated logic.
397382

398383
def bulk_update(
399384
self,
@@ -408,8 +393,8 @@ class MyCustomDBAdapter(VectorDBAdapter):
408393
pass
409394

410395
def upsert(
411-
self,
412-
documents: List[VectorDocument],
396+
self,
397+
documents: List[VectorDocument],
413398
batch_size: int = None
414399
) -> List[VectorDocument]:
415400
"""Insert new documents or update existing ones."""
@@ -668,8 +653,8 @@ from crossvector.embeddings.openai import OpenAIEmbeddingAdapter
668653
from crossvector.dbs.pgvector import PGVectorAdapter
669654

670655
engine = VectorEngine(
671-
embedding_adapter=OpenAIEmbeddingAdapter(),
672-
db_adapter=PGVectorAdapter(),
656+
db=PGVectorAdapter(),
657+
embedding=OpenAIEmbeddingAdapter(),
673658
collection_name="docs",
674659
store_text=True
675660
)

docs/adapters/databases.md

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -68,13 +68,13 @@ from typing import Any, Dict, List, Set, Optional, Union, Sequence, Tuple
6868

6969
class MyCustomDBAdapter(VectorDBAdapter):
7070
"""Custom vector database adapter implementation."""
71-
71+
7272
use_dollar_vector: bool = False # Set to True if your DB uses '$vector'
73-
73+
7474
def initialize(
75-
self,
76-
collection_name: str,
77-
embedding_dimension: int,
75+
self,
76+
collection_name: str,
77+
embedding_dimension: int,
7878
metric: str = "cosine",
7979
**kwargs: Any
8080
) -> None:
@@ -99,8 +99,8 @@ class MyCustomDBAdapter(VectorDBAdapter):
9999
pass
100100

101101
def upsert(
102-
self,
103-
documents: List[VectorDocument],
102+
self,
103+
documents: List[VectorDocument],
104104
batch_size: int = None
105105
) -> List[VectorDocument]:
106106
"""Insert new documents or update existing ones."""

docs/configuration.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -88,8 +88,8 @@ If you're only using embeddings for search and don't need to retrieve the origin
8888
from crossvector import VectorEngine
8989

9090
engine = VectorEngine(
91-
embedding_adapter=...,
92-
db_adapter=...,
91+
embedding=...,
92+
db=...,
9393
collection_name="my_docs",
9494
store_text=False # Don't store text, only embeddings and metadata
9595
)

docs/quickstart.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,8 @@ from crossvector.dbs.astradb import AstraDBAdapter
77

88
# Initialize engine
99
engine = VectorEngine(
10-
embedding_adapter=OpenAIEmbeddingAdapter(model_name="text-embedding-3-small"),
11-
db_adapter=AstraDBAdapter(),
10+
embedding=OpenAIEmbeddingAdapter(model_name="text-embedding-3-small"),
11+
db=AstraDBAdapter(),
1212
collection_name="my_documents",
1313
store_text=True # Optional: Set to False to save space
1414
)

docs/schema.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ from crossvector import VectorDocument
1313

1414
# Without ID - auto-generated based on PRIMARY_KEY_MODE
1515
doc = VectorDocument(text="Hello world")
16-
print(doc.id)
16+
print(doc.id)
1717
# Possible values depending on PRIMARY_KEY_MODE:
1818
# - 'uuid' (default): Random UUID like "a1b2c3d4e5f6..."
1919
# - 'hash_text': SHA256 hash of text like "a591a6d40bf420404a011733cfb7b190d62c65bf0bcda32b57b277d9ad9f146e"

scripts/tests/test_integration.py

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77

88
from dotenv import load_dotenv
99

10-
from crossvector import VectorDocument, VectorEngine
10+
from crossvector import VectorEngine
1111
from crossvector.dbs.astradb import AstraDBAdapter
1212
from crossvector.dbs.chroma import ChromaDBAdapter
1313
from crossvector.dbs.milvus import MilvusDBAdapter
@@ -32,13 +32,13 @@
3232
test_pks = ["doc1", "doc2", "doc3"]
3333

3434

35-
def test_engine(db_name: str, db_adapter, embedding_adapter, collection_name: str):
35+
def test_engine(db_name: str, db, embedding, collection_name: str):
3636
"""Test VectorEngine with a specific database adapter."""
3737
print(f"\n{'=' * 80}")
38-
print(f"Testing {db_name} with {embedding_adapter.model_name}")
38+
print(f"Testing {db_name} with {embedding.model_name}")
3939
print(f"{'=' * 80}")
4040

41-
engine = VectorEngine(embedding_adapter=embedding_adapter, db_adapter=db_adapter, collection_name=collection_name)
41+
engine = VectorEngine(embedding=embedding, db=db, collection_name=collection_name)
4242

4343
# Clean up existing data (if collection exists, drop it)
4444
try:
@@ -49,11 +49,12 @@ def test_engine(db_name: str, db_adapter, embedding_adapter, collection_name: st
4949
print(f"Note: Could not drop collection (may not exist): {e}")
5050

5151
# Re-initialize after dropping
52-
engine = VectorEngine(embedding_adapter=embedding_adapter, db_adapter=db_adapter, collection_name=collection_name)
52+
engine = VectorEngine(embedding=embedding, db=db, collection_name=collection_name)
5353

54-
# Test 1: Upsert VectorDocuments from texts (with auto-embedding)
55-
print("\n1. Testing upsert_from_texts...")
56-
result = engine.upsert_from_texts(texts=test_texts, metadatas=test_metadatas, pks=test_pks)
54+
# Test 1: Upsert VectorDocuments (with auto-embedding)
55+
print("\n1. Testing upsert...")
56+
docs = [{"id": test_pks[i], "text": test_texts[i], "metadata": test_metadatas[i]} for i in range(len(test_texts))]
57+
result = engine.upsert(docs)
5758
print(f"Inserted {len(result)} VectorDocuments")
5859

5960
# Test 2: Count VectorDocuments

src/crossvector/__init__.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
from .abc import EmbeddingAdapter, VectorDBAdapter
77
from .engine import VectorEngine
88
from .schema import VectorDocument
9+
from .types import Doc, DocId, DocIds
910

1011
__version__ = "0.1.1"
1112

@@ -14,4 +15,7 @@
1415
"EmbeddingAdapter",
1516
"VectorDBAdapter",
1617
"VectorDocument",
18+
"Doc",
19+
"DocId",
20+
"DocIds",
1721
]

0 commit comments

Comments
 (0)