Skip to content

Commit 9cbf85d

Browse files
committed
Update README: Remove version numbers and external references
- Remove version numbers from title and headers for easier maintenance
- Clean up API documentation for clarity
- Update code examples and migration guide
1 parent ae2c65f commit 9cbf85d

1 file changed

Lines changed: 91 additions & 14 deletions

File tree

README.md

Lines changed: 91 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# SochDB Python SDK v0.4.0
1+
# SochDB Python SDK
22

33
> **📢 Note:** This project has been renamed from **ToonDB** to **SochDB**. All references, packages, and APIs have been updated accordingly. If you're upgrading from ToonDB, please update your imports from `toondb` to `sochdb`.
44
@@ -61,7 +61,7 @@ pip install -e .
6161

6262
# SochDB Python SDK Documentation
6363

64-
**Version 0.4.0** | LLM-Optimized Embedded Database with Native Vector Search
64+
LLM-Optimized Embedded Database with Native Vector Search
6565

6666
---
6767

@@ -900,7 +900,21 @@ collection = ns.create_collection(
900900
collection = ns.collection("documents")
901901
```
902902

903-
### Inserting Documents
903+
### API Methods Overview
904+
905+
| Method | Purpose | Usage |
906+
|--------|---------|-------|
907+
| `add(ids, embeddings/vectors, metadatas)` | Bulk insert/update | Batch operations |
908+
| `upsert(ids, embeddings/vectors, metadatas)` | Insert or update | Batch upsert |
909+
| `query(query_embeddings, n_results, where)` | Search vectors | Standard query |
910+
| `insert(id, vector, metadata)` | Single insert | Single document |
911+
| `insert_batch(ids, vectors, metadatas)` | Bulk insert | Batch insert |
912+
| `search(SearchRequest)` | Advanced search | Full control |
913+
| `vector_search(vector, k, filter)` | Vector similarity | Convenience method |
914+
| `keyword_search(query, k, filter)` | BM25 search | Text search |
915+
| `hybrid_search(vector, text_query, k, alpha)` | Vector + BM25 | Combined search |
916+
917+
### Adding Documents
904918

905919
```python
906920
# Single insert
@@ -910,11 +924,29 @@ collection.insert(
910924
metadata={"title": "Introduction", "author": "Alice", "category": "tech"}
911925
)
912926

913-
# Batch insert (more efficient for bulk loading)
927+
# Batch add
928+
collection.add(
929+
ids=["doc1", "doc2", "doc3"],
930+
embeddings=[[...], [...], [...]], # or vectors=[[...], ...]
931+
metadatas=[
932+
{"title": "Doc 1"},
933+
{"title": "Doc 2"},
934+
{"title": "Doc 3"}
935+
]
936+
)
937+
938+
# Upsert (insert or update)
939+
collection.upsert(
940+
ids=["doc1", "doc2"],
941+
embeddings=[[...], [...]], # or vectors=[[...], ...]
942+
metadatas=[{"title": "Updated Doc 1"}, {"title": "Updated Doc 2"}]
943+
)
944+
945+
# Batch insert (alternative API)
914946
collection.insert_batch(
915947
ids=["doc1", "doc2", "doc3"],
916-
vectors=[[...], [...], [...]], # List of vectors
917-
metadata=[
948+
vectors=[[...], [...], [...]],
949+
metadatas=[
918950
{"title": "Doc 1"},
919951
{"title": "Doc 2"},
920952
{"title": "Doc 3"}
@@ -934,11 +966,18 @@ collection.insert_multi(
934966
```python
935967
from sochdb import SearchRequest
936968

969+
# Query API
970+
results = collection.query(
971+
query_embeddings=[[0.15, 0.25, ...]], # or query_vectors
972+
n_results=10,
973+
where={"author": "Alice"} # metadata filter
974+
)
975+
# Returns: {"ids": [[...]], "distances": [[...]], "metadatas": [[...]]}
976+
937977
# Using SearchRequest (full control)
938978
request = SearchRequest(
939979
vector=[0.15, 0.25, ...], # Query vector
940980
k=10, # Number of results
941-
ef_search=100, # Search quality (overrides collection default)
942981
filter={"author": "Alice"}, # Metadata filter
943982
min_score=0.7, # Minimum similarity score
944983
include_vectors=False, # Include vectors in results
@@ -953,7 +992,7 @@ results = collection.vector_search(
953992
filter={"author": "Alice"}
954993
)
955994

956-
# Process results
995+
# Process results (SearchResults object)
957996
for result in results:
958997
print(f"ID: {result.id}")
959998
print(f"Score: {result.score:.4f}") # Similarity score
@@ -1057,13 +1096,24 @@ config = CollectionConfig(
10571096
)
10581097
collection = ns.create_collection(config)
10591098

1060-
# Insert with text content
1099+
# Insert with text content (supports add() or insert())
1100+
collection.add(
1101+
ids=["article1"],
1102+
embeddings=[[...]],
1103+
metadatas=[{
1104+
"title": "Machine Learning Tutorial",
1105+
"text": "This tutorial covers the basics of machine learning...",
1106+
"category": "tech"
1107+
}]
1108+
)
1109+
1110+
# Or use insert for single document
10611111
collection.insert(
1062-
id="article1",
1112+
id="article2",
10631113
vector=[...],
10641114
metadata={
1065-
"title": "Machine Learning Tutorial",
1066-
"text": "This tutorial covers the basics of machine learning...",
1115+
"title": "Deep Learning Basics",
1116+
"text": "Introduction to neural networks...",
10671117
"category": "tech"
10681118
}
10691119
)
@@ -3085,7 +3135,21 @@ collection = ns.create_collection(CollectionConfig(
30853135
content_field="text"
30863136
))
30873137

3088-
# Index documents
3138+
# Index documents in batch
3139+
def index_documents_batch(documents: list, embed_fn):
3140+
"""Batch index documents."""
3141+
ids = [doc["id"] for doc in documents]
3142+
texts = [doc["text"] for doc in documents]
3143+
embeddings = [embed_fn(text) for text in texts]
3144+
metadatas = [{"text": text, "indexed_at": "2024-01-15"} for text in texts]
3145+
3146+
collection.add(
3147+
ids=ids,
3148+
embeddings=embeddings,
3149+
metadatas=metadatas
3150+
)
3151+
3152+
# Or single document insert
30893153
def index_document(doc_id: str, text: str, embed_fn):
30903154
embedding = embed_fn(text)
30913155
collection.insert(
@@ -3094,7 +3158,20 @@ def index_document(doc_id: str, text: str, embed_fn):
30943158
metadata={"text": text, "indexed_at": "2024-01-15"}
30953159
)
30963160

3097-
# Retrieve relevant context
3161+
# Retrieve relevant context using query API
3162+
def retrieve_context_query(query: str, embed_fn, k: int = 5) -> list:
3163+
"""Use query API for retrieval."""
3164+
query_embedding = embed_fn(query)
3165+
3166+
results = collection.query(
3167+
query_embeddings=[query_embedding],
3168+
n_results=k
3169+
)
3170+
3171+
# Returns: {"ids": [[...]], "distances": [[...]], "metadatas": [[...]]}
3172+
return [meta["text"] for meta in results["metadatas"][0]]
3173+
3174+
# Or use hybrid search
30983175
def retrieve_context(query: str, embed_fn, k: int = 5) -> list:
30993176
query_embedding = embed_fn(query)
31003177

0 commit comments

Comments
 (0)