1- # SochDB Python SDK v0.4.0
1+ # SochDB Python SDK
22
33> **📢 Note:** This project has been renamed from **ToonDB** to **SochDB**. All references, packages, and APIs have been updated accordingly. If you're upgrading from ToonDB, please update your imports from `toondb` to `sochdb`.
44
@@ -61,7 +61,7 @@ pip install -e .
6161
6262# SochDB Python SDK Documentation
6363
64- **Version 0.4.0** | LLM-Optimized Embedded Database with Native Vector Search
64+ LLM-Optimized Embedded Database with Native Vector Search
6565
6666---
6767
@@ -900,7 +900,21 @@ collection = ns.create_collection(
900900collection = ns.collection(" documents" )
901901```
902902
903- ### Inserting Documents
903+ ### API Methods Overview
904+
905+ | Method | Purpose | Usage |
906+ | --------| ---------| -------|
907+ | ` add(ids, embeddings/vectors, metadatas) ` | Bulk insert/update | Batch operations |
908+ | ` upsert(ids, embeddings/vectors, metadatas) ` | Insert or update | Batch upsert |
909+ | ` query(query_embeddings, n_results, where) ` | Search vectors | Standard query |
910+ | ` insert(id, vector, metadata) ` | Single insert | Single document |
911+ | ` insert_batch(ids, vectors, metadatas) ` | Bulk insert | Batch insert |
912+ | ` search(SearchRequest) ` | Advanced search | Full control |
913+ | ` vector_search(vector, k, filter) ` | Vector similarity | Convenience method |
914+ | ` keyword_search(query, k, filter) ` | BM25 search | Text search |
915+ | ` hybrid_search(vector, text_query, k, alpha) ` | Vector + BM25 | Combined search |
916+
917+ ### Adding Documents
904918
905919``` python
906920# Single insert
@@ -910,11 +924,29 @@ collection.insert(
910924 metadata = {" title" : " Introduction" , " author" : " Alice" , " category" : " tech" }
911925)
912926
913- # Batch insert (more efficient for bulk loading)
927+ # Batch add
928+ collection.add(
929+ ids = [" doc1" , " doc2" , " doc3" ],
930+ embeddings = [[... ], [... ], [... ]], # or vectors=[[...], ...]
931+ metadatas = [
932+ {" title" : " Doc 1" },
933+ {" title" : " Doc 2" },
934+ {" title" : " Doc 3" }
935+ ]
936+ )
937+
938+ # Upsert (insert or update)
939+ collection.upsert(
940+ ids = [" doc1" , " doc2" ],
941+ embeddings = [[... ], [... ]], # or vectors=[[...], ...]
942+ metadatas = [{" title" : " Updated Doc 1" }, {" title" : " Updated Doc 2" }]
943+ )
944+
945+ # Batch insert (alternative API)
914946collection.insert_batch(
915947 ids = [" doc1" , " doc2" , " doc3" ],
916- vectors = [[... ], [... ], [... ]], # List of vectors
917- metadata = [
948+ vectors = [[... ], [... ], [... ]],
949+ metadatas = [
918950 {" title" : " Doc 1" },
919951 {" title" : " Doc 2" },
920952 {" title" : " Doc 3" }
@@ -934,11 +966,18 @@ collection.insert_multi(
934966``` python
935967from sochdb import SearchRequest
936968
969+ # Query API
970+ results = collection.query(
971+ query_embeddings = [[0.15 , 0.25 , ... ]], # or query_vectors
972+ n_results = 10 ,
973+ where = {" author" : " Alice" } # metadata filter
974+ )
975+ # Returns: {"ids": [[...]], "distances": [[...]], "metadatas": [[...]]}
976+
937977# Using SearchRequest (full control)
938978request = SearchRequest(
939979 vector = [0.15 , 0.25 , ... ], # Query vector
940980 k = 10 , # Number of results
941- ef_search = 100 , # Search quality (overrides collection default)
942981 filter = {" author" : " Alice" }, # Metadata filter
943982 min_score = 0.7 , # Minimum similarity score
944983 include_vectors = False , # Include vectors in results
@@ -953,7 +992,7 @@ results = collection.vector_search(
953992 filter = {" author" : " Alice" }
954993)
955994
956- # Process results
995+ # Process results (SearchResults object)
957996for result in results:
958997 print (f " ID: { result.id} " )
959998 print (f " Score: { result.score:.4f } " ) # Similarity score
@@ -1057,13 +1096,24 @@ config = CollectionConfig(
10571096)
10581097collection = ns.create_collection(config)
10591098
1060- # Insert with text content
1099+ # Insert with text content (supports add() or insert())
1100+ collection.add(
1101+ ids = [" article1" ],
1102+ embeddings = [[... ]],
1103+ metadatas = [{
1104+ " title" : " Machine Learning Tutorial" ,
1105+ " text" : " This tutorial covers the basics of machine learning..." ,
1106+ " category" : " tech"
1107+ }]
1108+ )
1109+
1110+ # Or use insert for single document
10611111collection.insert(
1062- id = " article1 " ,
1112+ id = " article2 " ,
10631113 vector = [... ],
10641114 metadata = {
1065- " title" : " Machine Learning Tutorial " ,
1066- " text" : " This tutorial covers the basics of machine learning ..." ,
1115+ " title" : " Deep Learning Basics " ,
1116+ " text" : " Introduction to neural networks ..." ,
10671117 " category" : " tech"
10681118 }
10691119)
@@ -3085,7 +3135,21 @@ collection = ns.create_collection(CollectionConfig(
30853135 content_field = " text"
30863136))
30873137
3088- # Index documents
3138+ # Index documents in batch
3139+ def index_documents_batch (documents : list , embed_fn ):
3140+ """ Batch index documents."""
3141+ ids = [doc[" id" ] for doc in documents]
3142+ texts = [doc[" text" ] for doc in documents]
3143+ embeddings = [embed_fn(text) for text in texts]
3144+ metadatas = [{" text" : text, " indexed_at" : " 2024-01-15" } for text in texts]
3145+
3146+ collection.add(
3147+ ids = ids,
3148+ embeddings = embeddings,
3149+ metadatas = metadatas
3150+ )
3151+
3152+ # Or single document insert
30893153def index_document (doc_id : str , text : str , embed_fn ):
30903154 embedding = embed_fn(text)
30913155 collection.insert(
@@ -3094,7 +3158,20 @@ def index_document(doc_id: str, text: str, embed_fn):
30943158 metadata = {" text" : text, " indexed_at" : " 2024-01-15" }
30953159 )
30963160
3097- # Retrieve relevant context
3161+ # Retrieve relevant context using query API
3162+ def retrieve_context_query (query : str , embed_fn , k : int = 5 ) -> list :
3163+ """ Use query API for retrieval."""
3164+ query_embedding = embed_fn(query)
3165+
3166+ results = collection.query(
3167+ query_embeddings = [query_embedding],
3168+ n_results = k
3169+ )
3170+
3171+ # Returns: {"ids": [[...]], "distances": [[...]], "metadatas": [[...]]}
3172+ return [meta[" text" ] for meta in results[" metadatas" ][0 ]]
3173+
3174+ # Or use hybrid search
30983175def retrieve_context (query : str , embed_fn , k : int = 5 ) -> list :
30993176 query_embedding = embed_fn(query)
31003177
0 commit comments