@@ -419,9 +419,44 @@ def _setup_bindings(cls):
419419 except (AttributeError , OSError ):
420420 pass
421421
422- # Collection Search API (Native Rust vector search )
422+ # Collection API (Native Rust vector operations )
423423 # Optional: Only available in newer native library versions
424424 try :
425+ # sochdb_collection_create(ptr, namespace, collection, dimension, dist_type) -> c_int
426+ lib .sochdb_collection_create .argtypes = [
427+ ctypes .c_void_p , # ptr
428+ ctypes .c_char_p , # namespace
429+ ctypes .c_char_p , # collection
430+ ctypes .c_size_t , # dimension
431+ ctypes .c_uint8 , # dist_type: 0=Cosine, 1=Euclidean, 2=Dot
432+ ]
433+ lib .sochdb_collection_create .restype = ctypes .c_int
434+
435+ # sochdb_collection_insert(ptr, namespace, collection, id, vector_ptr, vector_len, metadata_json) -> c_int
436+ lib .sochdb_collection_insert .argtypes = [
437+ ctypes .c_void_p , # ptr
438+ ctypes .c_char_p , # namespace
439+ ctypes .c_char_p , # collection
440+ ctypes .c_char_p , # id
441+ ctypes .POINTER (ctypes .c_float ), # vector_ptr
442+ ctypes .c_size_t , # vector_len
443+ ctypes .c_char_p , # metadata_json (nullable)
444+ ]
445+ lib .sochdb_collection_insert .restype = ctypes .c_int
446+
447+ # sochdb_collection_insert_batch(ptr, ns, col, ids[], vectors_flat, dim, metas[], count) -> c_int
448+ lib .sochdb_collection_insert_batch .argtypes = [
449+ ctypes .c_void_p , # ptr
450+ ctypes .c_char_p , # namespace
451+ ctypes .c_char_p , # collection
452+ ctypes .POINTER (ctypes .c_char_p ), # ids array
453+ ctypes .POINTER (ctypes .c_float ), # flat vectors array
454+ ctypes .c_size_t , # dimension
455+ ctypes .POINTER (ctypes .c_char_p ), # metadata_jsons array (nullable entries)
456+ ctypes .c_size_t , # count
457+ ]
458+ lib .sochdb_collection_insert_batch .restype = ctypes .c_int
459+
425460 lib .sochdb_collection_search .argtypes = [
426461 ctypes .c_void_p , # ptr
427462 ctypes .c_char_p , # namespace
@@ -2386,6 +2421,171 @@ def ffi_collection_search(
23862421 # FFI not available, return empty (caller should fallback)
23872422 return None
23882423
def ffi_collection_create(
    self,
    namespace: str,
    collection: str,
    dimension: int,
    metric: str = "cosine",
) -> bool:
    """
    Create a collection via native Rust FFI.

    Args:
        namespace: Namespace name
        collection: Collection name
        dimension: Vector dimension
        metric: Distance metric ("cosine", "euclidean", "dot_product" or
            "dot"). Matching ignores case and surrounding whitespace.
            Unrecognised names fall back to cosine (code 0).

    Returns:
        True when the native call returns 0; False on any other return
        code, or when the call raises AttributeError/OSError (for example
        because the loaded library does not export the symbol).
    """
    self._check_open()

    # Codes mirror the native dist_type argument: 0=Cosine, 1=Euclidean, 2=Dot.
    dist_codes = {"cosine": 0, "euclidean": 1, "dot_product": 2, "dot": 2}

    # Normalise string metrics so "Euclidean" or " DOT " are not silently
    # turned into cosine by the default below. Non-string values are looked
    # up unchanged.
    metric_key = metric.strip().lower() if isinstance(metric, str) else metric
    dist_type = dist_codes.get(metric_key, 0)

    try:
        result = self._lib.sochdb_collection_create(
            self._handle,
            namespace.encode("utf-8"),
            collection.encode("utf-8"),
            dimension,
            dist_type,
        )
        return result == 0
    except (AttributeError, OSError):
        return False
2457+
def ffi_collection_insert(
    self,
    namespace: str,
    collection: str,
    doc_id: str,
    vector: "List[float]",
    metadata: "Optional[Dict]" = None,
) -> bool:
    """
    Insert one vector into a collection through the native Rust FFI.

    The vector is converted to a float32 array and handed to
    ``sochdb_collection_insert`` together with its length. Per the original
    notes the native side persists the vector and updates the in-process
    HNSW index; that is not verifiable from this wrapper.

    Args:
        namespace: Namespace name
        collection: Collection name
        doc_id: Document ID string
        vector: Vector embedding
        metadata: Optional metadata dict; a falsy value (None or empty)
            is passed to the native call as NULL

    Returns:
        True when the native call returns 0, otherwise False. False is
        also returned when the call raises AttributeError or OSError.
    """
    self._check_open()
    import numpy as np
    import json as _json

    # Keep the float32 array bound to a local so the buffer outlives the call.
    embedding = np.array(vector, dtype=np.float32)
    float_ptr_type = ctypes.POINTER(ctypes.c_float)
    embedding_ptr = embedding.ctypes.data_as(float_ptr_type)

    payload = _json.dumps(metadata).encode("utf-8") if metadata else None

    try:
        status = self._lib.sochdb_collection_insert(
            self._handle,
            namespace.encode("utf-8"),
            collection.encode("utf-8"),
            doc_id.encode("utf-8"),
            embedding_ptr,
            len(vector),
            payload,
        )
    except (AttributeError, OSError):
        return False
    return status == 0
2505+
def ffi_collection_insert_batch(
    self,
    namespace: str,
    collection: str,
    ids: "List[str]",
    vectors: "List[List[float]]",
    metadatas: "Optional[List[Optional[Dict]]]" = None,
) -> int:
    """
    Batch insert vectors into a collection via native Rust FFI.

    All rows are handed to ``sochdb_collection_insert_batch`` in one call
    (per the original notes the native side uses a single transaction for
    the batch; not verifiable from this wrapper). If that call raises
    AttributeError or OSError, the rows are inserted one at a time through
    ``sochdb_collection_insert`` instead.

    Args:
        namespace: Namespace name
        collection: Collection name
        ids: List of document IDs; each is converted with ``str()``
        vectors: List of vector embeddings, one per ID, all of the same
            length
        metadatas: Optional list of metadata dicts. Missing, None or empty
            entries are passed to the native call as NULL.

    Returns:
        Number of successfully inserted vectors (0 for empty input; a
        negative native return code is reported as 0).

    Raises:
        ValueError: if ``ids`` and ``vectors`` differ in length, or the
            vectors do not form a rectangular 2-D array. The native call
            receives ``len(ids)`` as the row count, so a shorter vector
            buffer would be read out of bounds.
    """
    # Local imports: this block does not rely on module-level json/numpy.
    import json
    import numpy as np

    # Same guard as the single-row methods: never pass a closed handle on.
    self._check_open()

    if ids is None or vectors is None or len(ids) == 0 or len(vectors) == 0:
        return 0

    n = len(ids)
    if len(vectors) != n:
        raise ValueError(
            f"ids and vectors must have the same length "
            f"(got {n} ids and {len(vectors)} vectors)"
        )

    matrix = np.array(vectors, dtype=np.float32)
    if matrix.ndim != 2:
        raise ValueError(
            "vectors must be a sequence of equal-length float sequences"
        )
    dimension = matrix.shape[1]

    ns_bytes = namespace.encode("utf-8")
    col_bytes = collection.encode("utf-8")
    float_ptr_type = ctypes.POINTER(ctypes.c_float)

    # Flat row-major buffer of n * dimension floats. Bound to a local so it
    # stays alive for the duration of the native call(s).
    flat_vectors = matrix.flatten()
    vec_ptr = flat_vectors.ctypes.data_as(float_ptr_type)

    # Encode once; both the batch call and the fallback reuse these.
    id_bytes = [str(doc_id).encode("utf-8") for doc_id in ids]
    meta_bytes = [
        json.dumps(metadatas[i]).encode("utf-8")
        if metadatas and i < len(metadatas) and metadatas[i]
        else None
        for i in range(n)
    ]

    # C arrays of char*; None entries become NULL pointers.
    id_array = (ctypes.c_char_p * n)(*id_bytes)
    meta_array = (ctypes.c_char_p * n)(*meta_bytes)

    try:
        result = self._lib.sochdb_collection_insert_batch(
            self._handle,
            ns_bytes,
            col_bytes,
            id_array,
            vec_ptr,
            ctypes.c_size_t(dimension),
            meta_array,
            ctypes.c_size_t(n),
        )
        return max(result, 0)
    except (AttributeError, OSError):
        # Fallback: per-vector insert if batch FFI not available.
        pass

    # Resolve the single-row entry point once instead of failing per row.
    insert_fn = getattr(self._lib, "sochdb_collection_insert", None)
    if insert_fn is None:
        return 0

    inserted = 0
    for i in range(n):
        # Contiguous view of row i inside the flat buffer.
        row = flat_vectors[i * dimension:(i + 1) * dimension]
        row_ptr = row.ctypes.data_as(float_ptr_type)
        try:
            rc = insert_fn(
                self._handle,
                ns_bytes,
                col_bytes,
                id_bytes[i],
                row_ptr,
                dimension,
                meta_bytes[i],
            )
        except (AttributeError, OSError):
            # Best effort: a failing row is skipped, not fatal.
            continue
        if rc == 0:
            inserted += 1
    return inserted
2588+
23892589 def ffi_collection_keyword_search (
23902590 self ,
23912591 namespace : str ,
0 commit comments