@@ -7,6 +7,7 @@ use valori_kernel::types::vector::FxpVector;
77use valori_kernel:: types:: id:: RecordId ;
88use valori_kernel:: fxp:: ops:: from_f32;
99use valori_kernel:: event:: KernelEvent ;
10+ use valori_kernel:: proof:: generate_proof_bytes;
1011use serde_json; // For metadata serialization
1112use hex; // For hash encoding
1213
@@ -379,6 +380,85 @@ impl ValoriEngine {
379380 Err ( pyo3:: exceptions:: PyRuntimeError :: new_err ( "Event Log not initialized" ) )
380381 }
381382 }
383+
384+ /// Batch atomic insert with Merkle proofs.
385+ /// Returns a list of (record_id, proof_hex).
386+ #[ pyo3( signature = ( vectors, tags) ) ]
387+ fn insert_batch_with_proof ( & self , vectors : Vec < Vec < f32 > > , tags : Vec < u64 > ) -> PyResult < Vec < ( u32 , String ) > > {
388+ if vectors. len ( ) != tags. len ( ) {
389+ return Err ( pyo3:: exceptions:: PyValueError :: new_err ( "vectors and tags must have the same length" ) ) ;
390+ }
391+
392+ let mut engine = self . inner . lock ( ) . unwrap ( ) ;
393+
394+ let mut events = Vec :: with_capacity ( vectors. len ( ) ) ;
395+ let mut results = Vec :: with_capacity ( vectors. len ( ) ) ;
396+ let mut temp_used_ids = std:: collections:: HashSet :: new ( ) ;
397+ let mut next_candidate = 0 ;
398+
399+ for ( vec, & tag) in vectors. iter ( ) . zip ( tags. iter ( ) ) {
400+ if vec. len ( ) != D {
401+ return Err ( pyo3:: exceptions:: PyValueError :: new_err ( format ! ( "Expected {} dims" , D ) ) ) ;
402+ }
403+
404+ let mut fxp_vec = FxpVector :: < D > :: new_zeros ( ) ;
405+ let mut fixed_values = Vec :: with_capacity ( D ) ;
406+ for ( i, & f) in vec. iter ( ) . enumerate ( ) {
407+ if f < -32767.0 || f > 32767.0 {
408+ return Err ( pyo3:: exceptions:: PyValueError :: new_err ( format ! (
409+ "Float at index {} ({}) outside valid range [-32767.0, 32767.0]" , i, f
410+ ) ) ) ;
411+ }
412+ let scalar = from_f32 ( f) ;
413+ fxp_vec. data [ i] = scalar;
414+ fixed_values. push ( scalar. 0 ) ;
415+ }
416+
417+ let proof_bytes = generate_proof_bytes ( & fixed_values) ;
418+ let proof_hex = hex:: encode ( & proof_bytes) ;
419+
420+ let mut found_id = None ;
421+ for i in next_candidate..MAX_RECORDS {
422+ let rid = RecordId ( i as u32 ) ;
423+ if engine. state . get_record ( rid) . is_none ( ) && !temp_used_ids. contains ( & i) {
424+ found_id = Some ( rid) ;
425+ next_candidate = i + 1 ;
426+ break ;
427+ }
428+ }
429+
430+ let rid = found_id. ok_or_else ( || {
431+ pyo3:: exceptions:: PyRuntimeError :: new_err ( "Capacity Exceeded" )
432+ } ) ?;
433+ temp_used_ids. insert ( rid. 0 as usize ) ;
434+
435+ events. push ( KernelEvent :: InsertRecord {
436+ id : rid,
437+ vector : fxp_vec,
438+ metadata : Some ( proof_bytes) ,
439+ tag,
440+ } ) ;
441+ results. push ( ( rid. 0 , proof_hex) ) ;
442+ }
443+
444+ if let Some ( ref mut committer) = engine. event_committer {
445+ match committer. commit_batch ( events. clone ( ) ) {
446+ Ok ( _) => {
447+ for event in & events {
448+ engine. apply_committed_event ( event) . map_err ( |e| {
449+ pyo3:: exceptions:: PyRuntimeError :: new_err ( format ! ( "Apply failed: {:?}" , e) )
450+ } ) ?;
451+ }
452+ Ok ( results)
453+ }
454+ Err ( e) => Err ( pyo3:: exceptions:: PyRuntimeError :: new_err (
455+ format ! ( "Batch commit failed: {:?}" , e)
456+ ) ) ,
457+ }
458+ } else {
459+ Err ( pyo3:: exceptions:: PyRuntimeError :: new_err ( "Event Log not initialized" ) )
460+ }
461+ }
382462}
383463
384464// ============================================================================
@@ -405,27 +485,6 @@ fn ingest_embedding(floats: Vec<f32>) -> PyResult<Vec<i32>> {
405485 Ok ( fixed)
406486}
407487
408- /// Internal helper — generates Merkle proof as raw bytes.
409- /// Single source of truth for Merkle logic.
410- /// Used by both generate_proof() (hex output) and insert_with_proof() (Record.metadata).
411- fn generate_proof_bytes ( fixed_values : & [ i32 ] ) -> Vec < u8 > {
412- let leaves: Vec < [ u8 ; 32 ] > = fixed_values
413- . iter ( )
414- . enumerate ( )
415- . map ( |( pos, & val) | {
416- let mut buf = [ 0u8 ; 8 ] ;
417- buf[ ..4 ] . copy_from_slice ( & ( pos as u32 ) . to_le_bytes ( ) ) ;
418- buf[ 4 ..] . copy_from_slice ( & val. to_le_bytes ( ) ) ;
419-
420- let mut hasher = blake3:: Hasher :: new ( ) ;
421- hasher. update ( & buf) ;
422- * hasher. finalize ( ) . as_bytes ( )
423- } )
424- . collect ( ) ;
425-
426- merkle_root ( & leaves) . to_vec ( )
427- }
428-
429488/// Build a position-aware Merkle tree over Q16.16 integers.
430489///
431490/// Each leaf = BLAKE3(position_u32_le || value_i32_le).
@@ -441,25 +500,6 @@ fn generate_proof(fixed_values: Vec<i32>) -> PyResult<String> {
441500 Ok ( hex:: encode ( generate_proof_bytes ( & fixed_values) ) )
442501}
443502
444- /// Standard binary Merkle tree. Odd leaf: hashed with itself.
445- fn merkle_root ( leaves : & [ [ u8 ; 32 ] ] ) -> [ u8 ; 32 ] {
446- if leaves. len ( ) == 1 {
447- return leaves[ 0 ] ;
448- }
449-
450- let next_level: Vec < [ u8 ; 32 ] > = leaves
451- . chunks ( 2 )
452- . map ( |pair| {
453- let mut hasher = blake3:: Hasher :: new ( ) ;
454- hasher. update ( & pair[ 0 ] ) ;
455- hasher. update ( pair. get ( 1 ) . unwrap_or ( & pair[ 0 ] ) ) ;
456- * hasher. finalize ( ) . as_bytes ( )
457- } )
458- . collect ( ) ;
459-
460- merkle_root ( & next_level)
461- }
462-
463503/// Verify a float embedding against a claimed proof hash.
464504///
465505/// Full pipeline in Rust: f32 → Q16.16 → Merkle → compare.
0 commit comments