Skip to content

Commit cfb08bb

Browse files
committed
feat: Deterministic Proof Bridge — per-record BLAKE3 Merkle proofs
- Fix f32→Q16.16 conversion discrepancy (from_f32 now uses round+clamp) - Single source of truth: kernel's from_f32() used by FFI, never inlined - Add bridge functions: ingest_embedding, generate_proof, verify_embedding - Add insert_with_proof() — atomic insert with proof as Record.metadata - Proofs are event-sourced, snapshot-persisted, included in state hash - Add ValoriAdapter — drop-in wrapper for external vector DBs - 47 Python tests + 11 Rust tests, all passing Files changed: src/fxp/ops.rs — from_f32() aligned to use .round().clamp() ffi/Cargo.toml — enable std feature, add blake3 dep ffi/src/lib.rs — bridge functions + insert_with_proof python/valori/adapter.py — kernel-backed proof adapter python/valori/__init__.py — export bridge functions README.md — document Proof Bridge
1 parent f4dc8dc commit cfb08bb

9 files changed

Lines changed: 985 additions & 12 deletions

File tree

Cargo.lock

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

README.md

Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -191,6 +191,86 @@ Valori uses **Q16.16 Fixed-Point Arithmetic** instead of standard `f32` floats.
191191
- **Bare Metal:** `no_std` compatible for ARM Cortex-M embedded systems
192192
- **Replication:** Leader-follower for read scaling
193193

194+
### 7. Deterministic Proof Bridge
195+
- **Per-record proofs** — BLAKE3 Merkle tree over Q16.16 integers
196+
- **Atomic insertion** — proof baked into `Record.metadata` at birth
197+
- **Event-sourced** — proofs go through `KernelEvent`, survive restarts
198+
- **Drop-in adapter** — wrap any existing vector DB (Pinecone, Qdrant, etc.)
199+
- **Hardware-independent** — same embedding → same proof on any machine
200+
201+
---
202+
203+
## 🔐 Deterministic Proof Bridge
204+
205+
Valori can generate per-record cryptographic proofs over AI embeddings. Proofs are deterministic — identical on any hardware — and stored inside the kernel's event-sourced state.
206+
207+
### Direct Usage (Rust FFI)
208+
209+
```python
210+
from valori import ingest_embedding, generate_proof, verify_embedding
211+
212+
# Any AI model → float embedding
213+
embedding = model.encode("patient diagnosis report")
214+
215+
# Convert to Q16.16 integers (deterministic, hardware-independent)
216+
fixed = ingest_embedding(embedding.tolist())
217+
218+
# Generate BLAKE3 Merkle proof
219+
proof_hash = generate_proof(fixed)
220+
221+
# Verify on any machine, any time — no server needed
222+
assert verify_embedding(embedding.tolist(), proof_hash) # True
223+
```
224+
225+
### Atomic Insert with Proof (Kernel-Backed)
226+
227+
```python
228+
from valori import Valori
229+
230+
client = Valori()
231+
232+
# Single FFI call — proof is baked into Record.metadata
233+
record_id, proof_hash = client.kernel.insert_with_proof(
234+
embedding.tolist(), tag=0
235+
)
236+
237+
# Proof is now:
238+
# ✅ Stored as Record.metadata
239+
# ✅ Event-sourced (in the event log)
240+
# ✅ Included in kernel_state_hash()
241+
# ✅ Persisted in snapshots
242+
# ✅ Survives crashes and restarts
243+
```
244+
245+
### Drop-in Adapter for Existing Systems
246+
247+
```python
248+
from valori import ValoriAdapter
249+
250+
# Wrap your existing vector DB — zero changes to existing code
251+
db = ValoriAdapter(your_pinecone_client)
252+
253+
# Insert goes to both: external DB + Valori kernel (for proofs)
254+
proof = db.insert("doc_001", embedding)
255+
256+
# Verify anytime
257+
db.verify("doc_001", embedding) # True — proof from kernel metadata
258+
259+
# Search results include verification status
260+
results = db.search(query_embedding, k=10)
261+
# Each result has: {"id": ..., "verified": True, "proof_hash": "abc..."}
262+
```
263+
264+
### What Makes This Different
265+
266+
| Feature | Other VectorDBs | Valori |
267+
|---------|----------------|--------|
268+
| **Per-record proof** | ❌ Not possible | ✅ BLAKE3 Merkle root per embedding |
269+
| **Offline verification** | ❌ Need running server | ✅ `verify_embedding()` runs anywhere |
270+
| **Tamper detection** | ❌ Only global checksums | ✅ Detects exactly which record changed |
271+
| **Hardware-independent** | ❌ Float rounding varies | ✅ Q16.16 integers — bit-identical everywhere |
272+
| **Zero trust** | ❌ Must trust vendor | ✅ Proof is math, not policy |
273+
194274
---
195275

196276
## 📚 Documentation

ffi/Cargo.toml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,9 @@ name = "valori_ffi"
88
crate-type = ["cdylib"]
99

1010
[dependencies]
11-
valori-kernel = { path = ".." }
11+
valori-kernel = { path = "..", features = ["std"] }
1212
valori-node = { path = "../node" }
1313
pyo3 = { version = "0.23", features = ["extension-module", "abi3-py39"] }
1414
serde_json = "1.0"
1515
hex = "0.4"
16+
blake3 = "1"

ffi/src/lib.rs

Lines changed: 169 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,8 @@ use std::sync::{Arc, Mutex};
44
use valori_node::config::NodeConfig;
55
use valori_node::engine::Engine;
66
use valori_kernel::types::vector::FxpVector;
7-
use valori_kernel::types::scalar::FxpScalar;
87
use valori_kernel::types::id::RecordId;
8+
use valori_kernel::fxp::ops::from_f32;
99
use valori_kernel::event::KernelEvent;
1010
use serde_json; // For metadata serialization
1111
use hex; // For hash encoding
@@ -17,7 +17,7 @@ const D: usize = 384;
1717
const MAX_NODES: usize = 100;
1818
const MAX_EDGES: usize = 100;
1919

20-
const SCALE: f32 = 65536.0;
20+
// f32→Q16.16 conversion is handled by valori_kernel::fxp::ops::from_f32 (single source of truth)
2121

2222
#[pyclass]
2323
struct ValoriEngine {
@@ -62,8 +62,7 @@ impl ValoriEngine {
6262
// 1. Convert to Fixed Point
6363
let mut fxp_vec = FxpVector::<D>::new_zeros();
6464
for (i, v) in vector.iter().enumerate() {
65-
let fixed = (v * SCALE).round().clamp(i32::MIN as f32, i32::MAX as f32) as i32;
66-
fxp_vec.data[i] = FxpScalar(fixed);
65+
fxp_vec.data[i] = from_f32(*v);
6766
}
6867

6968
// 2. Determine ID (first free slot) - Must match Kernel's deterministic logic
@@ -112,8 +111,7 @@ impl ValoriEngine {
112111
// Convert query to FxpVector for kernel search
113112
let mut fxp_vec = FxpVector::<D>::new_zeros();
114113
for (i, &v) in vector.iter().enumerate() {
115-
let fixed = (v * SCALE).round().clamp(i32::MIN as f32, i32::MAX as f32) as i32;
116-
fxp_vec.data[i] = FxpScalar(fixed);
114+
fxp_vec.data[i] = from_f32(v);
117115
}
118116

119117
let mut results = vec![valori_kernel::index::SearchResult::default(); k];
@@ -309,10 +307,175 @@ impl ValoriEngine {
309307
engine.metadata.set(key, value);
310308
Ok(())
311309
}
310+
311+
/// Atomic insert with proof — single FFI call.
312+
///
313+
/// 1. Validates + converts f32 → Q16.16 (from_f32)
314+
/// 2. Generates BLAKE3 Merkle proof over Q16.16 integers
315+
/// 3. Inserts record with proof hash as Record.metadata
316+
/// 4. Returns (record_id, proof_hash_hex)
317+
///
318+
/// The proof is event-sourced, snapshot-persisted, and included
319+
/// in kernel_state_hash() — it can never be out of sync.
320+
#[pyo3(signature = (vector, tag))]
321+
fn insert_with_proof(&self, vector: Vec<f32>, tag: u64) -> PyResult<(u32, String)> {
322+
if vector.len() != D {
323+
return Err(pyo3::exceptions::PyValueError::new_err(format!("Expected {} dims", D)));
324+
}
325+
326+
// 1. Validate range + convert to Q16.16
327+
let mut fxp_vec = FxpVector::<D>::new_zeros();
328+
let mut fixed_values = Vec::with_capacity(D);
329+
for (i, &f) in vector.iter().enumerate() {
330+
if f < -32767.0 || f > 32767.0 {
331+
return Err(pyo3::exceptions::PyValueError::new_err(format!(
332+
"Float at index {} ({}) outside valid range [-32767.0, 32767.0]", i, f
333+
)));
334+
}
335+
let scalar = from_f32(f);
336+
fxp_vec.data[i] = scalar;
337+
fixed_values.push(scalar.0);
338+
}
339+
340+
// 2. Generate Merkle proof over Q16.16 integers
341+
let proof_bytes = generate_proof_bytes(&fixed_values);
342+
let proof_hex = hex::encode(&proof_bytes);
343+
344+
// 3. Insert with proof as Record.metadata (event-sourced)
345+
let mut engine = self.inner.lock().unwrap();
346+
347+
let mut id_val = None;
348+
for i in 0..MAX_RECORDS {
349+
let rid = RecordId(i as u32);
350+
if engine.state.get_record(rid).is_none() {
351+
id_val = Some(rid);
352+
break;
353+
}
354+
}
355+
356+
let rid = id_val.ok_or_else(|| {
357+
pyo3::exceptions::PyRuntimeError::new_err("Capacity Exceeded")
358+
})?;
359+
360+
if let Some(ref mut committer) = engine.event_committer {
361+
let event = KernelEvent::InsertRecord {
362+
id: rid,
363+
vector: fxp_vec,
364+
metadata: Some(proof_bytes), // ← proof baked into record
365+
tag,
366+
};
367+
match committer.commit_event(event.clone()) {
368+
Ok(_) => {
369+
engine.apply_committed_event(&event).map_err(|e| {
370+
pyo3::exceptions::PyRuntimeError::new_err(format!("Apply failed: {:?}", e))
371+
})?;
372+
Ok((rid.0, proof_hex))
373+
}
374+
Err(e) => Err(pyo3::exceptions::PyRuntimeError::new_err(
375+
format!("Commit failed: {:?}", e)
376+
)),
377+
}
378+
} else {
379+
Err(pyo3::exceptions::PyRuntimeError::new_err("Event Log not initialized"))
380+
}
381+
}
382+
}
383+
384+
// ============================================================================
385+
// Bridge Functions — Standalone pyfunctions for deterministic proof generation
386+
// ============================================================================
387+
388+
/// Convert float embeddings to Q16.16 fixed-point integers.
389+
///
390+
/// Uses the kernel's from_f32() — single source of truth.
391+
/// Rejects values outside [-32767.0, 32767.0] (Q16.16 safe range).
392+
#[pyfunction]
393+
fn ingest_embedding(floats: Vec<f32>) -> PyResult<Vec<i32>> {
394+
for (i, &f) in floats.iter().enumerate() {
395+
if f < -32767.0 || f > 32767.0 {
396+
return Err(pyo3::exceptions::PyValueError::new_err(format!(
397+
"Float at index {} ({}) outside valid range [-32767.0, 32767.0]. \
398+
Normalize before ingestion.",
399+
i, f
400+
)));
401+
}
402+
}
403+
404+
let fixed: Vec<i32> = floats.iter().map(|&f| from_f32(f).0).collect();
405+
Ok(fixed)
406+
}
407+
408+
/// Internal helper — generates Merkle proof as raw bytes.
409+
/// Single source of truth for Merkle logic.
410+
/// Used by both generate_proof() (hex output) and insert_with_proof() (Record.metadata).
411+
fn generate_proof_bytes(fixed_values: &[i32]) -> Vec<u8> {
412+
let leaves: Vec<[u8; 32]> = fixed_values
413+
.iter()
414+
.enumerate()
415+
.map(|(pos, &val)| {
416+
let mut buf = [0u8; 8];
417+
buf[..4].copy_from_slice(&(pos as u32).to_le_bytes());
418+
buf[4..].copy_from_slice(&val.to_le_bytes());
419+
420+
let mut hasher = blake3::Hasher::new();
421+
hasher.update(&buf);
422+
*hasher.finalize().as_bytes()
423+
})
424+
.collect();
425+
426+
merkle_root(&leaves).to_vec()
427+
}
428+
429+
/// Build a position-aware Merkle tree over Q16.16 integers.
430+
///
431+
/// Each leaf = BLAKE3(position_u32_le || value_i32_le).
432+
/// Returns the root hash as a hex string.
433+
/// Same BLAKE3 crate the kernel uses — zero divergence possible.
434+
#[pyfunction]
435+
fn generate_proof(fixed_values: Vec<i32>) -> PyResult<String> {
436+
if fixed_values.is_empty() {
437+
return Err(pyo3::exceptions::PyValueError::new_err(
438+
"Cannot generate proof for empty vector"
439+
));
440+
}
441+
Ok(hex::encode(generate_proof_bytes(&fixed_values)))
442+
}
443+
444+
/// Standard binary Merkle tree. Odd leaf: hashed with itself.
445+
fn merkle_root(leaves: &[[u8; 32]]) -> [u8; 32] {
446+
if leaves.len() == 1 {
447+
return leaves[0];
448+
}
449+
450+
let next_level: Vec<[u8; 32]> = leaves
451+
.chunks(2)
452+
.map(|pair| {
453+
let mut hasher = blake3::Hasher::new();
454+
hasher.update(&pair[0]);
455+
hasher.update(pair.get(1).unwrap_or(&pair[0]));
456+
*hasher.finalize().as_bytes()
457+
})
458+
.collect();
459+
460+
merkle_root(&next_level)
461+
}
462+
463+
/// Verify a float embedding against a claimed proof hash.
464+
///
465+
/// Full pipeline in Rust: f32 → Q16.16 → Merkle → compare.
466+
/// No Python math involved.
467+
#[pyfunction]
468+
fn verify_embedding(floats: Vec<f32>, claimed_hash: String) -> PyResult<bool> {
469+
let fixed = ingest_embedding(floats)?;
470+
let computed_hash = generate_proof(fixed)?;
471+
Ok(computed_hash == claimed_hash)
312472
}
313473

314474
#[pymodule]
315475
fn valori_ffi(_py: Python, m: &Bound<'_, PyModule>) -> PyResult<()> {
316476
m.add_class::<ValoriEngine>()?;
477+
m.add_function(wrap_pyfunction!(ingest_embedding, m)?)?;
478+
m.add_function(wrap_pyfunction!(generate_proof, m)?)?;
479+
m.add_function(wrap_pyfunction!(verify_embedding, m)?)?;
317480
Ok(())
318481
}

0 commit comments

Comments
 (0)