1+ use anyhow:: Result ;
2+ use memmap2:: Mmap ;
3+ use std:: fs:: File ;
4+ use std:: time:: Instant ;
5+ use valori_kernel:: adapters:: sift_batch:: SiftBatchLoader ;
6+ use bytemuck:: cast_slice; // The "Senior" way to cast types
7+
/// Scale factor for Q16.16 fixed-point conversion: 2^16 = 65536.
/// Multiplying an f32 by this and truncating to i32 yields 16.16 fixed point.
const Q16_SCALE: f32 = 65536.0;
9+
10+ fn main ( ) -> Result < ( ) > {
11+ println ! ( "🚀 Starting SIFT1M Granular Benchmark..." ) ;
12+
13+ let path = "data/sift/sift/sift_base.fvecs" ;
14+ let file = File :: open ( path) . expect ( "Failed to open SIFT file." ) ;
15+ let mmap = unsafe { Mmap :: map ( & file) ? } ;
16+
17+ // Initialize Loader
18+ let mut loader = SiftBatchLoader :: new ( & mmap)
19+ . ok_or_else ( || anyhow:: anyhow!( "Invalid SIFT format" ) ) ?;
20+
21+ let dim = loader. dim ( ) ;
22+ let total = loader. len ( ) ;
23+ let batch_size = 10_000 ;
24+
25+ // We calculate stride manually to inline the logic and avoid function overhead
26+ // Header (4B) + Data (dim * 4B)
27+ let stride = 4 + ( dim * 4 ) ;
28+
29+ println ! ( "📊 Dataset: {} Vectors | Dim: {}" , total, dim) ;
30+
31+ // ==========================================================
32+ // TEST 1: RAW I/O (Baseline)
33+ // ==========================================================
34+ println ! ( "\n Test 1: Raw Memory Bandwidth (No Parsing)..." ) ;
35+ loader = SiftBatchLoader :: new ( & mmap) . unwrap ( ) ; // Reset cursor
36+ let start_io = Instant :: now ( ) ;
37+ let mut bytes_checksum: u64 = 0 ;
38+
39+ while let Some ( ( raw_bytes, _count) ) = loader. next_batch ( batch_size) {
40+ // Force OS to page-in data by reading every byte.
41+ // We use a simple sum which the compiler can SIMD optimize,
42+ // ensuring we hit memory bandwidth limits, not CPU limits.
43+ let chunk_sum: u64 = raw_bytes. iter ( ) . map ( |& b| b as u64 ) . sum ( ) ;
44+ bytes_checksum = bytes_checksum. wrapping_add ( chunk_sum) ;
45+ }
46+ std:: hint:: black_box ( bytes_checksum) ; // Ensure calculation isn't deleted
47+
48+ let time_io = start_io. elapsed ( ) ;
49+ // approximate bytes read (total file size)
50+ let total_bytes = mmap. len ( ) ;
51+ println ! ( " -> Time: {:.4}s | {:.2} GB/s" ,
52+ time_io. as_secs_f64( ) ,
53+ ( total_bytes as f64 / 1_024.0 / 1_024.0 / 1_024.0 ) / time_io. as_secs_f64( )
54+ ) ;
55+
56+ // ==========================================================
57+ // TEST 2: PARSING COST (Bytemuck Cast)
58+ // ==========================================================
59+ println ! ( "\n Test 2: Structure Cost (Bytes -> &[f32])..." ) ;
60+ loader = SiftBatchLoader :: new ( & mmap) . unwrap ( ) ;
61+ let start_parse = Instant :: now ( ) ;
62+ let mut _check_parse: f32 = 0.0 ;
63+
64+ while let Some ( ( raw_bytes, count) ) = loader. next_batch ( batch_size) {
65+ for i in 0 ..count {
66+ let offset = i * stride;
67+ // Zero-Copy Slice: Skip 4 byte header, take the rest
68+ // Note: f32 requires 4-byte alignment. SIFT stride is (4 + 128*4) = 516.
69+ // 516 is divisible by 4, so address alignment is preserved!
70+ let vec_bytes = & raw_bytes[ offset + 4 .. offset + stride] ;
71+
72+ // bytemuck::cast_slice is SAFE. It checks alignment and length.
73+ // If this panics, your data is corrupt.
74+ let vec_f32: & [ f32 ] = cast_slice ( vec_bytes) ;
75+
76+
77+ // Sum ALL floats to ensure we read all memory, making this comparable to Test 1.
78+ for & val in vec_f32 {
79+ _check_parse += val;
80+ }
81+ }
82+ }
83+
84+ let time_parse = start_parse. elapsed ( ) ;
85+ println ! ( " - Checksum (f32): {:.2} (Ignore)" , _check_parse) ;
86+ println ! ( " -> Time: {:.4}s | Overhead: {:.4}s" ,
87+ time_parse. as_secs_f64( ) ,
88+ time_parse. checked_sub( time_io) . unwrap_or( std:: time:: Duration :: ZERO ) . as_secs_f64( )
89+ ) ;
90+
91+ // ==========================================================
92+ // TEST 3: MATH COST (f32 -> Q16.16)
93+ // ==========================================================
94+ println ! ( "\n Test 3: Determinism Cost (Math Ops)..." ) ;
95+ loader = SiftBatchLoader :: new ( & mmap) . unwrap ( ) ;
96+ let start_math = Instant :: now ( ) ;
97+ let mut check_math: i64 = 0 ;
98+
99+ while let Some ( ( raw_bytes, count) ) = loader. next_batch ( batch_size) {
100+ for i in 0 ..count {
101+ let offset = i * stride;
102+ let vec_bytes = & raw_bytes[ offset + 4 .. offset + stride] ;
103+ let vec_f32: & [ f32 ] = cast_slice ( vec_bytes) ;
104+
105+ // THE HOT LOOP
106+ for & val in vec_f32 {
107+ let fixed = ( val * Q16_SCALE ) as i32 ;
108+ check_math = check_math. wrapping_add ( fixed as i64 ) ;
109+ }
110+ }
111+ }
112+
113+ let time_math = start_math. elapsed ( ) ;
114+
115+ // Fix: Don't subtract Test 2 if Test 3 is faster (due to SIMD)
116+ // Just report the raw math time, which is the "Hot Cache" performance.
117+ println ! ( " -> Time: {:.4}s" , time_math. as_secs_f64( ) ) ;
118+
119+ println ! ( "--------------------------------------------------" ) ;
120+ println ! ( "📉 COST ANALYSIS:" ) ;
121+ println ! ( " - Cold I/O (Disk): {:.4}s" , time_io. as_secs_f64( ) ) ;
122+ println ! ( " - Hot Math (Memory): {:.4}s" , time_math. as_secs_f64( ) ) ;
123+ println ! ( "--------------------------------------------------" ) ;
124+
125+ let total_ops = total as f64 * dim as f64 ;
126+ println ! ( "⚡ Hot Throughput: {:.2} Billion ops/sec" ,
127+ total_ops / time_math. as_secs_f64( ) / 1_000_000_000.0
128+ ) ;
129+
130+ Ok ( ( ) )
131+ }