|
| 1 | +// soa_scan.rs — 1M-row SoA scalability PoC: key-only scan vs full-value scan. |
| 2 | +// |
| 3 | +// Proves the OGAR canon's "the key prerenders nodes with ZERO value decode": when the |
| 4 | +// canonical NodeRow (512 B = key 16 + edges 16 + value 480) is laid out COLUMNAR |
| 5 | +// (struct-of-arrays) — a contiguous key column + a contiguous value column — a key-only |
| 6 | +// scan (prefix-route / render-select, e.g. "draw the skeleton subtree" = classid 0x0A02) |
| 7 | +// touches ~30x less memory than materializing the value slab, and stays flat as N grows. |
| 8 | +// This is the same prefix routing the /fma-body skeleton button and `graph 00000a02` do, |
| 9 | +// measured at scale. |
| 10 | +// |
| 11 | +// 1M is SYNTHETIC — real FMA is ~1368 placed meshes / ~75K terms; the scan throughput is |
| 12 | +// the point, not data realism. Synthetic rows are seeded by the FMA addressing |
| 13 | +// distribution (~25% skeleton classid, the rest soft tissue). |
| 14 | +// |
| 15 | +// usage: soa_scan [max_n] [reps] (default 1_000_000 rows, 5 reps; lower max_n for less RAM) |
| 16 | + |
| 17 | +use std::hint::black_box; |
| 18 | +use std::time::Instant; |
| 19 | + |
const CLASSID_SOFT: u32 = 0x0000_0A01;
const CLASSID_SKELETON: u32 = 0x0000_0A02;

/// Spreads a row index over 64 bits with a wrapping multiply by a fixed odd constant.
fn spread(i: usize) -> u64 {
    (i as u64).wrapping_mul(0x9E37_79B9_7F4A_7C15)
}

/// Synthesizes the 16-byte key of row `i`.
///
/// Byte layout (all little-endian):
/// * `0..4`   classid — skeleton for every fourth row, soft tissue otherwise
/// * `4..12`  the 64-bit spread of `i`
/// * `12..15` the low 24 bits of `i` (identity)
/// * `15`     the most significant byte of the spread
fn synth_key(i: usize) -> [u8; 16] {
    let class = match i % 4 {
        0 => CLASSID_SKELETON,
        _ => CLASSID_SOFT,
    };
    let hash = spread(i).to_le_bytes();
    let identity = ((i as u32) & 0x00FF_FFFF).to_le_bytes();

    let mut key = [0u8; 16];
    key[..4].copy_from_slice(&class.to_le_bytes());
    key[4..12].copy_from_slice(&hash);
    key[12..15].copy_from_slice(&identity[..3]);
    key[15] = hash[7]; // last little-endian byte == top byte of the spread
    key
}

/// Synthesizes the 480-byte value slab of row `i`.
///
/// Only three bytes are non-zero candidates (first, middle, last); they are derived
/// from `i` so the slab contents differ from row to row.
fn synth_value(i: usize) -> [u8; 480] {
    let mut slab = [0u8; 480];
    slab[0] = i as u8; // low byte of the row index
    slab[239] = spread(i) as u8; // low byte of the spread
    slab[479] = (i >> 8) as u8; // second byte of the row index
    slab
}

/// Builds the two columns of an `n`-row columnar (struct-of-arrays) table: the 16-byte
/// key column and the 480-byte value-slab column, each in its own contiguous `Vec`.
///
/// Row `i` of both columns is a pure function of `i`, so the output is deterministic.
fn build(n: usize) -> (Vec<[u8; 16]>, Vec<[u8; 480]>) {
    let mut keys = Vec::with_capacity(n);
    keys.extend((0..n).map(synth_key));

    let mut values = Vec::with_capacity(n);
    values.extend((0..n).map(synth_value));

    (keys, values)
}
| 48 | + |
/// Converts `bytes` moved in `secs` seconds into gigabytes per second (1 GB = 1e9 bytes).
fn gbps(bytes: usize, secs: f64) -> f64 {
    let bytes_per_sec = bytes as f64 / secs;
    bytes_per_sec / 1e9
}
| 52 | + |
| 53 | +fn main() { |
| 54 | + let a: Vec<String> = std::env::args().collect(); |
| 55 | + let max_n: usize = a.get(1).and_then(|s| s.parse().ok()).unwrap_or(1_000_000); |
| 56 | + let reps: usize = a.get(2).and_then(|s| s.parse().ok()).unwrap_or(5); |
| 57 | + |
| 58 | + println!("# SoA scalability: key-only (16 B/row, prefix-route) vs value (480 B/row, decode the slab)"); |
| 59 | + println!("# NodeRow = 512 B = key(16) + edges(16) + value(480); columnar SoA. {reps} reps, best-of."); |
| 60 | + println!("{:>10} | {:>20} | {:>20} | {:>8}", "rows", "key-only (route)", "value (decode slab)", "speedup"); |
| 61 | + println!("{:->10}-+-{:->20}-+-{:->20}-+-{:->8}", "", "", "", ""); |
| 62 | + |
| 63 | + let scales: Vec<usize> = [64_000usize, 256_000, max_n].into_iter().filter(|&n| n > 0).collect(); |
| 64 | + for &n in &scales { |
| 65 | + let (keys, values) = build(n); |
| 66 | + |
| 67 | + // key-only scan: prefix-route — count the skeleton subtree, reading only the key |
| 68 | + // column (classid at bytes 0..4; the 16-byte key column is what's streamed). |
| 69 | + let mut best_key = f64::MAX; |
| 70 | + let mut routed = 0usize; |
| 71 | + for _ in 0..reps { |
| 72 | + let t = Instant::now(); |
| 73 | + let mut c = 0usize; |
| 74 | + for k in &keys { |
| 75 | + let classid = u32::from_le_bytes([k[0], k[1], k[2], k[3]]); |
| 76 | + if classid == CLASSID_SKELETON { |
| 77 | + c += 1; |
| 78 | + } |
| 79 | + } |
| 80 | + black_box(c); |
| 81 | + best_key = best_key.min(t.elapsed().as_secs_f64()); |
| 82 | + routed = c; |
| 83 | + } |
| 84 | + |
| 85 | + // value scan: materialize the slab — sum all 480 bytes per row (the work a value |
| 86 | + // decode / tenant read does), reading the whole value column. |
| 87 | + let mut best_val = f64::MAX; |
| 88 | + for _ in 0..reps { |
| 89 | + let t = Instant::now(); |
| 90 | + let mut s = 0u64; |
| 91 | + for v in &values { |
| 92 | + let mut acc = 0u64; |
| 93 | + for &b in v.iter() { |
| 94 | + acc = acc.wrapping_add(b as u64); |
| 95 | + } |
| 96 | + s = s.wrapping_add(acc); |
| 97 | + } |
| 98 | + black_box(s); |
| 99 | + best_val = best_val.min(t.elapsed().as_secs_f64()); |
| 100 | + } |
| 101 | + |
| 102 | + let key_bytes = n * 16; // key column streamed |
| 103 | + let val_bytes = n * 480; // value column streamed |
| 104 | + let key_rps = n as f64 / best_key; |
| 105 | + let val_rps = n as f64 / best_val; |
| 106 | + println!( |
| 107 | + "{:>10} | {:>8.0} M/s {:>5.1} GB/s | {:>8.0} M/s {:>5.1} GB/s | {:>6.1}x", |
| 108 | + n, |
| 109 | + key_rps / 1e6, |
| 110 | + gbps(key_bytes, best_key), |
| 111 | + val_rps / 1e6, |
| 112 | + gbps(val_bytes, best_val), |
| 113 | + best_val / best_key, |
| 114 | + ); |
| 115 | + let _ = routed; |
| 116 | + } |
| 117 | + println!("# key-only touches 16 B/row vs 480 B/row (30x less); routing/render-select needs NO value decode."); |
| 118 | +} |
0 commit comments