Skip to content

Commit 029be26

Browse files
Aiur: flat append-only QueryMap + env-gated record-size stats (#444)
* Aiur: flat append-only QueryMap + env-gated record-size stats Replace the per-circuit FxIndexMap<Vec<G>, QueryResult> with flat arenas: keys/outputs/multiplicities in contiguous Vec<G>s (fixed stride per circuit) indexed by a hashbrown HashTable<u32>. The record IS the proof witness so entries cannot be dropped — only stored compactly: per-entry RAM drops from ~133 B (two heap Vecs + IndexMap bucket + allocator metadata) to ~54 B, and execution runs ~3x faster wall-clock (no per-call heap allocation, better locality). Entry index == insertion order, preserving the memory-circuit pointer semantics. Circuit stats are bit-identical to the map-based form. Measured on ByteArray.utf8DecodeChar?_utf8EncodeChar_append: at the op-count where the old representation OOM-killed a 249 GB box (12.9B ops, 1.64B entries), the flat form sits at 104 GB vs 246 GB, with bit-identical memory-circuit entry counts. Also adds IX_AIUR_QUERY_STATS=1: periodic (every 2^31 ops) + final dumps of per-function entry counts and retained G-elems, the RAM-attribution diagnostic used to find this and the kernel-side pathologies. * chore: Move QueryMap to its own file * fmt + clippy --------- Co-authored-by: samuelburnham <45365069+samuelburnham@users.noreply.github.com>
1 parent 3c09339 commit 029be26

8 files changed

Lines changed: 244 additions & 36 deletions

File tree

Cargo.lock

Lines changed: 3 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ anyhow = "1"
1111
blake3 = "1.8.4"
1212
itertools = "0.14.0"
1313
indexmap = { version = "2", features = ["rayon"] }
14+
hashbrown = "0.15"
1415
lean-ffi = { git = "https://github.com/argumentcomputer/lean-ffi", rev = "cc98ebf67bf453ac3827cb767f78b13ea674dd6a" }
1516
mimalloc = { version = "0.1", default-features = false }
1617
multi-stark = { git = "https://github.com/argumentcomputer/multi-stark.git", rev = "9ecab51d553445c0cc7b571af00a76b8a83a6f8c" }

src/aiur.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ pub mod constraints;
33
pub mod execute;
44
pub mod gadgets;
55
pub mod memory;
6+
pub mod querymap;
67
pub mod synthesis;
78
pub mod trace;
89

src/aiur/execute.rs

Lines changed: 70 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -12,16 +12,10 @@ use crate::{
1212
bytes1::{Bytes1, Bytes1Op, Bytes1Queries},
1313
bytes2::{Bytes2, Bytes2Op, Bytes2Queries},
1414
},
15+
querymap::QueryMap,
1516
},
1617
};
1718

18-
pub struct QueryResult {
19-
pub(crate) output: Vec<G>,
20-
pub(crate) multiplicity: G,
21-
}
22-
23-
pub type QueryMap = FxIndexMap<Vec<G>, QueryResult>;
24-
2519
pub struct QueryRecord {
2620
pub(crate) function_queries: Vec<QueryMap>,
2721
pub(crate) memory_queries: FxIndexMap<usize, QueryMap>,
@@ -31,12 +25,15 @@ pub struct QueryRecord {
3125

3226
impl QueryRecord {
3327
fn new(toplevel: &Toplevel) -> Self {
34-
let function_queries =
35-
toplevel.functions.iter().map(|_| QueryMap::default()).collect();
28+
let function_queries = toplevel
29+
.functions
30+
.iter()
31+
.map(|f| QueryMap::new(f.layout.input_size))
32+
.collect();
3633
let memory_queries = toplevel
3734
.memory_sizes
3835
.iter()
39-
.map(|width| (*width, QueryMap::default()))
36+
.map(|width| (*width, QueryMap::new(*width)))
4037
.collect();
4138
let bytes1_queries = Bytes1Queries::new();
4239
let bytes2_queries = Bytes2Queries::new();
@@ -158,6 +155,41 @@ impl std::fmt::Display for ExecError {
158155

159156
impl std::error::Error for ExecError {}
160157

158+
/// Gated by `IX_AIUR_QUERY_STATS=1`: dump per-function query-map sizes
159+
/// during/after execution so RAM blowups can be attributed to specific
160+
/// Aiur functions (indices resolve to names via the Lean
161+
/// `CompiledToplevel`). Heaviest maps first, by retained G elements.
162+
static QUERY_STATS: std::sync::LazyLock<bool> =
163+
std::sync::LazyLock::new(|| {
164+
std::env::var_os("IX_AIUR_QUERY_STATS").is_some()
165+
});
166+
167+
fn dump_query_stats(record: &QueryRecord, tag: &str) {
168+
let mut rows: Vec<(usize, usize, usize)> = record
169+
.function_queries
170+
.iter()
171+
.enumerate()
172+
.map(|(i, m)| (i, m.len(), m.retained_elems()))
173+
.filter(|(_, n, _)| *n > 0)
174+
.collect();
175+
rows.sort_by(|a, b| b.2.cmp(&a.2));
176+
let total_entries: usize = rows.iter().map(|r| r.1).sum();
177+
let total_elems: usize = rows.iter().map(|r| r.2).sum();
178+
eprintln!(
179+
"[aiur-stats {tag}] function_queries: {total_entries} entries, \
180+
{total_elems} G-elems; top maps:"
181+
);
182+
for (i, n, e) in rows.iter().take(30) {
183+
eprintln!(" fn{i:<4} entries={n:<12} g_elems={e}");
184+
}
185+
let mem: Vec<String> = record
186+
.memory_queries
187+
.iter()
188+
.map(|(w, m)| format!("w{w}={}", m.len()))
189+
.collect();
190+
eprintln!("[aiur-stats {tag}] memory entries: {}", mem.join(" "));
191+
}
192+
161193
impl Toplevel {
162194
pub fn execute(
163195
&self,
@@ -172,6 +204,9 @@ impl Toplevel {
172204
let function = &self.functions[fun_idx];
173205
let output =
174206
function.execute(fun_idx, args, self, &mut record, io_buffer)?;
207+
if *QUERY_STATS {
208+
dump_query_stats(&record, "final");
209+
}
175210
Ok((record, output))
176211
}
177212
}
@@ -213,7 +248,14 @@ impl Function {
213248
}
214249
push_block_exec_entries!(&self.body);
215250
let mut unconstrained = false;
251+
let mut stats_ops: u64 = 0;
216252
while let Some(exec_entry) = exec_entries_stack.pop() {
253+
if *QUERY_STATS {
254+
stats_ops += 1;
255+
if stats_ops.is_multiple_of(1 << 31) {
256+
dump_query_stats(record, &format!("ops={stats_ops}"));
257+
}
258+
}
217259
match exec_entry {
218260
ExecEntry::Op(Op::Const(c)) => map.push(*c),
219261
ExecEntry::Op(Op::Add(a, b)) => {
@@ -236,14 +278,14 @@ impl Function {
236278
map.push(G::from_bool(a == G::ZERO));
237279
},
238280
ExecEntry::Op(Op::Call(callee_idx, args, _, op_unconstrained)) => {
239-
let args = args.iter().map(|i| map[*i]).collect();
281+
let args: Vec<G> = args.iter().map(|i| map[*i]).collect();
240282
if let Some(result) =
241283
record.function_queries[*callee_idx].get_mut(&args)
242284
{
243285
if !unconstrained && !op_unconstrained {
244-
result.multiplicity += G::ONE;
286+
*result.multiplicity += G::ONE;
245287
}
246-
map.extend(result.output.clone());
288+
map.extend_from_slice(result.output);
247289
} else {
248290
let saved_map = std::mem::replace(&mut map, args);
249291
callers_states_stack.push(CallerState {
@@ -266,16 +308,16 @@ impl Function {
266308
.ok_or(ExecError::InvalidMemorySize(size))?;
267309
if let Some(result) = memory_queries.get_mut(&values) {
268310
if !unconstrained {
269-
result.multiplicity += G::ONE;
311+
*result.multiplicity += G::ONE;
270312
}
271-
map.extend(&result.output);
313+
map.extend_from_slice(result.output);
272314
} else {
273315
let ptr = G::from_usize(memory_queries.len());
274-
let result = QueryResult {
275-
output: vec![ptr],
276-
multiplicity: G::from_bool(!unconstrained),
277-
};
278-
memory_queries.insert(values, result);
316+
memory_queries.insert(
317+
&values,
318+
&[ptr],
319+
G::from_bool(!unconstrained),
320+
);
279321
map.push(ptr);
280322
}
281323
},
@@ -289,13 +331,13 @@ impl Function {
289331
let ptr_usize = usize::try_from(ptr_u64)
290332
.ok()
291333
.ok_or(ExecError::PointerTooLarge(ptr_u64))?;
292-
let (args, result) = memory_queries
334+
let (args, multiplicity) = memory_queries
293335
.get_index_mut(ptr_usize)
294336
.ok_or(ExecError::UnboundPointer { ptr: ptr_u64, size: *size })?;
295337
if !unconstrained {
296-
result.multiplicity += G::ONE;
338+
*multiplicity += G::ONE;
297339
}
298-
map.extend(args);
340+
map.extend_from_slice(args);
299341
},
300342
ExecEntry::Op(Op::AssertEq(xs, ys)) => {
301343
if xs.len() != ys.len() {
@@ -542,13 +584,12 @@ impl Function {
542584
ExecEntry::Ctrl(Ctrl::Return(_, output)) => {
543585
// Register the query.
544586
let input_size = toplevel.functions[fun_idx].layout.input_size;
545-
let args = map[..input_size].to_vec();
546587
let output = output.iter().map(|i| map[*i]).collect::<Vec<_>>();
547-
let result = QueryResult {
548-
output: output.clone(),
549-
multiplicity: G::from_bool(!unconstrained),
550-
};
551-
record.function_queries[fun_idx].insert(args, result);
588+
record.function_queries[fun_idx].insert(
589+
&map[..input_size],
590+
&output,
591+
G::from_bool(!unconstrained),
592+
);
552593
if let Some(CallerState {
553594
fun_idx: caller_idx,
554595
map: caller_map,

src/aiur/memory.rs

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,7 @@ use multi_stark::{
77
};
88
use rayon::{
99
iter::{
10-
IndexedParallelIterator, IntoParallelRefIterator,
11-
IntoParallelRefMutIterator, ParallelIterator,
10+
IndexedParallelIterator, IntoParallelRefMutIterator, ParallelIterator,
1211
},
1312
slice::ParallelSliceMut,
1413
};
@@ -69,10 +68,10 @@ impl Memory {
6968

7069
rows_no_padding
7170
.par_chunks_mut(width)
72-
.zip(queries.par_iter())
7371
.zip(lookups_no_padding.par_iter_mut())
7472
.enumerate()
75-
.for_each(|(i, ((row, (values, result)), row_lookups))| {
73+
.for_each(|(i, (row, row_lookups))| {
74+
let (values, result) = queries.get_index(i).expect("index in range");
7675
row[0] = result.multiplicity;
7776
row[1] = G::ONE;
7877
row[2] = G::from_usize(i);

0 commit comments

Comments
 (0)