Skip to content

Commit b5911e4

Browse files
authored
feat: merge-train/avm (#22059)
BEGIN_COMMIT_OVERRIDE fix(avm)!: data copy read padding (#21921) fix(avm)!: sha256 pre-audit (#22001) END_COMMIT_OVERRIDE
2 parents 7d2aa8c + 86e1a8d commit b5911e4

31 files changed

Lines changed: 1629 additions & 1359 deletions

barretenberg/cpp/pil/vm2/data_copy.pil

Lines changed: 68 additions & 58 deletions
Original file line numberDiff line numberDiff line change
@@ -55,30 +55,29 @@ include "gt.pil";
5555
* context (using dst_context_id).
5656
* - M[src_addr]: aka value[0] (the first value read from the src context)
5757
* - the memory tag is ignored for these reads
58-
* - M[src_addr + data_index_upper_bound - 1]: aka value[data_index_upper_bound - 1] (the last value read from the src context)
59-
* - data_index_upper_bound is derived from the copy_size, see pil relations for an explanation
58+
* - M[src_addr + clamped_read_index_upper_bound - 1]: aka value[clamped_read_index_upper_bound - 1] (the last value read from the src context)
59+
* - clamped_read_index_upper_bound is derived from the copy_size, see pil relations for an explanation
6060
* - the memory tag is ignored for these reads
6161
* - M[dst_addr]: aka output[0] (the first value written to the dst context)
6262
* - guaranteed by this gadget to be FF
6363
* - M[dst_addr + copy_size - 1]: aka output[copy_size - 1] (the last value written to the dst context)
6464
* - guaranteed by this gadget to be FF
6565
*
6666
* ERROR HANDLING:
67-
* There is one type of potential errors that is checked: memory out of range accesses
68-
* They are checked simultaneously and are part of the same temporality group.
69-
* - src_out_of_range_err: if the read address is out of range
70-
* - dst_out_of_range_err: if the write address is out of range
71-
* - err: if either of the above errors is set
72-
* If there are no errors, we read and write the calldata/returndata from the parent/child context to the current context
67+
* There is one type of error: dst_out_of_range_err (write address exceeds memory).
68+
* Source out-of-range reads are NOT errors. Instead, reads that would exceed the memory space
69+
* are clamped and the remaining writes are zero-padded. This is consistent with top-level
70+
* calldata copy behavior where reads past the data boundary return 0.
71+
*
7372
* COMPUTING AMOUNT OF DATA TO READ
74-
* We need to ensure that we do not read outside the bounds designated by the parent/child context for their respective data.
75-
* this data_index_upper_bound is computed via min(data_size, copy_size + copy_offset).
73+
* We first compute read_index_upper_bound = min(data_size, copy_size + copy_offset).
74+
* If src_addr + read_index_upper_bound > AVM_MEMORY_SIZE, we clamp to (AVM_MEMORY_SIZE - src_addr).
7675
*
7776
* READING / WRITING DATA
7877
* At each row, the i-th data is simultaneously read from the parent/child and written into the current context
7978
* For top level calldatacopy, the data is retrieved from the calldata column instead of memory.
80-
* The number of reads that are performed is (data_index_upper_bound - copy_offset), while the number of writes is copy_size
81-
* If the data_index_upper_bound < copy_offset, the number of reads is constrained to be 0.
79+
* The number of reads that are performed is (clamped_read_index_upper_bound - copy_offset), while the number of writes is copy_size
80+
* If clamped_read_index_upper_bound <= copy_offset, the number of reads is constrained to be 0.
8281
* If num_reads < copy_size, the remaining (copy_size - num_reads) rows are designated as padding rows.
8382
* padding rows are constrained to have the value = 0.
8483
*
@@ -103,7 +102,7 @@ include "gt.pil";
103102
* src_context_id, dst_context_id,
104103
* copy_size, offset, dst_addr,
105104
* src_addr, src_data_size,
106-
* err
105+
* dst_out_of_range_err
107106
* }
108107
*
109108
* Usage: RD COPY
@@ -120,7 +119,7 @@ include "gt.pil";
120119
* src_context_id, dst_context_id,
121120
* copy_size, offset, dst_addr,
122121
* src_addr, src_data_size,
123-
* err
122+
* dst_out_of_range_err
124123
* };
125124
*
126125
* Inputs:
@@ -134,7 +133,7 @@ include "gt.pil";
134133
* @column src_data_size The size of the data in the parent/child context.
135134
*
136135
* Output:
137-
* @column err The error flag.
136+
* @column dst_out_of_range_err The error flag (only dst out of range is an error).
138137
*
139138
* Multi-rows computation:
140139
* 1) Happy path: number of rows = copy_size
@@ -247,12 +246,12 @@ namespace data_copy;
247246
// Computing the src index upper bound
248247
//////////////////////////////////////
249248
// Computing the read count, i.e. the number of elements that will be read from the src data.
250-
// We compute the data index upper bound using min(offset + copy_size, src_data_size)
251-
// This ensures that we cannot read pass the designated data address assigned by the parent/child
249+
// We first compute read_index_upper_bound = min(offset + copy_size, src_data_size).
250+
// This ensures that we cannot read past the designated data address assigned by the parent/child.
252251
// The min operation is essentially checking the comparison of the following
253252
// 1) (offset + copy_size) > src_data_size or
254253
// 2) (offset + copy_size) <= src_data_size
255-
// if (1) then data_index_upper_bound = src_data_size, otherwise data_index_upper_bound = (offset + copy_size)
254+
// if (1) then read_index_upper_bound = src_data_size, otherwise read_index_upper_bound = (offset + copy_size)
256255
pol commit offset_plus_size;
257256
offset_plus_size = sel_start * (offset + copy_size);
258257
pol commit offset_plus_size_is_gt; // @boolean (by lookup into gt)
@@ -265,41 +264,56 @@ namespace data_copy;
265264
in
266265
gt.sel_others { gt.input_a, gt.input_b, gt.res };
267266

268-
// Set data_index_upper_bound based on the conditions (1) or (2) from above
269-
pol commit data_index_upper_bound;
270-
data_index_upper_bound = sel_start * ((src_data_size - offset_plus_size) * offset_plus_size_is_gt + offset_plus_size);
267+
// Read index upper bound: min(offset + copy_size, src_data_size)
268+
pol READ_INDEX_UPPER_BOUND = (src_data_size - offset_plus_size) * offset_plus_size_is_gt + offset_plus_size;
271269

272270
//////////////////////////////
273-
// Error Handling
271+
// Src Address Range Clamping
274272
//////////////////////////////
275-
// Errors on whether the read or write addresses are out of range in memory. (provided that sel_start == 1).
276-
pol commit src_out_of_range_err; // @boolean (by lookup into gt)
277-
pol commit dst_out_of_range_err; // @boolean (by lookup into gt)
273+
// If the src read addresses exceed the memory space (src_addr + read_index_upper_bound > AVM_MEMORY_SIZE),
274+
// we clamp its value so that reads stay within memory bounds (`clamped_read_index_upper_bound`).
275+
// Out-of-range reads return 0 (padding). This is NOT an error — it is consistent with top-level
276+
// calldata copy behavior where reads past the data boundary return 0.
278277

279278
// AVM_MEMORY_SIZE == AVM_HIGHEST_MEM_ADDRESS + 1.
280-
pol commit mem_size; // todo: While we do not support constants
279+
pol commit mem_size; // Lookup constant support: We need this temporarily while we do not allow for aliases in the lookup tuple
281280
sel_start * (mem_size - constants.AVM_MEMORY_SIZE) = 0;
282281

283-
// To check that the read and write addresses are within range, we compare the upper bound
284-
// of the read/write addresses to the memory size. Working with upper bounds is easier than
285-
// working with the highest read/write address as we avoid an underflow when src_addr/dst_addr
286-
// and copy_size/data_index_upper_bound are both zero.
287-
// Example: src_addr = AVM_HIGHEST_MEM_ADDRESS, data_index_upper_bound = 1 is perfectly valid
288-
// and read_addr_upper_bound = AVM_HIGHEST_MEM_ADDRESS + 1 <= AVM_MEMORY_SIZE.
289-
// An out-of-range error is raised when read_addr_upper_bound/write_addr_upper_bound > AVM_MEMORY_SIZE.
282+
pol commit src_reads_exceed_mem; // @boolean (by lookup into gt) — clamping flag
290283

291-
// Note that for a top-level call, src_addr == 0 (enforced in context.pil (#[CD_OFFSET_ENQUEUED_CALL_IS_ZERO]))
292-
// and src_out_of_range_err == 0.
293-
pol commit read_addr_upper_bound;
294-
read_addr_upper_bound = sel_start * (src_addr + data_index_upper_bound);
284+
pol commit read_addr_upper_bound; // the upper bound of the address that is accessed
285+
read_addr_upper_bound = sel_start * (src_addr + READ_INDEX_UPPER_BOUND);
295286
// Preconditions to `gt` gadget require both inputs to be bounded by 2^128.
296-
// `read_addr_upper_bound` = src_addr + data_index_upper_bound (former is U32, latter is < 2^33, so < 2^34).
287+
// `read_addr_upper_bound` = src_addr + read_index_upper_bound (former is U32, latter is < 2^33, so < 2^34).
297288
// `mem_size` = AVM_MEMORY_SIZE = 2^32.
289+
// Note that for a top-level call, src_addr == 0 (enforced in context.pil (#[CD_OFFSET_ENQUEUED_CALL_IS_ZERO]))
290+
// and src_reads_exceed_mem == 0.
298291
#[CHECK_SRC_ADDR_IN_RANGE]
299-
sel_start { read_addr_upper_bound, mem_size, src_out_of_range_err }
292+
sel_start { read_addr_upper_bound, mem_size, src_reads_exceed_mem }
300293
in
301294
gt.sel_others { gt.input_a, gt.input_b, gt.res };
302295

296+
// Clamp read_index_upper_bound to (mem_size - src_addr) when src reads exceed memory; the result is clamped_read_index_upper_bound.
297+
// When src_reads_exceed_mem == 0: clamped_read_index_upper_bound = read_index_upper_bound
298+
// When src_reads_exceed_mem == 1: clamped_read_index_upper_bound = mem_size - src_addr
299+
// (mem_size - src_addr >= 1 since src_addr is U32 < AVM_MEMORY_SIZE)
300+
//
301+
// This is a data index (not a memory address). Valid range: [0, min(src_data_size, mem_size - src_addr)].
302+
// - Lower bound: 0 when copy_size == 0 or src_data_size == 0.
303+
// - Upper bound without clamping: min(offset + copy_size, src_data_size) <= src_data_size (U32).
304+
// - Upper bound with clamping: mem_size - src_addr < read_index_upper_bound <= src_data_size (U32), since clamping only applies when src_addr + read_index_upper_bound > mem_size.
305+
// Used to compute reads_left = max(0, clamped - offset), which determines how many rows
306+
// are real memory/column reads vs. zero-padded writes.
307+
pol commit clamped_read_index_upper_bound;
308+
clamped_read_index_upper_bound = sel_start * (READ_INDEX_UPPER_BOUND * (1 - src_reads_exceed_mem) + (mem_size - src_addr) * src_reads_exceed_mem);
309+
310+
//////////////////////////////
311+
// Error Handling
312+
//////////////////////////////
313+
// Only dst address out of range is an error. Src out of range is handled by clamping reads (see above).
314+
// Only constrained on start row
315+
pol commit dst_out_of_range_err; // @boolean (by lookup into gt)
316+
303317
pol commit write_addr_upper_bound;
304318
write_addr_upper_bound = sel_start * (dst_addr + copy_size);
305319
// Preconditions to `gt` gadget require both inputs to be bounded by 2^128.
@@ -310,16 +324,11 @@ namespace data_copy;
310324
in
311325
gt.sel_others { gt.input_a, gt.input_b, gt.res };
312326

313-
// Consolidate the errors
314-
// Underconstrained for non-starting rows (sel_start = 0).
315-
pol commit err; // @boolean (by definition)
316-
err = 1 - (1 - dst_out_of_range_err) * (1 - src_out_of_range_err);
317-
318327
//////////////////////////////
319328
// Control flow management
320329
//////////////////////////////
321330
pol commit sel_start_no_err; // @boolean (by definition)
322-
sel_start_no_err = sel_start * (1 - err);
331+
sel_start_no_err = sel_start * (1 - dst_out_of_range_err);
323332

324333
pol commit sel_write_count_is_zero; // @boolean
325334
sel_write_count_is_zero * (1 - sel_write_count_is_zero) = 0;
@@ -339,29 +348,29 @@ namespace data_copy;
339348
SEL_PERFORM_COPY * (WRITE_COUNT_MINUS_ONE * (sel_end * (1 - write_count_minus_one_inv) + write_count_minus_one_inv) - 1 + sel_end) = 0;
340349

341350
#[END_ON_ERR] // sel_end = 1 if error
342-
sel_start * err * (sel_end - 1) = 0;
351+
sel_start * dst_out_of_range_err * (sel_end - 1) = 0;
343352

344353
pol commit reads_left; // Number of reads of the src data, if reads_left = 0 but copy_size != 0 then it is a padding row
345-
// src data elements are read from indices [offset, data_index_upper_bound), therefore reads_left = data_index_upper_bound - offset
346-
// We need to be careful that data_index_upper_bound - offset does not underflow (i.e. when offset > data_index_upper_bound, reads_left = 0)
354+
// src data elements are read from indices [offset, clamped_read_index_upper_bound), therefore reads_left = clamped_read_index_upper_bound - offset
355+
// We need to be careful that clamped_read_index_upper_bound - offset does not underflow (i.e. when offset > clamped_read_index_upper_bound, reads_left = 0)
347356
// We test that condition here
348-
pol commit data_index_upper_bound_gt_offset; // @boolean (by lookup into gt)
357+
pol commit sel_has_reads; // @boolean (by lookup into gt)
349358
// Preconditions to `gt` gadget require both inputs to be bounded by 2^128.
350-
// `data_index_upper_bound` is at most `src_data_size` which is U32. `offset` is U32.
351-
#[DATA_INDEX_UPPER_BOUND_GT_OFFSET]
352-
sel_start_no_err { data_index_upper_bound, offset, data_index_upper_bound_gt_offset }
359+
// `clamped_read_index_upper_bound` is at most `src_data_size` which is U32. `offset` is U32.
360+
#[SEL_HAS_READS]
361+
sel_start_no_err { clamped_read_index_upper_bound, offset, sel_has_reads }
353362
in
354363
gt.sel_others { gt.input_a, gt.input_b, gt.res };
355364

356-
// If data_index_upper_bound_gt_offset = 0 (i.e. when offset >= data_index_upper_bound), reads_left = 0
357-
// otherwise, reads_left = data_index_upper_bound - offset
365+
// If sel_has_reads = 0 (i.e. when offset >= clamped_read_index_upper_bound), reads_left = 0
366+
// otherwise, reads_left = clamped_read_index_upper_bound - offset
358367
#[INIT_READS_LEFT]
359-
sel_start_no_err * (1 - sel_write_count_is_zero) * (reads_left - (data_index_upper_bound - offset) * data_index_upper_bound_gt_offset) = 0;
368+
sel_start_no_err * (1 - sel_write_count_is_zero) * (reads_left - (clamped_read_index_upper_bound - offset) * sel_has_reads) = 0;
360369

361370
//////////////////////////////
362371
// Execute Data Copy
363372
//////////////////////////////
364-
// Most of these relations are either gated explicitly by err, end, or LATCH_CONDITION.
373+
// Most of these relations are either gated explicitly by dst_out_of_range_err, end, or LATCH_CONDITION.
365374
// ===== Writing to dst_context_id =====
366375
pol commit sel_mem_write; // @boolean (by definition, see SEL_PERFORM_COPY)
367376
sel_mem_write = SEL_PERFORM_COPY; // We write if there is no error and copy_size != 0
@@ -402,7 +411,8 @@ namespace data_copy;
402411
sel * (1 - padding) * (1 - sel_end) * (reads_left' - reads_left + 1) = 0;
403412
pol commit padding; // @boolean If we write, padding = 1 iff reads_left = 0
404413
padding * (1 - padding) = 0;
405-
pol commit reads_left_inv;
414+
pol commit reads_left_inv; //@zero-check
415+
// padding = 1 iff reads_left = 0
406416
#[PADDING_CONDITION]
407417
SEL_PERFORM_COPY * (reads_left * (padding * (1 - reads_left_inv) + reads_left_inv) - 1 + padding) = 0;
408418

@@ -443,7 +453,7 @@ namespace data_copy;
443453
#[CD_COPY_COLUMN]
444454
cd_copy_col_read = SEL_PERFORM_COPY * (1 - padding) * is_top_level * sel_cd_copy;
445455

446-
// The calldata trace starts at index = 1 (TODO: We need this temporarily while we dont allow for aliases in the lookup tuple):
456+
// Lookup constant support: The calldata trace starts at index = 1. We need this temporarily while we do not allow for aliases in the lookup tuple.
447457
pol commit read_addr_plus_one;
448458
read_addr_plus_one = cd_copy_col_read * (read_addr + 1);
449459

barretenberg/cpp/pil/vm2/execution.pil

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -952,7 +952,7 @@ sel_exec_dispatch_calldata_copy {
952952
data_copy.src_context_id, data_copy.dst_context_id,
953953
data_copy.copy_size, data_copy.offset, data_copy.dst_addr,
954954
data_copy.src_addr, data_copy.src_data_size,
955-
data_copy.err
955+
data_copy.dst_out_of_range_err
956956
};
957957

958958
#[DISPATCH_TO_RD_COPY]
@@ -967,7 +967,7 @@ sel_exec_dispatch_returndata_copy {
967967
data_copy.src_context_id, data_copy.dst_context_id,
968968
data_copy.copy_size, data_copy.offset, data_copy.dst_addr,
969969
data_copy.src_addr, data_copy.src_data_size,
970-
data_copy.err
970+
data_copy.dst_out_of_range_err
971971
};
972972

973973
// SET DISPATCHING

0 commit comments

Comments
 (0)