@@ -2,46 +2,115 @@ include "bc_decomposition.pil";
22
33include "../range_check.pil";
44include "../constants_gen.pil";
5-
6- // Explanations
7- // *****************************************************************************
8- // The role of this subtrace is to fetch an instruction from the bytecode at a
9- // position specified by pc. This subtrace copies the relevant bytecode portion
10- // from subtrace specified in bc_decomposition.pil.
11- // Note that instruction fetching will only be performed for the instructions
12- // which are executed by the execution trace and only once, i.e., even if
13- // the AVM executes several times a given instruction, the current sub-trace
14- // will only produce one row per static instruction.
15- // The main work performed by the current sub-trace consists in transforming
16- // bytes into the operands of the corresponding instruction. This part is
17- // expressed by the relations #[ADDRESSING_MODE_BYTES_DECOMPOSITION] and
18- // #[OPXXX_BYTES_DECOMPOSITION]. They encode each operand based on relevant bytes
19- // of the relative position to PC depending on each opcode. Each wire opcode has
20- // a static specification in precomputed.pil (see WIRE INSTRUCTION SPEC table)
21- // corresponding to the operand decomposition (sel_op_dc_XXX).
22- // All the other relations deal with error handling (except lookups/interactions).
23- // We handle 4 possible different errors and consolidate into a single one: sel_parsing_err.
24- // Only sel_parsing_err is propagated to the execution trace.
25- // List of errors:
26- // 1) pc_out_of_range: pc is out-of-range if pc >= bytecode_size.
27- // - bytecode_size is retrieved from bc_decomposition.pil at pc == 0.
28- // 2) opcode_out_of_range:
29- // - opcode is out-of-range if the byte at pc is not a valid wire opcode.
30- // - this information is retrieved from precomputed.pil.
31- // 3) instr_out_of_range: remaing bytes in bytecode are less than instruction size.
32- // - instruction size (in bytes) is retrieved from precomputed.pil.
33- // - bytes_to_read (remaining bytes in bytecode) is retrieved from bc_decomposition.pil.
34- // 4) tag_out_of_range:
35- // - tag is out-of-range if a tag operand is not a valid memory tag.
36- // - this information is retrieved from precomputed.pil.
37- //
38- // There is a hierarchy among these errors, in the sense that if any of them is encountered
39- // the next ones are irrelevant. If pc is out of range, there is no instruction to consider.
40- // If the opcode is invalid, we cannot consider any instruction. If the instruction is not
41- // complete then there is no point to validate tag operands.
42- // Thus, our witness generation will never toggle more than a single error. We will
43- // enforce this constraint as it will help reducing the degrees of some relations.
44- // Assuming disjoint erros, the consolidated error is simply the sum of the 4 errors.
5+ include "../precomputed.pil";
6+
7+ /**
8+ * This subtrace fetches and validates instructions from bytecode. It is used during execution
9+ * at each active row, as long as the context's bytecode has been successfully retrieved.
10+ * For our purposes an instruction can be thought of as:
11+ * - Opcode (exec_opcode, u8): The operation to perform encoded numerically as defined in our
12+ * instruction specification e.g. the SUB operation is mapped to 1.
13+ * - Operands ([op1, ..., op7], MemoryValue[]): The inputs and/or outputs of the operation. The
14+ * number of operands and their types differs per opcode e.g. FDIV has three; two FF inputs
15+ * and one FF output.
16+ * - Addressing Mode (addressing_mode, u16): The indirect and relative contributions of the operands
17+ * encoded as a u16. Each bit pair of the value represents whether the corresponding operand is
18+ * direct (00), indirect (01), or relative (10). See yarn-project/simulator/docs/avm/addressing.md
19+ * for the encoding breakdown and an example.
20+ *
21+ * The main work of this trace is to derive the decoded operands of an instruction from raw bytecode
22+ * bytes and our opcode specification:
23+ * - addressing_mode: #[ADDRESSING_MODE_BYTES_DECOMPOSITION]
24+ * - [op1, op2, op3, op4, op5, op6, op7]: #[OPXXX_BYTES_DECOMPOSITION]
25+ *
26+ * It copies the relevant bytecode portion from bc_decomposition.pil (#[BYTES_FROM_BC_DEC]):
27+ * bd0 = bytecode[pc], bd1 = bytecode[pc+1], ..., bd36 = bytecode[pc+36],
28+ * where bc_decomposition holds a 37 (= MAX_INSTRUCTION_SIZE) byte sliding window starting at the
29+ * position specified by pc. The first byte (bd0) is the wire opcode and the remaining are raw operand bytes.
30+ *
31+ * The operands' encoding is determined per opcode by the WIRE_INSTRUCTION_SPEC in precomputed.pil.
32+ * This specification provides: exec_opcode, instr_size, operand decomposition selectors (sel_op_dc_0..16)
33+ * and tag metadata (sel_has_tag, sel_tag_is_op2). We use the decomposition selectors to transform the
34+ * fetched bytecode bytes (bd0..36) into the operands (addressing_mode, op1..7)).
35+ *
36+ * The remaining work of this trace uses other specification data to handle errors (see ERROR HANDLING).
37+ *
38+ *
39+ * PRECONDITIONS:
40+ * - The bytecode identified by bytecode_id must already be decomposed in bc_decomposition.pil.
41+ * - The [pc, bytecode_id] pairs present in this trace correspond to an instruction in a successfully
42+ * retrieved bytecode. This is enforced by execution.pil lookups.
43+ *
44+ * USAGE: Note that this subtrace is only designed to be used by execution, where it is accessed by
45+ * two separate lookups. This allows for error handling without having to constrain partial
46+ * instructions on parsing failure.
47+ * To determine whether parsing succeeded (#[INSTRUCTION_FETCHING_RESULT] in execution.pil):
48+ * sel_bytecode_retrieval_success {
49+ * pc, bytecode_id, sel_instruction_fetching_failure
50+ * } in instr_fetching.sel {
51+ * instr_fetching.pc, instr_fetching.bytecode_id, instr_fetching.sel_parsing_err
52+ * };
53+ *
54+ * To retrieve the full decoded instruction, only when parsing succeeded (using
55+ * sel_instruction_fetching_success == !sel_instruction_fetching_failure && sel_bytecode_retrieval_success
56+ * to gate #[INSTRUCTION_FETCHING_BODY] in execution.pil):
57+ * sel_instruction_fetching_success {
58+ * pc, bytecode_id, exec_opcode,
59+ * instr_size, addressing_mode,
60+ * op[0], op[1], op[2], op[3], op[4],
61+ op[5], op[6]
62+ * } in instr_fetching.sel {
63+ * instr_fetching.pc, instr_fetching.bytecode_id, instr_fetching.exec_opcode,
64+ * instr_fetching.instr_size, instr_fetching.addressing_mode,
65+ * instr_fetching.op1, instr_fetching.op2, instr_fetching.op3, instr_fetching.op4,
66+ * instr_fetching.op5, instr_fetching.op6, instr_fetching.op7
67+ * };
68+ *
69+ * ERROR HANDLING:
70+ * The trace detects four (mutually exclusive) parsing errors, consolidated into sel_parsing_err:
71+ * 1. pc_out_of_range: pc >= bytecode_size.
72+ * Determined by #[PC_OUT_OF_RANGE_TOGGLE] where bytecode_size is looked up from bc_decomposition.
73+ * 2. opcode_out_of_range: bd0 does not correspond to a valid wire opcode.
74+ * Determined by the precomputed table entry for bd0 (#[WIRE_INSTRUCTION_INFO]).
75+ * 3. instr_out_of_range: bytes_to_read < instr_size.
76+ * Determined by #[INSTR_OUT_OF_RANGE_TOGGLE] where bytes_to_read is looked up from
77+ * bc_decomposition and instr_size is from the precomputed table entry (#[WIRE_INSTRUCTION_INFO]).
78+ * 4. tag_out_of_range: a tag operand does not correspond to a valid memory tag.
79+ * Determined by the precomputed table entry for the tag operand (op2 or op3) (#[TAG_VALUE_VALIDATION]).
80+ *
81+ * The errors are hierarchical in the above order (see simulation -> deserialize_instruction()); if
82+ * any are encountered, the following are irrelevant. Thus, witness generation sets at most one error flag,
83+ * where disjointness is enforced by the definition of sel_parsing_err. Only sel_parsing_err is propagated to
84+ * execution.pil.
85+ *
86+ * Note that decoded operands are enforced to be 0 when any of errors 1-3 are active (denoted by
87+ * PARSING_ERROR_EXCEPT_TAG_ERROR == 1).
88+ *
89+ * TRACE SHAPE: One row per unique [pc, bytecode_id] pair. Note that simulation deduplicates
90+ * instructions that have already been processed i.e. when the same [pc, bytecode_id] pair is
91+ * retrieved multiple times in the same tx, only one event is emitted.
92+ * See InstructionFetchingEvent and DeduplicatingEventEmitter used in simulate_for_witgen.
93+ *
94+ * INTERACTIONS:
95+ * execution.pil --> instr_fetching.pil --> bc_decomposition.pil <-> bc_hashing.pil
96+ * --> precomputed.pil
97+ * --> range_check.pil
98+ *
99+ * This subtrace is looked up by:
100+ * - execution.pil: To fetch the instruction at [pc, bytecode_id] and detect parse errors
101+ * (#[INSTRUCTION_FETCHING_RESULT] and #[INSTRUCTION_FETCHING_BODY]).
102+ *
103+ * This subtrace looks up:
104+ * - bc_decomposition.pil: To retrieve the bytecode size (#[BYTECODE_SIZE_FROM_BC_DEC]) and the
105+ * encoded instruction bytes for [pc, bytecode_id] (#[BYTES_FROM_BC_DEC]).
106+ * - precomputed.pil: To retrieve the wire instruction information for the opcode (#[WIRE_INSTRUCTION_INFO],
107+ * by opcode byte bd0), to validate the tag operand if it exists
108+ * (#[TAG_VALUE_VALIDATION]), and to enforce that instr_abs_diff is positive via
109+ * a range check (#[INSTR_ABS_DIFF_POSITIVE]).
110+ * - range_check.pil: To to enforce that pc_abs_diff is positve via a range check (#[PC_ABS_DIFF_POSITIVE]).
111+ *
112+ * Note that the latter two range checks are on 8 and 32 bit numbers resp., hence use different traces.
113+ */
45114
46115namespace instr_fetching;
47116
@@ -88,37 +157,34 @@ sel_parsing_err * (1 - sel_parsing_err) = 0; // enforces disjoint errors
88157// Handling pc_out_of_range error
89158// ****************************************************************************
90159
91- // Retrieved from bc_decomposition.pil based on bytecode_id with pc == 0
160+ // Size of the bytecode in bytes (constrained by #[BYTECODE_SIZE_FROM_BC_DEC]).
92161pol commit bytecode_size;
93162
94- // We have to enforce that: pc < bytecode_size <==> pc_out_of_range == 0
95- // We use a specific absolute difference value to distinguish pc < bytecode_size
96- // from pc >= bytecode_size.
97-
98- // pc - bytecode_size if bytecode_size <= pc
163+ // Absolute difference variant where we compute:
164+ // pc - bytecode_size if bytecode_size <= pc (pc_out_of_range == 1)
99165// bytecode_size - pc - 1 if bytecode_size > pc
100166pol commit pc_abs_diff;
167+
168+ // From the following relation, we have: pc_abs_diff >= 0 ==> [pc >= bytecode_size <==> pc_out_of_range == 1]
101169#[PC_OUT_OF_RANGE_TOGGLE]
102170pc_abs_diff = sel * ((2 * pc_out_of_range - 1) * (pc - bytecode_size) - 1 + pc_out_of_range);
103171
104- // TODO: Remove this one once we support constant in lookup tuples
105- // A column with the value 32 at each row.
172+ // Lookup constant support: Can be removed when we support constants in lookups.
106173pol commit pc_size_in_bits;
107174sel * (pc_size_in_bits - constants.AVM_PC_SIZE_IN_BITS) = 0;
108175
109176// pc_abs_diff is 32-bit long (pc is uint32_t)
110- // Use constant AVM_PC_SIZE_IN_BITS once we support constants in lookup tuples.
111177#[PC_ABS_DIFF_POSITIVE]
112178sel { pc_abs_diff, pc_size_in_bits } in range_check.sel { range_check.value, range_check.rng_chk_bits };
113179
114180// ****************************************************************************
115181// Handling instr_out_of_range error
116182// ****************************************************************************
117183
118- // Number of bytes which were read at a given pc. Retrieved from bc_decomposition.pil
184+ // Number of bytes which were read at a given pc (constrained by #[BYTES_FROM_BC_DEC]).
119185pol commit bytes_to_read;
120186
121- // Instruction size in bytes. Copied from precomputed.pil
187+ // Instruction size in bytes (constrained by #[WIRE_INSTRUCTION_INFO]).
122188pol commit instr_size;
123189
124190// Absolute difference variant where we compute:
@@ -138,17 +204,27 @@ sel { instr_abs_diff } in precomputed.sel_range_8 { precomputed.idx };
138204// Handling tag_out_of_range error
139205// ****************************************************************************
140206
141- // Retrieved from precomputed.pil (instruction specification)
142- pol commit sel_has_tag; // @boolean (by lookup only when sel_pc_in_range == 1) - With current instruction specs, tag can appear at op2 (SET_XXX) or op3 (CAST_8, CAST_16)
143- pol commit sel_tag_is_op2; // @boolean (by lookup only when sel_pc_in_range == 1) - (sel_tag_is_op2 == 0 && sel_has_tag == 1) ==> op3 is a tag
207+ // Retrieved from precomputed.pil (instruction specification).
208+ // Whether this instruction has a tag. Constrained by the precomputed table (#[WIRE_INSTRUCTION_INFO])
209+ // to be 1 iff we have a wire opcode (represented by bd0) with a tag as defined in the instruction spec.
210+ pol commit sel_has_tag; // @boolean (by lookup only when sel_pc_in_range == 1)
211+
212+ // Whether the tag exists at op2 (== 1) or op3, if exists (sel_tag_is_op2 == 0 && sel_has_tag == 1 ==> op3 is a tag).
213+ // Constrained, like sel_has_tag, by the precomputed table (#[WIRE_INSTRUCTION_INFO]) against the wire opcode.
214+ pol commit sel_tag_is_op2; // @boolean (by lookup only when sel_pc_in_range == 1)
144215
145- // Value to validate as tag. According to our instruction specifications, there is a maximum of one tag per instruction
146- // and it appears only at op2 or op3.
216+ // Value to validate as tag. According to our current instruction specifications, there is a maximum of one
217+ // tag per instruction and it appears only at op2 (SET_XXX) or op3 (CAST_8, CAST_16).
218+ // Lookup constant support: Can be removed when we support constants in lookups.
147219pol commit tag_value;
220+ // Note that (sel_has_tag - sel_tag_is_op2) cannot underflow (= p - 1) since the precomputed table enforces
221+ // that sel_tag_is_op2 == 1 ==> sel_has_tag == 1 (#[WIRE_INSTRUCTION_INFO]). When this lookup is skipped (sel_pc_in_range == 0)
222+ // we have pc_out_of_range == 1, meaning an error hierarchically higher than tag_out_of_range is activated, and we have
223+ // the same outcome of a sel_parsing_err to process anyway.
148224#[TAG_VALUE]
149225tag_value = (sel_has_tag - sel_tag_is_op2) * op3 + sel_tag_is_op2 * op2;
150226
151- // TODO: Investigate whether enforcing the tag checking in execution trace or in CAST/SET gadgets might be a better option.
227+ // TODO(#AVM-263) : Investigate whether enforcing the tag checking in execution trace or in CAST/SET gadgets might be a better option.
152228#[TAG_VALUE_VALIDATION]
153229sel_has_tag { tag_value, tag_out_of_range } in precomputed.sel_range_8 { precomputed.idx, precomputed.sel_mem_tag_out_of_range };
154230
0 commit comments