perf(v2.1): split rv64 load/store by operation and width#2922
perf(v2.1): split rv64 load/store by operation and width#2922shuklaayush wants to merge 14 commits into
Conversation
f32022a to
0c58aa7
Compare
This comment has been minimized.
This comment has been minimized.
This comment has been minimized.
This comment has been minimized.
This comment has been minimized.
This comment has been minimized.
This comment has been minimized.
This comment has been minimized.
This comment has been minimized.
This comment has been minimized.
100fb7f to
a209580
Compare
This comment has been minimized.
This comment has been minimized.
This comment has been minimized.
This comment has been minimized.
This comment has been minimized.
This comment has been minimized.
26202ea to
9c4933c
Compare
This comment has been minimized.
This comment has been minimized.
This comment has been minimized.
This comment has been minimized.
Code reviewNo issues found. Checked for bugs and CLAUDE.md compliance. |
| @@ -0,0 +1 @@ | |||
| pub mod core; | |||
There was a problem hiding this comment.
maybe we shouldn't put width_aligned in its own folder if its mod.rs is only one line
| LOAD_SIGN_EXTEND_WORD_SELECTOR_WIDTH, | ||
| >; | ||
| pub type LoadSignExtendWordFiller = LoadSignExtendWidthAlignedFiller< | ||
| crate::adapters::Rv64LoadAdapterFiller, |
There was a problem hiding this comment.
maybe import can be put above
| LOADW => KIND_WORD, | ||
| LOADH => KIND_HALFWORD, | ||
| LOADB => KIND_BYTE, |
There was a problem hiding this comment.
KIND doesn't sound right here
| /// register). Numerically equal to [`RV64_PTR_U16_LIMBS`], but named for arithmetic-word use. | ||
| pub const RV64_WORD_U16_LIMBS: usize = RV64_WORD_NUM_LIMBS / 2; | ||
|
|
||
| pub(crate) const RV64_ACCESS_SIZE_BYTE: usize = 0; |
There was a problem hiding this comment.
I think the naming of this constant doesn't actually represent what it means
| #[inline(always)] | ||
| pub fn memory_read_u16<const N: usize>( | ||
| memory: &GuestMemory, | ||
| address_space: u32, | ||
| ptr: u32, | ||
| ) -> [u16; N] { | ||
| debug_assert!( | ||
| address_space == RV64_REGISTER_AS | ||
| || address_space == RV64_MEMORY_AS | ||
| || address_space == PUBLIC_VALUES_AS, | ||
| ); | ||
|
|
||
| // SAFETY: these address spaces are u16-celled and `ptr` is an AS-native cell pointer. | ||
| unsafe { memory.read::<u16, N>(address_space, ptr) } | ||
| } | ||
|
|
There was a problem hiding this comment.
this helper feels weird to me. I don't think we need a helper for this function
| #[inline(always)] | ||
| pub(crate) fn u16_cell_byte(cell: u16, byte_idx: usize) -> u16 { | ||
| u16::from(cell.to_le_bytes()[byte_idx]) | ||
| } |
There was a problem hiding this comment.
better to not have this function for readability since this function is just one line
| pub(crate) const RV64_BYTE_MASK: u16 = (1 << RV64_BYTE_BITS) - 1; | ||
| pub(crate) const RV64_BYTE_SIGN_BIT: u16 = 1 << (RV64_BYTE_BITS - 1); | ||
| pub(crate) const RV64_U16_SIGN_BIT: u16 = 1 << (U16_BITS - 1); |
There was a problem hiding this comment.
maybe better to not have this constants ? either use another constant if its already there or just inline the definition of this constant. I feel like AI likes to define constants for everything
| } | ||
|
|
||
| impl Rv64StoreAdapterRecord { | ||
| pub(crate) fn effective_ptr(&self) -> u32 { |
There was a problem hiding this comment.
I feel like there has to be a better name for this function.
| /// Wrapper type for u8 so typed VM memory reads and writes can use opcode-specific helpers. | ||
| #[derive(Copy, Clone, Debug, Default)] | ||
| struct U8(u8); |
There was a problem hiding this comment.
hmm what's with the U8 struct here
| pub(crate) fn u16_block_to_f_bytes(block: [u16; BLOCK_FE_WIDTH]) -> [F; 8] { | ||
| rv64_u16_block_to_bytes(block).map(F::from_u8) | ||
| } |
There was a problem hiding this comment.
do we not have a function for this already
| let a = rng.random_range(0..(max_addr - 8)) / 8 * 8; | ||
| let b = rng.random_range(0..(max_addr - 8)) / 8 * 8; |
There was a problem hiding this comment.
use the gen_pointer helper here
| pub(crate) fn b(bytes: [u8; 8]) -> [u16; BLOCK_FE_WIDTH] { | ||
| rv64_bytes_to_u16_block(bytes) | ||
| } |
There was a problem hiding this comment.
I don't think we need a new function for this
| pub mod mul_cuda { | ||
|
|
||
| use super::*; | ||
|
|
There was a problem hiding this comment.
nit: remove the space here ?
| - The instruction is correctly fetched from the program ROM at address `from_pc` and the program counter is set to `from_pc + 4` | ||
| - A memory read from register `rs1` is performed | ||
| - A memory read from register `rs2` is performed | ||
| - A memory write to `mem_as` is performed at address `val(rs1) + imm` |
There was a problem hiding this comment.
maybe specify mem_as is only as 2 or 3 here
| static_assert(sizeof(Rv64LoadAdapterRecord) == 44); | ||
| static_assert(sizeof(LoadRecord) == 8); | ||
| static_assert(sizeof(Rv64LoadRecord) == 52); | ||
| static_assert(offsetof(LoadRecord, read_data) == 0); | ||
| static_assert(offsetof(Rv64LoadRecord, core) == 44); |
There was a problem hiding this comment.
I don't think we have this static assert in the other cuda tracegens, should we remove this for consistency?
| enum Rv64LoadSignExtendOpcode { | ||
| LOADB = 8, | ||
| LOADH = 9, | ||
| LOADW = 10, | ||
| }; |
There was a problem hiding this comment.
seems like a dead code ? same in the load.cuh file about the opcode enum
| enum Rv64StoreOpcode { | ||
| STORED = 4, | ||
| STOREW = 5, | ||
| STOREH = 6, | ||
| STOREB = 7, | ||
| }; |
| } | ||
| }; | ||
|
|
||
| __global__ void rv64_load_byte_tracegen_kernel( |
There was a problem hiding this comment.
I don't think we usually call it _kernel. We usually name it rv64_load_byte_tracegen and _rv64_load_byte_tracegen
| } | ||
| }; | ||
|
|
||
| __global__ void rv64_load_doubleword_tracegen_kernel( |
There was a problem hiding this comment.
same here and all other cuda tracegen files in this PR
| template <typename T> struct LoadByteCoreCols { | ||
| T selector[LOAD_BYTE_SELECTOR_WIDTH]; | ||
| T is_valid; | ||
| T read_cell_bytes[2]; | ||
| T read_data[BLOCK_FE_WIDTH]; | ||
| }; | ||
|
|
||
| template <typename T> struct Rv64LoadByteCols { | ||
| Rv64LoadAdapterCols<T> adapter; | ||
| LoadByteCoreCols<T> core; | ||
| }; |
There was a problem hiding this comment.
should we move the definition of the columns in the load_byte.cuh files ? all the other .cu file doesn't have the definition of the columns. one idea would be to split the header .cuh file for consistency with other chips ?
| mem_config.addr_spaces[RV64_REGISTER_AS as usize].num_cells = 1 << 29; | ||
| mem_config.addr_spaces[PUBLIC_VALUES_AS as usize].num_cells = 1 << 29; |
There was a problem hiding this comment.
why did it change the num_cells here ? and doesn't this exceed the currently allowed num_cells ? it should be at most 1 << 28. I think it should just use VmChipTestBuilder::from_config(MemoryConfig::default()) ?
| LOADW => 2, | ||
| LOADH => 1, | ||
| LOADB => 0, | ||
| _ => unreachable!("signed load test only supports LOADB/LOADH/LOADW"), |
There was a problem hiding this comment.
alignment_bit instead of alignment
| store_adapter_context::<AB, I>( | ||
| cols.is_valid.into(), | ||
| expected_opcode, | ||
| shift_amount, | ||
| cols.read_data, | ||
| cols.prev_data, | ||
| write_data, | ||
| ) |
There was a problem hiding this comment.
wanted to note that this is inconsistent with the behavior of other chips which just returns the AirAdapterContext without using a helper function
| fn access_cells<const KIND: usize>() -> usize { | ||
| match KIND { | ||
| KIND_DOUBLEWORD => 4, | ||
| KIND_WORD => 2, | ||
| KIND_HALFWORD => 1, | ||
| _ => unreachable!("unsupported width-aligned store kind"), | ||
| } | ||
| } |
There was a problem hiding this comment.
the naming of this function is confusing
| fn fill_dummy_trace_row(&self, row_slice: &mut [F]) { | ||
| let (adapter_row, _) = unsafe { | ||
| row_slice | ||
| .split_at_mut_unchecked(<Rv64StoreAdapterFiller as AdapterTraceFiller<F>>::WIDTH) | ||
| }; | ||
| let adapter_row: &mut Rv64StoreAdapterCols<F> = adapter_row.borrow_mut(); | ||
| adapter_row.mem_as = F::from_u32(2); | ||
| } |
There was a problem hiding this comment.
hmmm why did it need to write a custom fill_dummy_trace_row function for store ?
| let d_records = tracing::info_span!("trace_gen.h2d_records") | ||
| .in_scope(|| records.to_device_on(device_ctx)) | ||
| .unwrap(); | ||
| let d_trace = DeviceMatrix::<F>::with_capacity_on(trace_height, trace_width, device_ctx); |
There was a problem hiding this comment.
its unusual to have this tracing in the generate_proving_ctx. should we remove this ?
| pub const WORD_STORE_CASES: usize = 2; | ||
| pub const WORD_STORE_SELECTOR_WIDTH: usize = 1; | ||
|
|
There was a problem hiding this comment.
the constant name isn't good. maybe STOREW_CASES ?
There was a problem hiding this comment.
but I also think the CASES constant name isn't good as well in the comment before
| #[test] | ||
| fn run_storew_sanity_test() { | ||
| let read_data = b([138, 45, 202, 76, 131, 74, 186, 29]); | ||
| let prev_data = b([159, 213, 89, 34, 142, 67, 210, 88]); | ||
| assert_eq!( | ||
| store_write_data(STOREW, read_data, prev_data, 0), | ||
| b([138, 45, 202, 76, 142, 67, 210, 88]) | ||
| ); | ||
| assert_eq!( | ||
| store_write_data(STOREW, read_data, prev_data, 4), | ||
| b([159, 213, 89, 34, 138, 45, 202, 76]) | ||
| ); | ||
| } |
| } | ||
| } | ||
|
|
||
| pub(crate) fn store_kind_for_opcode(opcode: Rv64LoadStoreOpcode) -> usize { |
There was a problem hiding this comment.
this function name is confusing
GunaDD
left a comment
There was a problem hiding this comment.
left comments mostly on the code structure and naming of variables which is confusing. the logic of the chip itself seems correct to me!
Summary
Testing
cargo +nightly fmt --allRUSTC_WRAPPER= cargo test -p openvm-riscv-circuit --no-runOPENVM_SKIP_DEBUG=1 RUSTC_WRAPPER= cargo test -p openvm-riscv-circuit negative_split -- --nocaptureOPENVM_SKIP_DEBUG=1 RUSTC_WRAPPER= cargo test -p openvm-riscv-circuit rand_load -- --nocaptureOPENVM_SKIP_DEBUG=1 RUSTC_WRAPPER= cargo test -p openvm-riscv-circuit rand_store -- --nocaptureOPENVM_SKIP_DEBUG=1 RUSTC_WRAPPER= cargo test -p openvm-riscv-circuit rand_load_sign_extend -- --nocapturereth benchmark comparison
resolve int-7671, int-8104