@@ -13864,20 +13864,51 @@ pub mod optimize {
1386413864 let current_local_count = base_local_count
1386513865 + caller_locals.iter().map(|(count, _)| count).sum::<u32>();
1386613866
13867- // Step 1: Allocate temporary locals for the callee's parameters
13868- // We need to pop arguments from the stack and store them in locals
13867+ // Step 1: Allocate temporary locals for the callee's parameters.
13868+ // Always reserve `param_count` slots so the callee-local
13869+ // remap offset is stable; a forwarded param leaves its
13870+ // slot unused (dead-locals removes it).
1386913871 let param_start_idx = current_local_count;
1387013872 let param_count = callee.signature.params.len() as u32;
13871-
13872- // Add parameter locals to caller (one local per parameter)
1387313873 for param_type in &callee.signature.params {
1387413874 caller_locals.push((1, *param_type));
1387513875 }
1387613876
13877- // Step 2: Generate instructions to store arguments from stack to locals
13878- // Arguments are on the stack in order: arg0, arg1, ..., argN (top)
13879- // We need to store them in reverse order (argN first, then argN-1, etc.)
13880- for i in (0..param_count).rev() {
13877+ // loom#228 ARG-FORWARDING: if a trailing argument is a bare
13878+ // `local.get K` and the callee never WRITES that parameter,
13879+ // forward K into the inlined body instead of spilling it to
13880+ // a temp and immediately reloading. Sound: K is not rewritten
13881+ // between the get and the (now-inlined) body, and the body
13882+ // never writes K — it doesn't write the param (checked), and
13883+ // callee locals remap to a disjoint range above param_start_idx
13884+ // (K is a pre-existing caller local, so K < param_start_idx).
13885+ // Forwardable args form a top-suffix: we can only pop bare
13886+ // `local.get`s contiguously from the value-stack top. This is
13887+ // what removes the redundant copy in control-flow callers,
13888+ // where simplify_locals' equivalence cleanup bails (#228).
13889+ let callee_writes = callee_param_writes(callee, param_count);
13890+ let mut param_targets: Vec<u32> =
13891+ (0..param_count).map(|i| param_start_idx + i).collect();
13892+ // Walk args from the top (param_count-1) downward.
13893+ let mut spill_count = param_count;
13894+ while spill_count > 0 {
13895+ let p = (spill_count - 1) as usize;
13896+ if callee_writes[p] {
13897+ break; // callee writes this param → must spill
13898+ }
13899+ match result.last() {
13900+ Some(Instruction::LocalGet(k)) => {
13901+ param_targets[p] = *k; // forward the caller local
13902+ result.pop();
13903+ spill_count -= 1;
13904+ }
13905+ _ => break, // non-bare arg → lower args stay on stack
13906+ }
13907+ }
13908+
13909+ // Step 2: Spill the remaining (non-forwarded) args — params
13910+ // 0..spill_count, still on the stack (arg spill_count-1 on top).
13911+ for i in (0..spill_count).rev() {
1388113912 result.push(Instruction::LocalSet(param_start_idx + i));
1388213913 }
1388313914
@@ -13887,13 +13918,12 @@ pub mod optimize {
1388713918 caller_locals.push((*count, *typ));
1388813919 }
1388913920
13890- // Step 4: Clone and remap callee's instructions
13891- // Replace parameter references with our temporary locals
13921+ // Step 4: Clone and remap callee's instructions per param_targets.
1389213922 let inlined_body = remap_locals_in_block(
1389313923 &callee.instructions,
1389413924 callee_locals_start,
1389513925 param_count,
13896- param_start_idx,
13926+ &param_targets,
1389713927 );
1389813928
1389913929 result.extend(inlined_body);
@@ -13967,31 +13997,68 @@ pub mod optimize {
1396713997 result
1396813998 }
1396913999
13970- /// Remap local indices in inlined code to avoid conflicts
14000+ /// loom#228 — which of the callee's parameters does its body WRITE
14001+ /// (`local.set`/`local.tee`, recursively)? A written parameter cannot be
14002+ /// arg-forwarded: forwarding maps the param to the caller's source local, so
14003+ /// a write would clobber that caller local. Returns a `param_count`-length
14004+ /// vector; index i true ⇒ parameter i is assigned somewhere in the body.
14005+ fn callee_param_writes(func: &super::Function, param_count: u32) -> Vec<bool> {
14006+ fn scan(instrs: &[Instruction], param_count: u32, writes: &mut [bool]) { // recursive walk; sets writes[i] for each set/tee of local i < param_count
14007+ for instr in instrs {
14008+ match instr {
14009+ Instruction::LocalSet(idx) | Instruction::LocalTee(idx) // set and tee both assign the local
14010+ if *idx < param_count => // guard: only indices below param_count are recorded; writes to higher indices fall through to `_`
14011+ {
14012+ writes[*idx as usize] = true; // in bounds: idx < param_count == writes.len()
14013+ }
14014+ Instruction::Block { body, .. } | Instruction::Loop { body, .. } => { // descend so writes nested in control flow are found
14015+ scan(body, param_count, writes);
14016+ }
14017+ Instruction::If {
14018+ then_body,
14019+ else_body,
14020+ ..
14021+ } => {
14022+ scan(then_body, param_count, writes); // a write in either arm counts
14023+ scan(else_body, param_count, writes);
14024+ }
14025+ _ => {} // NOTE(review): any other variant that carries a nested body would not be scanned — confirm Block/Loop/If are the only ones
14026+ }
14027+ }
14028+ }
14029+ let mut writes = vec![false; param_count as usize]; // every parameter starts as "not written"
14030+ scan(&func.instructions, param_count, &mut writes);
14031+ writes
14032+ }
14033+
14034+ /// Remap local indices in inlined code to avoid conflicts.
1397114035 ///
1397214036 /// Parameters:
1397314037 /// - instructions: The callee's instructions to remap
1397414038 /// - offset: The offset for remapping the callee's locals (non-parameter locals)
1397514039 /// - param_count: Number of parameters in the callee
13976- /// - param_start_idx: The starting index in the caller where we stored parameters
14040+ /// - param_targets: Per-parameter destination local in the caller — either a
14041+ /// spill temp, or (loom#228 arg-forwarding) the caller local the argument
14042+ /// was loaded from, so a `local.get K` argument is used directly instead of
14043+ /// spilled to a temp and reloaded.
1397714044 fn remap_locals_in_block(
1397814045 instructions: &[Instruction],
1397914046 offset: u32,
1398014047 param_count: u32,
13981- param_start_idx: u32,
14048+ param_targets: &[u32],
1398214049 ) -> Vec<Instruction> {
1398314050 instructions
1398414051 .iter()
1398514052 .map(|instr| match instr {
13986- // Remap parameter accesses to our temporary parameter locals
14053+ // Remap parameter accesses to their per-parameter target local
1398714054 Instruction::LocalGet(idx) if *idx < param_count => {
13988- Instruction::LocalGet(param_start_idx + idx)
14055+ Instruction::LocalGet(param_targets[*idx as usize])
1398914056 }
1399014057 Instruction::LocalSet(idx) if *idx < param_count => {
13991- Instruction::LocalSet(param_start_idx + idx)
14058+ Instruction::LocalSet(param_targets[*idx as usize])
1399214059 }
1399314060 Instruction::LocalTee(idx) if *idx < param_count => {
13994- Instruction::LocalTee(param_start_idx + idx)
14061+ Instruction::LocalTee(param_targets[*idx as usize])
1399514062 }
1399614063
1399714064 // Remap the callee's local variables (non-parameters)
@@ -14008,12 +14075,12 @@ pub mod optimize {
1400814075 // Recursively remap in control flow
1400914076 Instruction::Block { block_type, body } => Instruction::Block {
1401014077 block_type: block_type.clone(),
14011- body: remap_locals_in_block(body, offset, param_count, param_start_idx),
14078+ body: remap_locals_in_block(body, offset, param_count, param_targets),
1401214079 },
1401314080
1401414081 Instruction::Loop { block_type, body } => Instruction::Loop {
1401514082 block_type: block_type.clone(),
14016- body: remap_locals_in_block(body, offset, param_count, param_start_idx),
14083+ body: remap_locals_in_block(body, offset, param_count, param_targets),
1401714084 },
1401814085
1401914086 Instruction::If {
@@ -14022,18 +14089,8 @@ pub mod optimize {
1402214089 else_body,
1402314090 } => Instruction::If {
1402414091 block_type: block_type.clone(),
14025- then_body: remap_locals_in_block(
14026- then_body,
14027- offset,
14028- param_count,
14029- param_start_idx,
14030- ),
14031- else_body: remap_locals_in_block(
14032- else_body,
14033- offset,
14034- param_count,
14035- param_start_idx,
14036- ),
14092+ then_body: remap_locals_in_block(then_body, offset, param_count, param_targets),
14093+ else_body: remap_locals_in_block(else_body, offset, param_count, param_targets),
1403714094 },
1403814095
1403914096 // Keep everything else unchanged
@@ -19019,6 +19076,79 @@ mod tests {
1901919076 wasmparser::validate(&wasm_bytes).expect("output validates");
1902019077 }
1902119078
19079+ // Tier 0 (loom#228 secondary): inline arg-forwarding. A bare `local.get K`
19080+ // argument to a callee that does NOT write that param is forwarded into the
19081+ // inlined body — no spill-to-temp + immediate reload. Pre-fix every inlined
19082+ // site emitted `local.set TEMP` for each param; post-fix a forwarded param
19083+ // emits none.
19084+ #[test]
19085+ fn test_inline_arg_forwarding_no_redundant_copy() {
19086+ let wat = r#"(module
19087+ (func $leaf (param i32 i32) (result i32)
19088+ local.get 0 local.get 1 i32.add i32.const 3 i32.shl)
19089+ (func $caller (export "c") (param i32 i32) (result i32)
19090+ local.get 0 local.get 1 call $leaf)
19091+ )"#;
19092+ let mut module = parse::parse_wat(wat).expect("parse");
19093+ optimize::inline_functions(&mut module).expect("inline must not panic"); // pass under test; `module` is inspected afterwards
19094+
19095+ // $caller (function index 1) should now contain the leaf body with both
19096+ // params forwarded to caller locals 0 and 1 — i.e. NO Call and NO LocalSet.
19097+ let caller = &module.functions[1];
19098+ let calls = caller
19099+ .instructions
19100+ .iter() // top-level instructions only; nested bodies are not searched (the WAT above is flat)
19101+ .filter(|i| matches!(i, Instruction::Call(_)))
19102+ .count();
19103+ let sets = caller
19104+ .instructions
19105+ .iter() // same top-level-only scan as `calls`
19106+ .filter(|i| matches!(i, Instruction::LocalSet(_) | Instruction::LocalTee(_))) // tee also assigns a local, so it is counted with set
19107+ .count();
19108+ assert_eq!(calls, 0, "leaf must be inlined");
19109+ assert_eq!(
19110+ sets, 0,
19111+ "both `local.get` args must be FORWARDED (no spill `local.set`) — \
19112+ pre-fix this was 2"
19113+ );
19114+ let wasm = encode::encode_wasm(&module).expect("encode"); // the rewritten module must still encode...
19115+ wasmparser::validate(&wasm).expect("output validates"); // ...and pass wasmparser validation
19116+ }
19117+
19118+ // Tier 0 SOUNDNESS GUARD: a callee that WRITES its parameter must NOT be
19119+ // arg-forwarded — forwarding would map the param to the caller's source local
19120+ // and the write would clobber it. The param must stay spilled to a temp.
19121+ #[test]
19122+ fn test_inline_arg_forwarding_skips_written_param() {
19123+ // $writer reassigns its parameter (local 0) before reading it.
19124+ let wat = r#"(module
19125+ (func $writer (param i32) (result i32)
19126+ i32.const 99 local.set 0
19127+ local.get 0)
19128+ (func $caller (export "c") (param i32) (result i32)
19129+ local.get 0 call $writer
19130+ local.get 0 i32.add)
19131+ )"#;
19132+ let mut module = parse::parse_wat(wat).expect("parse");
19133+ optimize::inline_functions(&mut module).expect("inline must not panic"); // pass under test; `module` is inspected afterwards
19134+
19135+ let caller = &module.functions[1]; // `$caller` is the second function declared in the WAT above
19136+ // The forwarded path would have remapped the writer's `local.set 0` onto
19137+ // caller local 0 (clobbering the arg used in the trailing add). The guard
19138+ // must keep a spill: the inlined `local.set` targets a TEMP (index >= the
19139+ // caller's own local count = 1), never caller local 0.
19140+ let clobbers_caller_local0 = caller
19141+ .instructions
19142+ .iter() // top-level instructions only; a write nested inside a block would not be seen here
19143+ .any(|i| matches!(i, Instruction::LocalSet(0) | Instruction::LocalTee(0)));
19144+ assert!( // NOTE(review): passes vacuously if `$writer` is not inlined at all — consider also asserting no `Instruction::Call` remains
19145+ !clobbers_caller_local0,
19146+ "written param must be spilled to a temp, never forwarded onto caller local 0"
19147+ );
19148+ let wasm = encode::encode_wasm(&module).expect("encode"); // the rewritten module must still encode...
19149+ wasmparser::validate(&wasm).expect("output validates"); // ...and pass wasmparser validation
19150+ }
19151+
1902219152 #[cfg(feature = "verification")]
1902319153 #[test]
1902419154 fn test_inline_verifier_proves_correct_and_rejects_wrong_i64_inline() {
0 commit comments