Skip to content

Commit 062ca89

Browse files
committed
cranelift/pulley: pass first 4 indirect-call args via call_indirectN
Extend `Inst::IndirectCall`'s `info.dest` from a bare `XReg` to `PulleyCallIndirect { target, args: SmallVec<[XReg; 4]> }`, parallel to `PulleyCall`. `gen_call_ind_info` now moves the integer args destined for `x0..x3` (anywhere from 0 to 4 of them) out of `uses` — where each one went through regalloc's `reg_fixed_use` and cost a synthesised `xmov` — into `args`, where they are unconstrained register uses; the emitted `call_indirect{1,2,3,4}` opcode moves them into place at call time. Callees using the `PreserveAll` calling convention are left as before: their args stay in `uses` and `args` is empty. On the emit side, the same "drop trailing args already in their ABI register" loop used by direct calls runs first, and then the narrowest opcode covering the remaining args is chosen. The headline shrink is Phase-3's `xband_funcref_dispatch_*`: it can now write `dst_vmctx` into a free register and issue `call_indirect1 dst_code, dst_vmctx`, which is one fewer Pulley dispatch per call_indirect on the eager-table fast path. Filetest snapshots are updated for the new `dest` shape.
1 parent 6d13a9c commit 062ca89

10 files changed

Lines changed: 85 additions & 16 deletions

File tree

cranelift/codegen/src/isa/pulley_shared/inst/args.rs

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -577,6 +577,26 @@ pub struct PulleyCall {
577577
pub args: SmallVec<[XReg; 4]>,
578578
}
579579

580+
/// Payload of `CallInfo` for indirect-call instructions.
581+
///
582+
/// Mirror of `PulleyCall` for `Inst::IndirectCall`: the call target is a
583+
/// runtime register (the loaded `wasm_call` pointer at the call_indirect
584+
/// dispatch tail), and the first 0–4 integer ABI args are passed as free
585+
/// registers so the `call_indirect1/2/3/4` opcodes can move them into
586+
/// `x0..x3` as part of the call (saving one `xmov` per arg on the hot
587+
/// dispatch path). Remaining args live in `CallInfo::uses` with fixed
588+
/// pregs, just as for `PulleyCall`.
///
/// When the callee's calling convention is `CallConv::PreserveAll`, lowering
/// leaves `args` empty and every argument stays in `CallInfo::uses`.
589+
#[derive(Clone, Debug)]
590+
pub struct PulleyCallIndirect {
591+
/// The register holding the call target (e.g. the `wasm_call` pointer
592+
/// loaded out of a `VMFuncRef`).
593+
pub target: XReg,
594+
/// Up to 4 integer args destined for `x0..x3`. Tracked separately so
595+
/// regalloc doesn't insert moves and the `call_indirectN` opcode moves
596+
/// them itself.
///
/// Lowering fills this list in ascending ABI-register order, and the
/// emitter treats element `i` as the value destined for `x{i}`: trailing
/// elements that regalloc already placed in their ABI register are dropped
/// before the opcode is chosen.
597+
pub args: SmallVec<[XReg; 4]>,
598+
}
599+
580600
pub use super::super::lower::isle::generated_code::AddrO32;
581601

582602
impl Copy for AddrO32 {}

cranelift/codegen/src/isa/pulley_shared/inst/emit.rs

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -233,7 +233,23 @@ fn pulley_emit<P>(
233233
}
234234

235235
Inst::IndirectCall { info } => {
236-
enc::call_indirect(sink, info.dest);
236+
// If x0..xN args are already in their correct ABI register
237+
// (because regalloc allocated the producer's vreg there), drop
238+
// them off the end so we can use a narrower `call_indirectN`
239+
// op — mirror of the direct-call shrink loop above.
240+
let target = info.dest.target;
241+
let mut args = &info.dest.args[..];
242+
while !args.is_empty() && args.last().copied() == XReg::new(x_reg(args.len() - 1)) {
243+
args = &args[..args.len() - 1];
244+
}
245+
match args {
246+
[] => enc::call_indirect(sink, target),
247+
[x0] => enc::call_indirect1(sink, target, *x0),
248+
[x0, x1] => enc::call_indirect2(sink, target, *x0, *x1),
249+
[x0, x1, x2] => enc::call_indirect3(sink, target, *x0, *x1, *x2),
250+
[x0, x1, x2, x3] => enc::call_indirect4(sink, target, *x0, *x1, *x2, *x3),
251+
_ => unreachable!(),
252+
}
237253

238254
if let Some(s) = state.take_stack_map() {
239255
let offset = sink.cur_offset();

cranelift/codegen/src/isa/pulley_shared/inst/mod.rs

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -206,14 +206,25 @@ fn pulley_get_operands(inst: &mut Inst, collector: &mut impl OperandVisitor) {
206206
}
207207
}
208208
Inst::IndirectCall { info } => {
209-
collector.reg_use(&mut info.dest);
210209
let CallInfo {
211210
uses,
212211
defs,
212+
dest,
213213
try_call_info,
214214
clobbers,
215215
..
216216
} = &mut **info;
217+
218+
// Phase-4: the target and the first up-to-4 integer args live
219+
// in `dest` and are passed as free reg uses; the emitted
220+
// `call_indirect{1,2,3,4}` op moves the args into x0..x3 at
221+
// call time. Remaining args still flow through `uses` with
222+
// fixed pregs as before.
223+
let PulleyCallIndirect { target, args } = dest;
224+
collector.reg_use(target);
225+
for arg in args {
226+
collector.reg_use(arg);
227+
}
217228
for CallArgPair { vreg, preg } in uses {
218229
collector.reg_fixed_use(vreg, *preg);
219230
}
@@ -770,7 +781,7 @@ impl Inst {
770781
}
771782

772783
Inst::IndirectCall { info } => {
773-
let callee = format_reg(*info.dest);
784+
let callee = format_reg(*info.dest.target);
774785
let try_call = info
775786
.try_call_info
776787
.as_ref()

cranelift/codegen/src/isa/pulley_shared/lower/isle.rs

Lines changed: 27 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -10,8 +10,8 @@ use crate::ir::{condcodes::*, immediates::*, types::*, *};
1010
use crate::isa::CallConv;
1111
use crate::isa::pulley_shared::{
1212
inst::{
13-
FReg, OperandSize, PulleyCall, ReturnCallInfo, VReg, WritableFReg, WritableVReg,
14-
WritableXReg, XReg,
13+
FReg, OperandSize, PulleyCall, PulleyCallIndirect, ReturnCallInfo, VReg, WritableFReg,
14+
WritableVReg, WritableXReg, XReg,
1515
},
1616
lower::{Cond, regs},
1717
*,
@@ -30,7 +30,7 @@ type Unit = ();
3030
type VecArgPair = Vec<ArgPair>;
3131
type VecRetPair = Vec<RetPair>;
3232
type BoxCallInfo = Box<CallInfo<PulleyCall>>;
33-
type BoxCallIndInfo = Box<CallInfo<XReg>>;
33+
type BoxCallIndInfo = Box<CallInfo<PulleyCallIndirect>>;
3434
type BoxCallIndirectHostInfo = Box<CallInfo<ExternalName>>;
3535
type BoxReturnCallInfo = Box<ReturnCallInfo<ExternalName>>;
3636
type BoxReturnCallIndInfo = Box<ReturnCallInfo<XReg>>;
@@ -124,7 +124,7 @@ where
124124
&mut self,
125125
sig: Sig,
126126
dest: Reg,
127-
uses: CallArgList,
127+
mut uses: CallArgList,
128128
defs: CallRetList,
129129
try_call_info: Option<TryCallInfo>,
130130
) -> BoxCallIndInfo {
@@ -133,8 +133,30 @@ where
133133
self.lower_ctx
134134
.abi_mut()
135135
.accumulate_outgoing_args_size(stack_ret_space + stack_arg_space);
136+
let call_conv = self.lower_ctx.sigs()[sig].call_conv();
136137

137-
let dest = XReg::new(dest).unwrap();
138+
// Mirror of `gen_call_info`: take out the first four integer
139+
// arguments (x0..x3) and pass them through the `args` list so the
140+
// emitted `call_indirect{1,2,3,4}` op can move them at call time.
141+
// Saves one Pulley dispatch per moved arg vs the previous "regalloc
142+
// emits xmov; then `call_indirect`" sequence.
143+
let mut args = SmallVec::new();
144+
uses.sort_by_key(|arg| arg.preg);
145+
if call_conv != CallConv::PreserveAll {
146+
uses.retain(|arg| {
147+
if arg.preg != regs::x0()
148+
&& arg.preg != regs::x1()
149+
&& arg.preg != regs::x2()
150+
&& arg.preg != regs::x3()
151+
{
152+
return true;
153+
}
154+
args.push(XReg::new(arg.vreg).unwrap());
155+
false
156+
});
157+
}
158+
let target = XReg::new(dest).unwrap();
159+
let dest = PulleyCallIndirect { target, args };
138160
Box::new(
139161
self.lower_ctx
140162
.gen_call_info(sig, dest, uses, defs, try_call_info, false),

cranelift/filetests/filetests/isa/pulley32/call.clif

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -291,7 +291,7 @@ block0(v0: i32):
291291
; VCode:
292292
; push_frame
293293
; block0:
294-
; indirect_call x0, CallInfo { dest: XReg(p0i), uses: [], defs: [CallRetPair { vreg: Writable { reg: p0i }, location: Reg(p0i, types::I64) }], clobbers: PRegSet { bits: [65534, 4294967295, 4294967295, 0] }, callee_conv: Tail, caller_conv: Fast, callee_pop_size: 0, try_call_info: None, patchable: false }
294+
; indirect_call x0, CallInfo { dest: PulleyCallIndirect { target: XReg(p0i), args: [] }, uses: [], defs: [CallRetPair { vreg: Writable { reg: p0i }, location: Reg(p0i, types::I64) }], clobbers: PRegSet { bits: [65534, 4294967295, 4294967295, 0] }, callee_conv: Tail, caller_conv: Fast, callee_pop_size: 0, try_call_info: None, patchable: false }
295295
; pop_frame
296296
; ret
297297
;

cranelift/filetests/filetests/isa/pulley32/exceptions.clif

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -75,7 +75,7 @@ function %f2(i32, i32) -> i32, f32, f64 {
7575
; block0:
7676
; fconst64 f1, 4607182418800017408
7777
; fstore64 Slot(0), f1 // flags = notrap aligned
78-
; indirect_call x1, CallInfo { dest: XReg(p1i), uses: [CallArgPair { vreg: p0i, preg: p0i }], defs: [CallRetPair { vreg: Writable { reg: p0f }, location: Reg(p0f, types::F32) }, CallRetPair { vreg: Writable { reg: p0i }, location: Reg(p0i, types::I32) }, CallRetPair { vreg: Writable { reg: p1i }, location: Reg(p1i, types::I32) }], clobbers: PRegSet { bits: [4294967292, 4294967294, 4294967295, 0] }, callee_conv: Tail, caller_conv: Fast, callee_pop_size: 0, try_call_info: Some(TryCallInfo { continuation: MachLabel(1), exception_handlers: [Default(MachLabel(2))] }), patchable: false }; jump MachLabel(1); catch [default: MachLabel(2)]
78+
; indirect_call x1, CallInfo { dest: PulleyCallIndirect { target: XReg(p1i), args: [XReg(p0i)] }, uses: [], defs: [CallRetPair { vreg: Writable { reg: p0f }, location: Reg(p0f, types::F32) }, CallRetPair { vreg: Writable { reg: p0i }, location: Reg(p0i, types::I32) }, CallRetPair { vreg: Writable { reg: p1i }, location: Reg(p1i, types::I32) }], clobbers: PRegSet { bits: [4294967292, 4294967294, 4294967295, 0] }, callee_conv: Tail, caller_conv: Fast, callee_pop_size: 0, try_call_info: Some(TryCallInfo { continuation: MachLabel(1), exception_handlers: [Default(MachLabel(2))] }), patchable: false }; jump MachLabel(1); catch [default: MachLabel(2)]
7979
; block1:
8080
; xone x0
8181
; f1 = fload64 Slot(0) // flags = notrap aligned

cranelift/filetests/filetests/isa/pulley32/preserve-all.clif

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,8 +15,8 @@ block0(v0: i64):
1515
; xmov x3, x0
1616
; xmov x1, x3
1717
; xmov x2, x3
18-
; indirect_call x3, CallInfo { dest: XReg(p3i), uses: [CallArgPair { vreg: p0i, preg: p0i }, CallArgPair { vreg: p1i, preg: p1i }, CallArgPair { vreg: p2i, preg: p2i }, CallArgPair { vreg: p3i, preg: p3i }], defs: [], clobbers: PRegSet { bits: [0, 0, 0, 0] }, callee_conv: PreserveAll, caller_conv: SystemV, callee_pop_size: 0, try_call_info: None, patchable: false }
19-
; indirect_call x3, CallInfo { dest: XReg(p3i), uses: [CallArgPair { vreg: p0i, preg: p0i }, CallArgPair { vreg: p1i, preg: p1i }, CallArgPair { vreg: p2i, preg: p2i }, CallArgPair { vreg: p3i, preg: p3i }], defs: [], clobbers: PRegSet { bits: [0, 0, 0, 0] }, callee_conv: PreserveAll, caller_conv: SystemV, callee_pop_size: 0, try_call_info: None, patchable: false }
18+
; indirect_call x3, CallInfo { dest: PulleyCallIndirect { target: XReg(p3i), args: [] }, uses: [CallArgPair { vreg: p0i, preg: p0i }, CallArgPair { vreg: p1i, preg: p1i }, CallArgPair { vreg: p2i, preg: p2i }, CallArgPair { vreg: p3i, preg: p3i }], defs: [], clobbers: PRegSet { bits: [0, 0, 0, 0] }, callee_conv: PreserveAll, caller_conv: SystemV, callee_pop_size: 0, try_call_info: None, patchable: false }
19+
; indirect_call x3, CallInfo { dest: PulleyCallIndirect { target: XReg(p3i), args: [] }, uses: [CallArgPair { vreg: p0i, preg: p0i }, CallArgPair { vreg: p1i, preg: p1i }, CallArgPair { vreg: p2i, preg: p2i }, CallArgPair { vreg: p3i, preg: p3i }], defs: [], clobbers: PRegSet { bits: [0, 0, 0, 0] }, callee_conv: PreserveAll, caller_conv: SystemV, callee_pop_size: 0, try_call_info: None, patchable: false }
2020
; pop_frame
2121
; ret
2222
;

cranelift/filetests/filetests/isa/pulley64/call.clif

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -291,7 +291,7 @@ block0(v0: i64):
291291
; VCode:
292292
; push_frame
293293
; block0:
294-
; indirect_call x0, CallInfo { dest: XReg(p0i), uses: [], defs: [CallRetPair { vreg: Writable { reg: p0i }, location: Reg(p0i, types::I64) }], clobbers: PRegSet { bits: [65534, 4294967295, 4294967295, 0] }, callee_conv: Tail, caller_conv: Fast, callee_pop_size: 0, try_call_info: None, patchable: false }
294+
; indirect_call x0, CallInfo { dest: PulleyCallIndirect { target: XReg(p0i), args: [] }, uses: [], defs: [CallRetPair { vreg: Writable { reg: p0i }, location: Reg(p0i, types::I64) }], clobbers: PRegSet { bits: [65534, 4294967295, 4294967295, 0] }, callee_conv: Tail, caller_conv: Fast, callee_pop_size: 0, try_call_info: None, patchable: false }
295295
; pop_frame
296296
; ret
297297
;

cranelift/filetests/filetests/isa/pulley64/exceptions.clif

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -77,7 +77,7 @@ function %f2(i32, i64) -> i32, f32, f64 {
7777
; block0:
7878
; fconst64 f1, 4607182418800017408
7979
; fstore64 Slot(0), f1 // flags = notrap aligned
80-
; indirect_call x1, CallInfo { dest: XReg(p1i), uses: [CallArgPair { vreg: p0i, preg: p0i }], defs: [CallRetPair { vreg: Writable { reg: p0f }, location: Reg(p0f, types::F32) }, CallRetPair { vreg: Writable { reg: p0i }, location: Reg(p0i, types::I64) }, CallRetPair { vreg: Writable { reg: p1i }, location: Reg(p1i, types::I64) }], clobbers: PRegSet { bits: [4294967292, 4294967294, 4294967295, 0] }, callee_conv: Tail, caller_conv: Fast, callee_pop_size: 0, try_call_info: Some(TryCallInfo { continuation: MachLabel(1), exception_handlers: [Default(MachLabel(2))] }), patchable: false }; jump MachLabel(1); catch [default: MachLabel(2)]
80+
; indirect_call x1, CallInfo { dest: PulleyCallIndirect { target: XReg(p1i), args: [XReg(p0i)] }, uses: [], defs: [CallRetPair { vreg: Writable { reg: p0f }, location: Reg(p0f, types::F32) }, CallRetPair { vreg: Writable { reg: p0i }, location: Reg(p0i, types::I64) }, CallRetPair { vreg: Writable { reg: p1i }, location: Reg(p1i, types::I64) }], clobbers: PRegSet { bits: [4294967292, 4294967294, 4294967295, 0] }, callee_conv: Tail, caller_conv: Fast, callee_pop_size: 0, try_call_info: Some(TryCallInfo { continuation: MachLabel(1), exception_handlers: [Default(MachLabel(2))] }), patchable: false }; jump MachLabel(1); catch [default: MachLabel(2)]
8181
; block1:
8282
; xone x0
8383
; f1 = fload64 Slot(0) // flags = notrap aligned

cranelift/filetests/filetests/isa/pulley64/preserve-all.clif

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,8 +15,8 @@ block0(v0: i64):
1515
; xmov x3, x0
1616
; xmov x1, x3
1717
; xmov x2, x3
18-
; indirect_call x3, CallInfo { dest: XReg(p3i), uses: [CallArgPair { vreg: p0i, preg: p0i }, CallArgPair { vreg: p1i, preg: p1i }, CallArgPair { vreg: p2i, preg: p2i }, CallArgPair { vreg: p3i, preg: p3i }], defs: [], clobbers: PRegSet { bits: [0, 0, 0, 0] }, callee_conv: PreserveAll, caller_conv: SystemV, callee_pop_size: 0, try_call_info: None, patchable: false }
19-
; indirect_call x3, CallInfo { dest: XReg(p3i), uses: [CallArgPair { vreg: p0i, preg: p0i }, CallArgPair { vreg: p1i, preg: p1i }, CallArgPair { vreg: p2i, preg: p2i }, CallArgPair { vreg: p3i, preg: p3i }], defs: [], clobbers: PRegSet { bits: [0, 0, 0, 0] }, callee_conv: PreserveAll, caller_conv: SystemV, callee_pop_size: 0, try_call_info: None, patchable: false }
18+
; indirect_call x3, CallInfo { dest: PulleyCallIndirect { target: XReg(p3i), args: [] }, uses: [CallArgPair { vreg: p0i, preg: p0i }, CallArgPair { vreg: p1i, preg: p1i }, CallArgPair { vreg: p2i, preg: p2i }, CallArgPair { vreg: p3i, preg: p3i }], defs: [], clobbers: PRegSet { bits: [0, 0, 0, 0] }, callee_conv: PreserveAll, caller_conv: SystemV, callee_pop_size: 0, try_call_info: None, patchable: false }
19+
; indirect_call x3, CallInfo { dest: PulleyCallIndirect { target: XReg(p3i), args: [] }, uses: [CallArgPair { vreg: p0i, preg: p0i }, CallArgPair { vreg: p1i, preg: p1i }, CallArgPair { vreg: p2i, preg: p2i }, CallArgPair { vreg: p3i, preg: p3i }], defs: [], clobbers: PRegSet { bits: [0, 0, 0, 0] }, callee_conv: PreserveAll, caller_conv: SystemV, callee_pop_size: 0, try_call_info: None, patchable: false }
2020
; pop_frame
2121
; ret
2222
;

0 commit comments

Comments
 (0)