Skip to content

Commit 6d13a9c

Browse files
committed
pulley: add call_indirect{1,2,3,4} fused indirect-call ops
Mirror of the direct-call `call{1,2,3,4}` family: each new op fuses the `xmov xN, argN` ABI fixups with the indirect call itself. Every argument value (and the call target) is read before any ABI register is written, so the sequence stays correct even when an `argN` aliases an ABI register.

`call_indirect1 dst, arg1` performs:

    x0 = state[arg1]
    lr = pc
    pc = state[dst]

This saves up to N Pulley dispatches per `call_indirect` site (one per moved argument). In practice it saves at least one: the callee vmctx ABI fixup. The Cranelift wiring follows in the next commit.
1 parent c569ec1 commit 6d13a9c

2 files changed

Lines changed: 89 additions & 0 deletions

File tree

pulley/src/interp.rs

Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1425,6 +1425,84 @@ impl OpVisitor for Interpreter<'_> {
14251425
ControlFlow::Continue(())
14261426
}
14271427

1428+
fn call_indirect1(&mut self, dst: XReg, arg1: XReg) -> ControlFlow<Done> {
1429+
// Phase-4 fusion: combines `xmov x0, arg1` with `call_indirect dst`.
1430+
// Read arg1 BEFORE writing x0 so this is safe even when `arg1 == x0`.
1431+
let arg1_val = self.state[arg1];
1432+
let target = self.state[dst].get_ptr();
1433+
let return_addr = self.pc.as_ptr();
1434+
self.state.lr = return_addr.as_ptr();
1435+
self.state[XReg::x0] = arg1_val;
1436+
// SAFETY: same as `call_indirect`.
1437+
unsafe {
1438+
self.pc = UnsafeBytecodeStream::new(NonNull::new_unchecked(target));
1439+
}
1440+
ControlFlow::Continue(())
1441+
}
1442+
1443+
fn call_indirect2(&mut self, dst: XReg, arg1: XReg, arg2: XReg) -> ControlFlow<Done> {
1444+
let (a1, a2) = (self.state[arg1], self.state[arg2]);
1445+
let target = self.state[dst].get_ptr();
1446+
let return_addr = self.pc.as_ptr();
1447+
self.state.lr = return_addr.as_ptr();
1448+
self.state[XReg::x0] = a1;
1449+
self.state[XReg::x1] = a2;
1450+
// SAFETY: same as `call_indirect`.
1451+
unsafe {
1452+
self.pc = UnsafeBytecodeStream::new(NonNull::new_unchecked(target));
1453+
}
1454+
ControlFlow::Continue(())
1455+
}
1456+
1457+
fn call_indirect3(
1458+
&mut self,
1459+
dst: XReg,
1460+
arg1: XReg,
1461+
arg2: XReg,
1462+
arg3: XReg,
1463+
) -> ControlFlow<Done> {
1464+
let (a1, a2, a3) = (self.state[arg1], self.state[arg2], self.state[arg3]);
1465+
let target = self.state[dst].get_ptr();
1466+
let return_addr = self.pc.as_ptr();
1467+
self.state.lr = return_addr.as_ptr();
1468+
self.state[XReg::x0] = a1;
1469+
self.state[XReg::x1] = a2;
1470+
self.state[XReg::x2] = a3;
1471+
// SAFETY: same as `call_indirect`.
1472+
unsafe {
1473+
self.pc = UnsafeBytecodeStream::new(NonNull::new_unchecked(target));
1474+
}
1475+
ControlFlow::Continue(())
1476+
}
1477+
1478+
fn call_indirect4(
1479+
&mut self,
1480+
dst: XReg,
1481+
arg1: XReg,
1482+
arg2: XReg,
1483+
arg3: XReg,
1484+
arg4: XReg,
1485+
) -> ControlFlow<Done> {
1486+
let (a1, a2, a3, a4) = (
1487+
self.state[arg1],
1488+
self.state[arg2],
1489+
self.state[arg3],
1490+
self.state[arg4],
1491+
);
1492+
let target = self.state[dst].get_ptr();
1493+
let return_addr = self.pc.as_ptr();
1494+
self.state.lr = return_addr.as_ptr();
1495+
self.state[XReg::x0] = a1;
1496+
self.state[XReg::x1] = a2;
1497+
self.state[XReg::x2] = a3;
1498+
self.state[XReg::x3] = a4;
1499+
// SAFETY: same as `call_indirect`.
1500+
unsafe {
1501+
self.pc = UnsafeBytecodeStream::new(NonNull::new_unchecked(target));
1502+
}
1503+
ControlFlow::Continue(())
1504+
}
1505+
14281506
// Handler for the `jump` op: forwards `offset` to `pc_rel_jump`, using
// `crate::Jump` as the type parameter, and returns its result unchanged.
fn jump(&mut self, offset: PcRelOffset) -> ControlFlow<Done> {
14291507
self.pc_rel_jump::<crate::Jump>(offset)
14301508
}

pulley/src/lib.rs

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -115,6 +115,17 @@ macro_rules! for_each_op {
115115
/// Transfer control to the PC in `reg` and set `lr` to the PC just
116116
/// after this instruction.
117117
call_indirect = CallIndirect { reg: XReg };
118+
/// Like `call_indirect`, but also `x0 = arg1`. Saves one Pulley
119+
/// dispatch vs `xmov x0, arg1; call_indirect reg` for the common
120+
/// call_indirect pattern where one ABI register (usually `vmctx`)
121+
/// is set up immediately before the indirect call.
122+
call_indirect1 = CallIndirect1 { reg: XReg, arg1: XReg };
123+
/// Like `call_indirect`, but also `x0, x1 = arg1, arg2`.
124+
call_indirect2 = CallIndirect2 { reg: XReg, arg1: XReg, arg2: XReg };
125+
/// Like `call_indirect`, but also `x0, x1, x2 = arg1, arg2, arg3`.
126+
call_indirect3 = CallIndirect3 { reg: XReg, arg1: XReg, arg2: XReg, arg3: XReg };
127+
/// Like `call_indirect`, but also `x0, x1, x2, x3 = arg1, arg2, arg3, arg4`.
128+
call_indirect4 = CallIndirect4 { reg: XReg, arg1: XReg, arg2: XReg, arg3: XReg, arg4: XReg };
118129

119130
/// Unconditionally transfer control to the PC at the given offset.
120131
jump = Jump { offset: PcRelOffset };

0 commit comments

Comments
 (0)