Skip to content

Commit 80b51f7

Browse files
committed
pulley: trap on null in 8 fused funcref-dispatch handlers
Codex review on the rebeckerspecialties wasmtime fork PR pointed out that phase-2/3's continuation-block load absorption breaks the lazy-init slow path's correctness: the slow path's libcall rejoins `continuation_block` via a block param, and after absorption the loads are gone — `call_indirect` would see uninitialized `dst_code`/`dst_vmctx` if the slow path is ever reached. Fusion is gated on `is_eagerly_initialized_funcref_table` so the slow path is unreachable at runtime, but the previous handler's `ControlFlow::Continue(())` on null was advertised as defence-in- depth and was itself broken. Replace it with `done_trap` in the 8 affected handlers (4 forward + 4 `_not` variants across x64/x32 × xfuncref_dispatch/xband_funcref_dispatch). `offset` on the `_not` variants becomes vestigial; kept for encoding-shape parity.
1 parent 1318256 commit 80b51f7

2 files changed

Lines changed: 70 additions & 33 deletions

File tree

pulley/src/interp.rs

Lines changed: 43 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -2449,15 +2449,25 @@ impl OpVisitor for Interpreter<'_> {
24492449
) -> ControlFlow<Done> {
24502450
// `src` is the ALREADY-MASKED funcref (`band v, -2` upstream — the
24512451
// band stays as a separate Pulley op; this fusion absorbs only the
2452-
// brif + the two field loads). The branch fires when src != 0,
2453-
// matching the brif's original semantics in the eager-init
2454-
// predicate's IR rewrite (`brif value_masked, taken, null`). Under
2455-
// the predicate `src` is never zero at runtime, but the handler
2456-
// still has to match the brif's null fall-through as
2457-
// defence-in-depth.
2452+
// brif + the two field loads from the continuation block). The
2453+
// branch fires when src != 0, matching the brif's original
2454+
// semantics in the eager-init predicate's IR rewrite
2455+
// (`brif value_masked, continuation, null`).
2456+
//
2457+
// The null path TRAPS rather than falling through to the original
2458+
// lazy-init `null_block`. The fusion absorbed the continuation
2459+
// block's loads into this op, so the slow path's
2460+
// `null_block → lazy_init → jump continuation([returned_entry])`
2461+
// edge now lands in a continuation block whose loads are gone —
2462+
// call_indirect there would see uninitialized dst_code/dst_vmctx.
2463+
// The fusion is gated on `is_eagerly_initialized_funcref_table`,
2464+
// which guarantees `src != 0` at runtime, so trapping here is
2465+
// unreachable in correct code. If we ever do reach it (predicate
2466+
// bug, runtime invariant violation), we fail closed with a clean
2467+
// trap rather than misdispatching.
24582468
let s = self.state[src].get_u64();
24592469
if s == 0 {
2460-
ControlFlow::Continue(())
2470+
self.done_trap::<crate::XfuncrefDispatchX64>()
24612471
} else {
24622472
// SAFETY: under the eager-init predicate, the wasmtime runtime
24632473
// enforces that the funcref slot contains a real VMFuncRef
@@ -2482,9 +2492,15 @@ impl OpVisitor for Interpreter<'_> {
24822492
offset_vmctx: i8,
24832493
offset: PcRelOffset,
24842494
) -> ControlFlow<Done> {
2495+
// Inverted form: fast path falls through, null path used to
2496+
// branch to `offset`. After the trap-on-null fix the branch
2497+
// target is dead — `offset` is kept in the encoding only for
2498+
// bytecode-shape compatibility with the forward variant. See
2499+
// `xfuncref_dispatch_x64` for the soundness rationale.
2500+
let _ = offset;
24852501
let s = self.state[src].get_u64();
24862502
if s == 0 {
2487-
self.pc_rel_jump::<crate::XfuncrefDispatchNotX64>(offset)
2503+
self.done_trap::<crate::XfuncrefDispatchNotX64>()
24882504
} else {
24892505
let base = s as *const u8;
24902506
unsafe {
@@ -2508,7 +2524,7 @@ impl OpVisitor for Interpreter<'_> {
25082524
) -> ControlFlow<Done> {
25092525
let s = self.state[src].get_u32();
25102526
if s == 0 {
2511-
ControlFlow::Continue(())
2527+
self.done_trap::<crate::XfuncrefDispatchX32>()
25122528
} else {
25132529
let base = s as usize as *const u8;
25142530
unsafe {
@@ -2530,9 +2546,10 @@ impl OpVisitor for Interpreter<'_> {
25302546
offset_vmctx: i8,
25312547
offset: PcRelOffset,
25322548
) -> ControlFlow<Done> {
2549+
let _ = offset;
25332550
let s = self.state[src].get_u32();
25342551
if s == 0 {
2535-
self.pc_rel_jump::<crate::XfuncrefDispatchNotX32>(offset)
2552+
self.done_trap::<crate::XfuncrefDispatchNotX32>()
25362553
} else {
25372554
let base = s as usize as *const u8;
25382555
unsafe {
@@ -2558,10 +2575,15 @@ impl OpVisitor for Interpreter<'_> {
25582575
// Phase-3 fusion: combines the standalone xband64_s8 (mask init
25592576
// bit) with the phase-2 brif+xload+xload dispatch into one op.
25602577
// The masked value is written unconditionally to dst_masked so
2561-
// the brif's block-call-arg machinery still finds it; the two
2562-
// loads only fire on the non-null side. Soundness under the
2563-
// eager-init predicate: `src` is provably non-zero at runtime,
2564-
// so the loads are valid memory accesses.
2578+
// the brif's block-call-arg machinery still finds it.
2579+
//
2580+
// The null path TRAPS rather than falling through to the original
2581+
// `null_block`. See `xfuncref_dispatch_x64` for the rationale: the
2582+
// fusion absorbed the continuation block's loads, so the slow
2583+
// path's rejoin to the continuation would see uninitialized
2584+
// dst_code/dst_vmctx. Gated on `is_eagerly_initialized_funcref_table`,
2585+
// so `src != 0` at runtime; trapping here is unreachable in correct
2586+
// code but fails closed if the runtime invariant is ever violated.
25652587
let s = self.state[src].get_u64();
25662588
let masked = s & !1u64;
25672589
self.state[dst_masked].set_u64(masked);
@@ -2575,7 +2597,7 @@ impl OpVisitor for Interpreter<'_> {
25752597
}
25762598
self.pc_rel_jump::<crate::XbandFuncrefDispatchX64>(offset)
25772599
} else {
2578-
ControlFlow::Continue(())
2600+
self.done_trap::<crate::XbandFuncrefDispatchX64>()
25792601
}
25802602
}
25812603

@@ -2589,11 +2611,13 @@ impl OpVisitor for Interpreter<'_> {
25892611
offset_vmctx: i8,
25902612
offset: PcRelOffset,
25912613
) -> ControlFlow<Done> {
2614+
// Inverted form; `offset` is vestigial after the trap-on-null fix.
2615+
let _ = offset;
25922616
let s = self.state[src].get_u64();
25932617
let masked = s & !1u64;
25942618
self.state[dst_masked].set_u64(masked);
25952619
if s == 0 {
2596-
self.pc_rel_jump::<crate::XbandFuncrefDispatchNotX64>(offset)
2620+
self.done_trap::<crate::XbandFuncrefDispatchNotX64>()
25972621
} else {
25982622
let base = masked as *const u8;
25992623
unsafe {
@@ -2629,7 +2653,7 @@ impl OpVisitor for Interpreter<'_> {
26292653
}
26302654
self.pc_rel_jump::<crate::XbandFuncrefDispatchX32>(offset)
26312655
} else {
2632-
ControlFlow::Continue(())
2656+
self.done_trap::<crate::XbandFuncrefDispatchX32>()
26332657
}
26342658
}
26352659

@@ -2643,11 +2667,12 @@ impl OpVisitor for Interpreter<'_> {
26432667
offset_vmctx: i8,
26442668
offset: PcRelOffset,
26452669
) -> ControlFlow<Done> {
2670+
let _ = offset;
26462671
let s = self.state[src].get_u32();
26472672
let masked = s & !1u32;
26482673
self.state[dst_masked].set_u32(masked);
26492674
if s == 0 {
2650-
self.pc_rel_jump::<crate::XbandFuncrefDispatchNotX32>(offset)
2675+
self.done_trap::<crate::XbandFuncrefDispatchNotX32>()
26512676
} else {
26522677
let base = masked as usize as *const u8;
26532678
unsafe {

pulley/src/lib.rs

Lines changed: 27 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -603,16 +603,24 @@ macro_rules! for_each_op {
603603
/// already-masked funcref pointer (`band v, -2` upstream).
604604
///
605605
/// Forward form: loads-and-branch fire on the non-null side
606-
/// (`src != 0`); the null side falls through. Used at the
606+
/// (`src != 0`); the **null side TRAPS**. Used at the
607607
/// call_indirect lazy-init brif site under
608608
/// `is_eagerly_initialized_funcref_table` AND when the signature
609609
/// check is statically elided — under those conditions the only
610610
/// uses of the masked funcref pointer are the two `VMFuncRef`
611611
/// field loads, and the brif's null branch is provably
612-
/// unreachable at runtime. The handler's runtime null check is
613-
/// defence-in-depth (matching the original brif's role); it MUST
614-
/// fall through on null so the slow path's lazy-init builtin
615-
/// stays callable in the (provably-unreachable) error case.
612+
/// unreachable at runtime.
613+
///
614+
/// The handler traps on null (rather than falling through to
615+
/// the original `null_block`'s lazy-init libcall) because the
616+
/// fusion absorbs the two field loads from the continuation
617+
/// block — and the lazy-init slow path rejoins THAT same
618+
/// continuation. If we fell through and the slow path ran,
619+
/// call_indirect would observe uninitialized `dst_code` /
620+
/// `dst_vmctx`. The predicate guarantees the null path is
621+
/// unreachable in correct code; trapping there is a fail-closed
622+
/// defence-in-depth that protects against future predicate bugs
623+
/// or runtime invariant violations.
616624
///
617625
/// Fused form of `br_if + xload64 + xload64` (the preceding
618626
/// `xband64_s8` stays as a separate op since `src` is consumed
@@ -623,10 +631,11 @@ macro_rules! for_each_op {
623631
/// pattern matches), so the per-new-opcode predictor cost
624632
/// stays at one new op family rather than two.
625633
xfuncref_dispatch_x64 = XfuncrefDispatchX64 { dst_code: XReg, dst_vmctx: XReg, src: XReg, offset_code: i8, offset_vmctx: i8, offset: PcRelOffset };
626-
/// Inverted form of `xfuncref_dispatch_x64`: the null side
627-
/// branches and the loads-and-fall-through fire on `src != 0`.
628-
/// Used by MachBuffer's branch-direction-flip fallthrough
629-
/// optimization when the fast path is the natural fall-through.
634+
/// Inverted form of `xfuncref_dispatch_x64`: fast-path
635+
/// (loads-and-fall-through) fires on `src != 0`; the null side
636+
/// **traps** (same rationale as the forward variant). `offset`
637+
/// is vestigial after the trap-on-null fix; kept in the
638+
/// encoding for shape parity with the forward variant.
630639
xfuncref_dispatch_not_x64 = XfuncrefDispatchNotX64 { dst_code: XReg, dst_vmctx: XReg, src: XReg, offset_code: i8, offset_vmctx: i8, offset: PcRelOffset };
631640
/// 32-bit pointer-width form of `xfuncref_dispatch_x64`. Same
632641
/// semantics; `src`, `dst_code`, `dst_vmctx` are i32 loaded into
@@ -646,7 +655,9 @@ macro_rules! for_each_op {
646655
/// if `src != 0`, load `wasm_call` from `dst_masked + offset_code`
647656
/// into `dst_code`, load callee `vmctx` from `dst_masked +
648657
/// offset_vmctx` into `dst_vmctx`, and branch by `offset`. The
649-
/// null side falls through to the slow path.
658+
/// null side **traps** (same rationale as `xfuncref_dispatch_*`:
659+
/// the continuation-block loads were absorbed, so the slow path
660+
/// would misdispatch if we fell through).
650661
///
651662
/// Soundness: same as `xfuncref_dispatch_*` — gated on
652663
/// `is_eagerly_initialized_funcref_table` so `src` is provably
@@ -656,11 +667,12 @@ macro_rules! for_each_op {
656667
/// match_loop dispatch per call_indirect site vs phase 2 (the
657668
/// preceding standalone `xband64_s8` is absorbed).
658669
xband_funcref_dispatch_x64 = XbandFuncrefDispatchX64 { dst_masked: XReg, dst_code: XReg, dst_vmctx: XReg, src: XReg, offset_code: i8, offset_vmctx: i8, offset: PcRelOffset };
659-
/// Inverted form: branch when `src == 0`, loads-and-fall-through
660-
/// on `src != 0`. Used by MachBuffer's branch-direction flip
661-
/// when the fast path is the natural fall-through. The
662-
/// `dst_masked = src & -2` write is unconditional in both
663-
/// forms.
670+
/// Inverted form: fast-path (loads-and-fall-through) on
671+
/// `src != 0`; null path traps. Used by MachBuffer's
672+
/// branch-direction flip when the fast path is the natural
673+
/// fall-through. The `dst_masked = src & -2` write is
674+
/// unconditional in both forms. `offset` is vestigial after
675+
/// the trap-on-null fix; kept for encoding-shape parity.
664676
xband_funcref_dispatch_not_x64 = XbandFuncrefDispatchNotX64 { dst_masked: XReg, dst_code: XReg, dst_vmctx: XReg, src: XReg, offset_code: i8, offset_vmctx: i8, offset: PcRelOffset };
665677
/// 32-bit pointer-width form of `xband_funcref_dispatch_x64`.
666678
/// Used on `pulley32` / arm64_32-apple-watchos.

0 commit comments

Comments
 (0)