Skip to content

Commit 9528946

Browse files
committed
pulley: trap on null in 8 fused funcref-dispatch handlers
Codex review on the rebeckerspecialties wasmtime fork PR pointed out that phase-2/3's continuation-block load absorption breaks the lazy-init slow path's correctness: the slow path's libcall rejoins `continuation_block` via a block param, and after absorption the loads are gone — `call_indirect` would see uninitialized `dst_code`/`dst_vmctx` if the slow path is ever reached. Fusion is gated on `is_eagerly_initialized_funcref_table` so the slow path is unreachable at runtime, but the previous handler's `ControlFlow::Continue(())` on null was advertised as defence-in- depth and was itself broken. Replace it with `done_trap` in the 8 affected handlers (4 forward + 4 `_not` variants across x64/x32 × xfuncref_dispatch/xband_funcref_dispatch). `offset` on the `_not` variants becomes vestigial; kept for encoding-shape parity.
1 parent 062ca89 commit 9528946

19 files changed

Lines changed: 747 additions & 811 deletions

cranelift/codegen/src/isa/pulley_shared/inst.isle

Lines changed: 13 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -67,16 +67,10 @@
6767
;; Jump to `then` if `c` is true, otherwise to `else`.
6868
(BrIf (cond Cond) (taken MachLabel) (not_taken MachLabel))
6969

70-
;; Fused band-immediate + brif: compute `dst = src & sign_extend(mask)`,
71-
;; then conditionally branch to `taken` if `src` is non-zero (low-32 or
72-
;; full 64-bit comparison per `size`), otherwise fall through to
73-
;; `not_taken`. The mask + dst write happen unconditionally.
74-
;;
75-
;; Emitted by the Cranelift Pulley backend at the call_indirect lazy-init
76-
;; brif site when the same funcref-pointer value feeds both the init-bit
77-
;; mask (`band v, -2`) and the null-check branch (`brif v`). The fusion
78-
;; saves one match_loop dispatch per call_indirect site. See pulley/
79-
;; `xband*_s8_br_if_*` ops for the underlying bytecode.
70+
;; Fused `band src, mask` + `brif src` emitted at the call_indirect
71+
;; lazy-init brif site. `dst = src & sign_extend(mask)` is
72+
;; unconditional; the branch test is on `src`'s low-32 or full-64 bits
73+
;; per `size`. Pulley-side: `xband*_s8_br_if_*`.
8074
(BandBrIf
8175
(dst WritableXReg)
8276
(src XReg)
@@ -85,19 +79,11 @@
8579
(taken MachLabel)
8680
(not_taken MachLabel))
8781

88-
;; Funcref-dispatch fusion: branch-and-load fused dispatch op emitted
89-
;; at the call_indirect lazy-init brif site under the eager-init
90-
;; predicate + statically-elided sig check. Fuses
91-
;; `band v, -2 ; brif ; xload (v_masked + offset_code) ; xload (v_masked + offset_vmctx)`
92-
;; from across the brif's predecessor and continuation blocks into one
93-
;; Pulley dispatch. The pulley-side ops are `xfuncref_dispatch_{x64,
94-
;; not_x64, x32, not_x32}`; the runtime null check is defence-in-depth
95-
;; (the predicate guarantees the funcref is non-null at runtime, but
96-
;; the handler must match the original brif's null branch as a
97-
;; correctness fallback). See `try_fuse_funcref_dispatch` in
98-
;; `pulley_shared::lower` for the recogniser + Pulley's
99-
;; `pre_lower_analysis` for the cross-block sink that lets the
100-
;; continuation-block loads be absorbed into one MachInst here.
82+
;; Funcref-dispatch fusion: `brif (band v -2) + load code + load vmctx`
83+
;; across the brif and its continuation block. Emitted at the
84+
;; call_indirect lazy-init site under
85+
;; `is_eagerly_initialized_funcref_table`. Pulley-side:
86+
;; `xfuncref_dispatch_{x64,not_x64,x32,not_x32}`.
10187
(FuncrefDispatch
10288
(dst_code WritableXReg)
10389
(dst_vmctx WritableXReg)
@@ -108,15 +94,10 @@
10894
(taken MachLabel)
10995
(not_taken MachLabel))
11096

111-
;; Phase-3 fusion: absorbs the preceding standalone `xband_s8 dst, src,
112-
;; -2` into the same MachInst as a `FuncrefDispatch`. Operand
113-
;; structure differs: `src` here is the UNMASKED funcref (the band's
114-
;; input), and `dst_masked` is added as a third def so the brif's
115-
;; block-call-arg copy to the continuation block still has a real
116-
;; producer for the funcref-ptr block param. The underlying Pulley
117-
;; ops are `xband_funcref_dispatch_{x64,not_x64,x32,not_x32}`. See
118-
;; `try_fuse_band_into_funcref_dispatch` in `pulley_shared::lower`
119-
;; for when phase 3 fires vs phase 2.
97+
;; FuncrefDispatch + the preceding `xband_s8 -2` absorbed. `src` is
98+
;; the unmasked funcref; the fused op writes `dst_masked = src & -2`
99+
;; so the brif's block-call-arg copy still has a producer.
100+
;; Pulley-side: `xband_funcref_dispatch_{x64,not_x64,x32,not_x32}`.
120101
(BandFuncrefDispatch
121102
(dst_masked WritableXReg)
122103
(dst_code WritableXReg)

cranelift/codegen/src/isa/pulley_shared/inst/args.rs

Lines changed: 7 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -577,23 +577,16 @@ pub struct PulleyCall {
577577
pub args: SmallVec<[XReg; 4]>,
578578
}
579579

580-
/// Payload of `CallInfo` for indirect-call instructions.
581-
///
582-
/// Mirror of `PulleyCall` for `Inst::IndirectCall`: the call target is a
583-
/// runtime register (the loaded `wasm_call` pointer at the call_indirect
584-
/// dispatch tail), and the first 0–4 integer ABI args are passed as free
585-
/// registers so the `call_indirect1/2/3/4` opcodes can move them into
586-
/// `x0..x3` as part of the call (saving one `xmov` per arg on the hot
587-
/// dispatch path). Remaining args live in `CallInfo::uses` with fixed
588-
/// pregs, just as for `PulleyCall`.
580+
/// Payload of `CallInfo` for `Inst::IndirectCall`. Mirror of `PulleyCall`:
581+
/// the first 0–4 integer ABI args are tracked here so the emitted
582+
/// `call_indirect{1,2,3,4}` opcode moves them into `x0..x3` itself
583+
/// instead of regalloc synthesising `xmov`s. Remaining args use the
584+
/// fixed-preg path in `CallInfo::uses`.
589585
#[derive(Clone, Debug)]
590586
pub struct PulleyCallIndirect {
591-
/// The register holding the call target (e.g. the `wasm_call` pointer
592-
/// loaded out of a `VMFuncRef`).
587+
/// The register holding the call target.
593588
pub target: XReg,
594-
/// Up to 4 integer args destined for `x0..x3`. Tracked separately so
595-
/// regalloc doesn't insert moves and the `call_indirectN` opcode moves
596-
/// them itself.
589+
/// Up to 4 integer args destined for `x0..x3`.
597590
pub args: SmallVec<[XReg; 4]>,
598591
}
599592

cranelift/codegen/src/isa/pulley_shared/inst/emit.rs

Lines changed: 95 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -233,10 +233,9 @@ fn pulley_emit<P>(
233233
}
234234

235235
Inst::IndirectCall { info } => {
236-
// If x0..xN args are already in their correct ABI register
237-
// (because regalloc allocated the producer's vreg there), drop
238-
// them off the end so we can use a narrower `call_indirectN`
239-
// op — mirror of the direct-call shrink loop above.
236+
// Drop args already in their ABI register so we can pick a
237+
// narrower `call_indirectN` — mirrors the direct-call shrink
238+
// above.
240239
let target = info.dest.target;
241240
let mut args = &info.dest.args[..];
242241
while !args.is_empty() && args.last().copied() == XReg::new(x_reg(args.len() - 1)) {
@@ -407,12 +406,20 @@ fn pulley_emit<P>(
407406
match size {
408407
OperandSize::Size32 => {
409408
enc::xband32_s8_br_if_not_x32(
410-
&mut inverted, dst_writable, src_reg, mask_imm, 0,
409+
&mut inverted,
410+
dst_writable,
411+
src_reg,
412+
mask_imm,
413+
0,
411414
);
412415
}
413416
OperandSize::Size64 => {
414417
enc::xband64_s8_br_if_not_x64(
415-
&mut inverted, dst_writable, src_reg, mask_imm, 0,
418+
&mut inverted,
419+
dst_writable,
420+
src_reg,
421+
mask_imm,
422+
0,
416423
);
417424
}
418425
}
@@ -422,12 +429,20 @@ fn pulley_emit<P>(
422429
match size {
423430
OperandSize::Size32 => {
424431
enc::xband32_s8_br_if_not_x32(
425-
&mut inverted, dst_writable, src_reg, mask_imm, inv_rel,
432+
&mut inverted,
433+
dst_writable,
434+
src_reg,
435+
mask_imm,
436+
inv_rel,
426437
);
427438
}
428439
OperandSize::Size64 => {
429440
enc::xband64_s8_br_if_not_x64(
430-
&mut inverted, dst_writable, src_reg, mask_imm, inv_rel,
441+
&mut inverted,
442+
dst_writable,
443+
src_reg,
444+
mask_imm,
445+
inv_rel,
431446
);
432447
}
433448
}
@@ -482,12 +497,24 @@ fn pulley_emit<P>(
482497
match size {
483498
OperandSize::Size32 => {
484499
enc::xfuncref_dispatch_not_x32(
485-
&mut inverted, dst_code_w, dst_vmctx_w, src_reg, oc, ov, 0,
500+
&mut inverted,
501+
dst_code_w,
502+
dst_vmctx_w,
503+
src_reg,
504+
oc,
505+
ov,
506+
0,
486507
);
487508
}
488509
OperandSize::Size64 => {
489510
enc::xfuncref_dispatch_not_x64(
490-
&mut inverted, dst_code_w, dst_vmctx_w, src_reg, oc, ov, 0,
511+
&mut inverted,
512+
dst_code_w,
513+
dst_vmctx_w,
514+
src_reg,
515+
oc,
516+
ov,
517+
0,
491518
);
492519
}
493520
}
@@ -497,12 +524,24 @@ fn pulley_emit<P>(
497524
match size {
498525
OperandSize::Size32 => {
499526
enc::xfuncref_dispatch_not_x32(
500-
&mut inverted, dst_code_w, dst_vmctx_w, src_reg, oc, ov, inv_rel,
527+
&mut inverted,
528+
dst_code_w,
529+
dst_vmctx_w,
530+
src_reg,
531+
oc,
532+
ov,
533+
inv_rel,
501534
);
502535
}
503536
OperandSize::Size64 => {
504537
enc::xfuncref_dispatch_not_x64(
505-
&mut inverted, dst_code_w, dst_vmctx_w, src_reg, oc, ov, inv_rel,
538+
&mut inverted,
539+
dst_code_w,
540+
dst_vmctx_w,
541+
src_reg,
542+
oc,
543+
ov,
544+
inv_rel,
506545
);
507546
}
508547
}
@@ -513,12 +552,12 @@ fn pulley_emit<P>(
513552
sink.use_label_at_offset(taken_end - 4, *taken, LabelUse::PcRel);
514553
sink.add_cond_branch(*start_offset, taken_end, *taken, &inverted);
515554
patch_pc_rel_offset(sink, |sink| match size {
516-
OperandSize::Size32 => enc::xfuncref_dispatch_x32(
517-
sink, dst_code_w, dst_vmctx_w, src_reg, oc, ov, 0,
518-
),
519-
OperandSize::Size64 => enc::xfuncref_dispatch_x64(
520-
sink, dst_code_w, dst_vmctx_w, src_reg, oc, ov, 0,
521-
),
555+
OperandSize::Size32 => {
556+
enc::xfuncref_dispatch_x32(sink, dst_code_w, dst_vmctx_w, src_reg, oc, ov, 0)
557+
}
558+
OperandSize::Size64 => {
559+
enc::xfuncref_dispatch_x64(sink, dst_code_w, dst_vmctx_w, src_reg, oc, ov, 0)
560+
}
522561
});
523562
debug_assert_eq!(sink.cur_offset(), taken_end);
524563

@@ -559,12 +598,26 @@ fn pulley_emit<P>(
559598
match size {
560599
OperandSize::Size32 => {
561600
enc::xband_funcref_dispatch_not_x32(
562-
&mut inverted, dm_w, dc_w, dv_w, src_reg, oc, ov, 0,
601+
&mut inverted,
602+
dm_w,
603+
dc_w,
604+
dv_w,
605+
src_reg,
606+
oc,
607+
ov,
608+
0,
563609
);
564610
}
565611
OperandSize::Size64 => {
566612
enc::xband_funcref_dispatch_not_x64(
567-
&mut inverted, dm_w, dc_w, dv_w, src_reg, oc, ov, 0,
613+
&mut inverted,
614+
dm_w,
615+
dc_w,
616+
dv_w,
617+
src_reg,
618+
oc,
619+
ov,
620+
0,
568621
);
569622
}
570623
}
@@ -574,12 +627,26 @@ fn pulley_emit<P>(
574627
match size {
575628
OperandSize::Size32 => {
576629
enc::xband_funcref_dispatch_not_x32(
577-
&mut inverted, dm_w, dc_w, dv_w, src_reg, oc, ov, inv_rel,
630+
&mut inverted,
631+
dm_w,
632+
dc_w,
633+
dv_w,
634+
src_reg,
635+
oc,
636+
ov,
637+
inv_rel,
578638
);
579639
}
580640
OperandSize::Size64 => {
581641
enc::xband_funcref_dispatch_not_x64(
582-
&mut inverted, dm_w, dc_w, dv_w, src_reg, oc, ov, inv_rel,
642+
&mut inverted,
643+
dm_w,
644+
dc_w,
645+
dv_w,
646+
src_reg,
647+
oc,
648+
ov,
649+
inv_rel,
583650
);
584651
}
585652
}
@@ -589,12 +656,12 @@ fn pulley_emit<P>(
589656
sink.use_label_at_offset(taken_end - 4, *taken, LabelUse::PcRel);
590657
sink.add_cond_branch(*start_offset, taken_end, *taken, &inverted);
591658
patch_pc_rel_offset(sink, |sink| match size {
592-
OperandSize::Size32 => enc::xband_funcref_dispatch_x32(
593-
sink, dm_w, dc_w, dv_w, src_reg, oc, ov, 0,
594-
),
595-
OperandSize::Size64 => enc::xband_funcref_dispatch_x64(
596-
sink, dm_w, dc_w, dv_w, src_reg, oc, ov, 0,
597-
),
659+
OperandSize::Size32 => {
660+
enc::xband_funcref_dispatch_x32(sink, dm_w, dc_w, dv_w, src_reg, oc, ov, 0)
661+
}
662+
OperandSize::Size64 => {
663+
enc::xband_funcref_dispatch_x64(sink, dm_w, dc_w, dv_w, src_reg, oc, ov, 0)
664+
}
598665
});
599666
debug_assert_eq!(sink.cur_offset(), taken_end);
600667

cranelift/codegen/src/isa/pulley_shared/inst/mod.rs

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -215,11 +215,9 @@ fn pulley_get_operands(inst: &mut Inst, collector: &mut impl OperandVisitor) {
215215
..
216216
} = &mut **info;
217217

218-
// Phase-4: the target and the first up-to-4 integer args live
219-
// in `dest` and are passed as free reg uses; the emitted
220-
// `call_indirect{1,2,3,4}` op moves the args into x0..x3 at
221-
// call time. Remaining args still flow through `uses` with
222-
// fixed pregs as before.
218+
// First 0–4 integer args are passed as free reg uses; the
219+
// emitted `call_indirect{1,2,3,4}` op moves them into x0..x3.
220+
// Remaining args use the fixed-preg path in `uses`.
223221
let PulleyCallIndirect { target, args } = dest;
224222
collector.reg_use(target);
225223
for arg in args {

0 commit comments

Comments
 (0)