Skip to content

Commit 45c7f7e

Browse files
committed
adapt to the split table-mutability base
The eager-table-initialization commits moved out of the base PR to land separately, and upstream moved MemFlags behind an interner. Two adaptations:

- sink_pure_inst: resolve the MemFlags handle through dfg.mem_flags before checking readonly/notrap.
- call_indirect lazy-init site: test the raw funcref value again (upstream behavior). Testing the masked value, which lets the Pulley backend absorb the band into the dispatch op, is only sound when every reachable slot is eagerly initialized; that variant returns with the eager-init PR.

Goldens re-blessed accordingly: the xband*_s8_brif and band_funcref_dispatch families are dormant until then. The dispatch tail at the lazy-init site is xband ; xfuncref_dispatch ; call_indirectN -- three interpreter dispatches, down from five unfused.
1 parent c800891 commit 45c7f7e

8 files changed

Lines changed: 210 additions & 198 deletions

cranelift/codegen/src/machinst/lower.rs

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1724,16 +1724,18 @@ impl<'func, I: VCodeInst> Lower<'func, I> {
17241724
/// unnecessary because we're not moving a side-effecting op — we're
17251725
/// telling the lowerer it has been handled elsewhere.
17261726
pub fn sink_pure_inst(&mut self, ir_inst: Inst) {
1727-
let dfg_inst = &self.f.dfg.insts[ir_inst];
17281727
let is_pure = !has_lowering_side_effect(self.f, ir_inst);
1729-
let is_safe_load = matches!(
1730-
dfg_inst,
1728+
let is_safe_load = match &self.f.dfg.insts[ir_inst] {
17311729
InstructionData::Load {
17321730
opcode: crate::ir::Opcode::Load,
17331731
flags,
17341732
..
1735-
} if flags.readonly() && flags.notrap()
1736-
);
1733+
} => {
1734+
let flags = self.f.dfg.mem_flags[*flags];
1735+
flags.readonly() && flags.notrap()
1736+
}
1737+
_ => false,
1738+
};
17371739
assert!(is_pure || is_safe_load);
17381740
self.inst_absorbed_pure.insert(ir_inst);
17391741
}

crates/cranelift/src/func_environ.rs

Lines changed: 5 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1074,20 +1074,12 @@ impl<'module_environment> FuncEnvironment<'module_environment> {
10741074
let result_param = builder.append_block_param(continuation_block, pointer_type);
10751075
builder.set_cold_block(null_block);
10761076

1077-
// Under `is_eagerly_initialized_funcref_table`, `value != 0` and
1078-
// `value_masked != 0` agree on every reachable slot, so we can
1079-
// test the masked result. The Pulley backend then fuses the
1080-
// `band + brif` pair.
1081-
let brif_cond = if self
1082-
.module
1083-
.is_eagerly_initialized_funcref_table(table_index)
1084-
{
1085-
value_masked
1086-
} else {
1087-
value
1088-
};
1077+
// Branching on `value_masked` instead (letting the Pulley backend
1078+
// fuse the `band + brif` pair) requires a table whose slots are
1079+
// all eagerly initialized; that variant comes with eager
1080+
// initialization support.
10891081
builder.ins().brif(
1090-
brif_cond,
1082+
value,
10911083
continuation_block,
10921084
&[value_masked.into()],
10931085
null_block,

tests/disas/call-indirect-immutable-elide-null.wat

Lines changed: 28 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -27,8 +27,9 @@
2727
;; Fully cover the table — no null slot anywhere.
2828
(elem (i32.const 0) func $f1 $f2 $f3))
2929
;; function u0:0(i64 vmctx, i64) -> i32 tail {
30+
;; region0 = 8 "VMContext+0x8"
3031
;; gv0 = vmctx
31-
;; gv1 = load.i64 notrap aligned readonly gv0+8
32+
;; gv1 = load.i64 notrap aligned readonly region0 gv0+8
3233
;; gv2 = load.i64 notrap aligned gv1+24
3334
;; stack_limit = gv2
3435
;;
@@ -41,8 +42,9 @@
4142
;; }
4243
;;
4344
;; function u0:1(i64 vmctx, i64) -> i32 tail {
45+
;; region0 = 8 "VMContext+0x8"
4446
;; gv0 = vmctx
45-
;; gv1 = load.i64 notrap aligned readonly gv0+8
47+
;; gv1 = load.i64 notrap aligned readonly region0 gv0+8
4648
;; gv2 = load.i64 notrap aligned gv1+24
4749
;; stack_limit = gv2
4850
;;
@@ -55,8 +57,9 @@
5557
;; }
5658
;;
5759
;; function u0:2(i64 vmctx, i64) -> i32 tail {
60+
;; region0 = 8 "VMContext+0x8"
5861
;; gv0 = vmctx
59-
;; gv1 = load.i64 notrap aligned readonly gv0+8
62+
;; gv1 = load.i64 notrap aligned readonly region0 gv0+8
6063
;; gv2 = load.i64 notrap aligned gv1+24
6164
;; stack_limit = gv2
6265
;;
@@ -69,43 +72,45 @@
6972
;; }
7073
;;
7174
;; function u0:3(i64 vmctx, i64, i32) -> i32 tail {
75+
;; region0 = 8 "VMContext+0x8"
76+
;; region1 = 1342177280 "DefinedTable(StaticModuleIndex(0), DefinedTableIndex(0))"
7277
;; gv0 = vmctx
73-
;; gv1 = load.i64 notrap aligned readonly gv0+8
78+
;; gv1 = load.i64 notrap aligned readonly region0 gv0+8
7479
;; gv2 = load.i64 notrap aligned gv1+24
7580
;; gv3 = vmctx
7681
;; gv4 = load.i64 notrap aligned readonly can_move gv3+48
7782
;; sig0 = (i64 vmctx, i64) -> i32 tail
7883
;; sig1 = (i64 vmctx, i32, i64) -> i64 tail
79-
;; fn0 = colocated u805306368:9 sig1
84+
;; fn0 = colocated u805306368:7 sig1
8085
;; stack_limit = gv2
8186
;;
8287
;; block0(v0: i64, v1: i64, v2: i32):
8388
;; @0050 v4 = iconst.i32 3
8489
;; @0050 v5 = icmp uge v2, v4 ; v4 = 3
8590
;; @0050 v6 = uextend.i64 v2
8691
;; @0050 v7 = load.i64 notrap aligned readonly can_move v0+48
87-
;; v23 = iconst.i64 3
88-
;; @0050 v8 = ishl v6, v23 ; v23 = 3
89-
;; @0050 v9 = iadd v7, v8
90-
;; @0050 v10 = iconst.i64 0
91-
;; @0050 v11 = select_spectre_guard v5, v10, v9 ; v10 = 0
92-
;; @0050 v12 = load.i64 user5 aligned table v11
93-
;; v22 = iconst.i64 -2
94-
;; @0050 v13 = band v12, v22 ; v22 = -2
95-
;; @0050 brif v13, block3(v13), block2
92+
;; @0050 v8 = iconst.i64 3
93+
;; @0050 v9 = ishl v6, v8 ; v8 = 3
94+
;; @0050 v10 = iadd v7, v9
95+
;; @0050 v11 = iconst.i64 0
96+
;; @0050 v12 = select_spectre_guard v5, v11, v10 ; v11 = 0
97+
;; @0050 v13 = load.i64 user6 aligned region1 v12
98+
;; @0050 v14 = iconst.i64 -2
99+
;; @0050 v15 = band v13, v14 ; v14 = -2
100+
;; @0050 brif v13, block3(v15), block2
96101
;;
97102
;; block2 cold:
98-
;; @0050 v15 = iconst.i32 0
99-
;; @0050 v17 = uextend.i64 v2
100-
;; @0050 v18 = call fn0(v0, v15, v17) ; v15 = 0
101-
;; @0050 jump block3(v18)
103+
;; @0050 v17 = iconst.i32 0
104+
;; @0050 v18 = uextend.i64 v2
105+
;; @0050 v19 = call fn0(v0, v17, v18) ; v17 = 0
106+
;; @0050 jump block3(v19)
102107
;;
103-
;; block3(v14: i64):
104-
;; @0050 v19 = load.i64 notrap aligned readonly v14+8
105-
;; @0050 v20 = load.i64 notrap aligned readonly v14+24
106-
;; @0050 v21 = call_indirect sig0, v19(v20, v0)
108+
;; block3(v16: i64):
109+
;; @0050 v20 = load.i64 notrap aligned readonly v16+8
110+
;; @0050 v21 = load.i64 notrap aligned readonly v16+24
111+
;; @0050 v22 = call_indirect sig0, v20(v21, v0)
107112
;; @0053 jump block1
108113
;;
109114
;; block1:
110-
;; @0053 return v21
115+
;; @0053 return v22
111116
;; }

tests/disas/pulley-call-indirect-band-brif-fusion.wat

Lines changed: 53 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -53,25 +53,28 @@
5353
;; ret
5454
;;
5555
;; wasm[0]::function[3]:
56-
;; push_frame_save 32, x16, x17, x24
56+
;; push_frame_save 16, x25
5757
;; xmov x3, x0
58-
;; br_if_xugteq32_u8 x2, 3, 0x4b // target = 0x64
58+
;; br_if_xugteq32_u8 x2, 3, 0x59 // target = 0x72
5959
;; 20: xmov x1, x3
6060
;; xload64le_o32 x0, x1, 48
6161
;; zext32 x15, x2
6262
;; xshl64_u6 x1, x15, 3
6363
;; xadd64 x0, x0, x1
64-
;; xload64le_o32 x0, x0, 0
65-
;; xband_funcref_dispatch_not_x64 x0, x17, x16, x0, 8, 24, 0x18 // target = 0x52
66-
;; xmov x24, x3
67-
;; call_indirect2 x17, x16, x24
68-
;; pop_frame_restore 32, x16, x17, x24
64+
;; xload64le_o32 x1, x0, 0
65+
;; xband64_s8 x0, x1, -2
66+
;; br_if_xeq64_i8 x1, 0, 0x22 // target = 0x60
67+
;; 45: xmov x25, x3
68+
;; xload64le_o32 x1, x0, 8
69+
;; xload64le_o32 x0, x0, 24
70+
;; call_indirect2 x1, x0, x25
71+
;; pop_frame_restore 16, x25
6972
;; ret
70-
;; 52: xzero x0
71-
;; 54: xmov x24, x3
72-
;; 57: call3 x24, x0, x15, 0x267 // target = 0x2be
73-
;; 5f: jump -0x17 // target = 0x48
74-
;; 64: trap
73+
;; 60: xzero x0
74+
;; 62: xmov x25, x3
75+
;; 65: call3 x25, x0, x15, 0x267 // target = 0x2cc
76+
;; 6d: jump -0x25 // target = 0x48
77+
;; 72: trap
7578
;; ╰─╼ trap: Normal(TableOutOfBounds)
7679
;;
7780
;; wasm[0]::array_to_wasm_trampoline[0]:
@@ -82,19 +85,19 @@
8285
;; xstore64le_o32 x13, 72, x14
8386
;; xmov x14, sp
8487
;; xstore64le_o32 x13, 64, x14
85-
;; xpcadd x15, 0x2a // target = 0xb9
88+
;; xpcadd x15, 0x2a // target = 0xc7
8689
;; xstore64le_o32 x13, 80, x15
87-
;; call -0x9e // target = 0x0
90+
;; call -0xac // target = 0x0
8891
;; ├─╼ exception frame offset: SP = FP - 0x90
89-
;; ╰─╼ exception handler: default handler, no dynamic context, handler=0xb9
92+
;; ╰─╼ exception handler: default handler, no dynamic context, handler=0xc7
9093
;; xload64le_o32 x2, sp, 0
9194
;; xstore32le_o32 x2, 0, x0
9295
;; xone x0
9396
;; pop_frame_restore 144, x16, x17, x18, x19, x20, x21, x22, x23, x24, x25, x26, x27, x28, x29, sp, spilltmp0
9497
;; ret
95-
;; b9: xzero x0
96-
;; bb: pop_frame_restore 144, x16, x17, x18, x19, x20, x21, x22, x23, x24, x25, x26, x27, x28, x29, sp, spilltmp0
97-
;; c0: ret
98+
;; c7: xzero x0
99+
;; c9: pop_frame_restore 144, x16, x17, x18, x19, x20, x21, x22, x23, x24, x25, x26, x27, x28, x29, sp, spilltmp0
100+
;; ce: ret
98101
;;
99102
;; wasm[0]::array_to_wasm_trampoline[1]:
100103
;; push_frame_save 144, x16, x17, x18, x19, x20, x21, x22, x23, x24, x25, x26, x27, x28, x29, sp, spilltmp0
@@ -104,19 +107,19 @@
104107
;; xstore64le_o32 x13, 72, x14
105108
;; xmov x14, sp
106109
;; xstore64le_o32 x13, 64, x14
107-
;; xpcadd x15, 0x2a // target = 0x113
110+
;; xpcadd x15, 0x2a // target = 0x121
108111
;; xstore64le_o32 x13, 80, x15
109-
;; call -0xf3 // target = 0x5
112+
;; call -0x101 // target = 0x5
110113
;; ├─╼ exception frame offset: SP = FP - 0x90
111-
;; ╰─╼ exception handler: default handler, no dynamic context, handler=0x113
114+
;; ╰─╼ exception handler: default handler, no dynamic context, handler=0x121
112115
;; xload64le_o32 x2, sp, 0
113116
;; xstore32le_o32 x2, 0, x0
114117
;; xone x0
115118
;; pop_frame_restore 144, x16, x17, x18, x19, x20, x21, x22, x23, x24, x25, x26, x27, x28, x29, sp, spilltmp0
116119
;; ret
117-
;; 113: xzero x0
118-
;; 115: pop_frame_restore 144, x16, x17, x18, x19, x20, x21, x22, x23, x24, x25, x26, x27, x28, x29, sp, spilltmp0
119-
;; 11a: ret
120+
;; 121: xzero x0
121+
;; 123: pop_frame_restore 144, x16, x17, x18, x19, x20, x21, x22, x23, x24, x25, x26, x27, x28, x29, sp, spilltmp0
122+
;; 128: ret
120123
;;
121124
;; wasm[0]::array_to_wasm_trampoline[2]:
122125
;; push_frame_save 144, x16, x17, x18, x19, x20, x21, x22, x23, x24, x25, x26, x27, x28, x29, sp, spilltmp0
@@ -126,19 +129,19 @@
126129
;; xstore64le_o32 x13, 72, x14
127130
;; xmov x14, sp
128131
;; xstore64le_o32 x13, 64, x14
129-
;; xpcadd x15, 0x2a // target = 0x16d
132+
;; xpcadd x15, 0x2a // target = 0x17b
130133
;; xstore64le_o32 x13, 80, x15
131-
;; call -0x147 // target = 0xb
134+
;; call -0x155 // target = 0xb
132135
;; ├─╼ exception frame offset: SP = FP - 0x90
133-
;; ╰─╼ exception handler: default handler, no dynamic context, handler=0x16d
136+
;; ╰─╼ exception handler: default handler, no dynamic context, handler=0x17b
134137
;; xload64le_o32 x2, sp, 0
135138
;; xstore32le_o32 x2, 0, x0
136139
;; xone x0
137140
;; pop_frame_restore 144, x16, x17, x18, x19, x20, x21, x22, x23, x24, x25, x26, x27, x28, x29, sp, spilltmp0
138141
;; ret
139-
;; 16d: xzero x0
140-
;; 16f: pop_frame_restore 144, x16, x17, x18, x19, x20, x21, x22, x23, x24, x25, x26, x27, x28, x29, sp, spilltmp0
141-
;; 174: ret
142+
;; 17b: xzero x0
143+
;; 17d: pop_frame_restore 144, x16, x17, x18, x19, x20, x21, x22, x23, x24, x25, x26, x27, x28, x29, sp, spilltmp0
144+
;; 182: ret
142145
;;
143146
;; wasm[0]::array_to_wasm_trampoline[3]:
144147
;; push_frame_save 144, x16, x17, x18, x19, x20, x21, x22, x23, x24, x25, x26, x27, x28, x29, sp, spilltmp0
@@ -149,19 +152,19 @@
149152
;; xstore64le_o32 x15, 72, x2
150153
;; xmov x2, sp
151154
;; xstore64le_o32 x15, 64, x2
152-
;; xpcadd x2, 0x2d // target = 0x1d1
155+
;; xpcadd x2, 0x2d // target = 0x1df
153156
;; xstore64le_o32 x15, 80, x2
154-
;; call3 x0, x1, x14, -0x1a2 // target = 0x11
157+
;; call3 x0, x1, x14, -0x1b0 // target = 0x11
155158
;; ├─╼ exception frame offset: SP = FP - 0x90
156-
;; ╰─╼ exception handler: default handler, no dynamic context, handler=0x1d1
159+
;; ╰─╼ exception handler: default handler, no dynamic context, handler=0x1df
157160
;; xload64le_o32 x2, sp, 0
158161
;; xstore32le_o32 x2, 0, x0
159162
;; xone x0
160163
;; pop_frame_restore 144, x16, x17, x18, x19, x20, x21, x22, x23, x24, x25, x26, x27, x28, x29, sp, spilltmp0
161164
;; ret
162-
;; 1d1: xzero x0
163-
;; 1d3: pop_frame_restore 144, x16, x17, x18, x19, x20, x21, x22, x23, x24, x25, x26, x27, x28, x29, sp, spilltmp0
164-
;; 1d8: ret
165+
;; 1df: xzero x0
166+
;; 1e1: pop_frame_restore 144, x16, x17, x18, x19, x20, x21, x22, x23, x24, x25, x26, x27, x28, x29, sp, spilltmp0
167+
;; 1e6: ret
165168
;;
166169
;; signatures[0]::wasm_to_array_trampoline:
167170
;; push_frame_save 32, x16, x17
@@ -180,15 +183,15 @@
180183
;; xmov x3, x16
181184
;; call_indirect_host 0
182185
;; zext8 x15, x0
183-
;; br_if_not32 x15, 0x13 // target = 0x230
184-
;; 223: xload32le_o32 x0, x16, 0
186+
;; br_if_not32 x15, 0x13 // target = 0x23e
187+
;; 231: xload32le_o32 x0, x16, 0
185188
;; pop_frame_restore 32, x16, x17
186189
;; ret
187-
;; 230: xmov x1, x17
188-
;; 233: xload64le_o32 x0, x1, 16
189-
;; 23a: xload64le_o32 x0, x0, 328
190-
;; 241: call_indirect_host 42
191-
;; 245: trap
190+
;; 23e: xmov x1, x17
191+
;; 241: xload64le_o32 x0, x1, 16
192+
;; 248: xload64le_o32 x0, x0, 328
193+
;; 24f: call_indirect_host 42
194+
;; 253: trap
192195
;;
193196
;; signatures[1]::wasm_to_array_trampoline:
194197
;; push_frame_save 32, x16, x17
@@ -208,15 +211,15 @@
208211
;; xmov x3, x16
209212
;; call_indirect_host 0
210213
;; zext8 x0, x0
211-
;; br_if_not32 x0, 0x13 // target = 0x2a6
212-
;; 299: xload32le_o32 x0, x16, 0
214+
;; br_if_not32 x0, 0x13 // target = 0x2b4
215+
;; 2a7: xload32le_o32 x0, x16, 0
213216
;; pop_frame_restore 32, x16, x17
214217
;; ret
215-
;; 2a6: xmov x1, x17
216-
;; 2a9: xload64le_o32 x0, x1, 16
217-
;; 2b0: xload64le_o32 x0, x0, 328
218-
;; 2b7: call_indirect_host 42
219-
;; 2bb: trap
218+
;; 2b4: xmov x1, x17
219+
;; 2b7: xload64le_o32 x0, x1, 16
220+
;; 2be: xload64le_o32 x0, x0, 328
221+
;; 2c5: call_indirect_host 42
222+
;; 2c9: trap
220223
;;
221224
;; wasmtime_builtin_table_get_lazy_init_func_ref:
222225
;; push_frame

0 commit comments

Comments
 (0)