Skip to content

Commit a80dd80

Browse files
authored
Cranelift: Rewrite conditional branches to unconditional traps into conditional traps during legalization (#10988)
* Cranelift: Legalize via a backwards walk, rather than forwards Note: the test expectation change in `filetests/egraph/misc.clif` is simply because we happen to change the order in which we created the legalized `stack_addr` instructions that get GVN'd together, and therefore also changed their relative value numbering. We dedupe to the value that occurs first in the function, which is the one inserted into the DFG *after* the other now because of the backwards traversal, and it therefore has a different value number from before. The resulting program is identical, modulo value numbering. * Cranelift: Rewrite conditional branches to unconditional traps into conditional traps during legalization Given this instruction: ```clif brif v0, block1, block2 ``` If we know that `block1` does nothing but immediately trap then we can rewrite that `brif` into the following: ```clif trapz v0, <trapcode> jump block2 ``` (And we can do the equivalent with `trapz` if `block2` immediately traps). This transformation allows for the conditional trap instructions to be GVN'd and for our egraphs mid-end to generally better optimize the program. We additionally have better codegen in our backends for `trapz` than branches to unconditional traps. Fixes #10941 * Update CLIF filetests and Wasmtime disas tests
1 parent 901b441 commit a80dd80

75 files changed

Lines changed: 835 additions & 703 deletions

File tree

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

cranelift/codegen/src/context.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -168,13 +168,13 @@ impl Context {
168168
self.func.display()
169169
);
170170

171-
self.compute_cfg();
172171
if isa.flags().enable_nan_canonicalization() {
173172
self.canonicalize_nans(isa)?;
174173
}
175174

176175
self.legalize(isa)?;
177176

177+
self.compute_cfg();
178178
self.compute_domtree();
179179
self.eliminate_unreachable_code(isa)?;
180180
self.remove_constant_phis(isa)?;
@@ -291,6 +291,7 @@ impl Context {
291291
// TODO: Avoid doing this when legalization doesn't actually mutate the CFG.
292292
self.domtree.clear();
293293
self.loop_analysis.clear();
294+
self.cfg.clear();
294295

295296
// Run some specific legalizations only.
296297
simple_legalize(&mut self.func, isa);

cranelift/codegen/src/ir/layout.rs

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -516,6 +516,12 @@ impl Layout {
516516
}
517517
}
518518

519+
/// Does the given block contain exactly one instruction?
520+
pub fn block_contains_exactly_one_inst(&self, block: Block) -> bool {
521+
let block = &self.blocks[block];
522+
block.first_inst.is_some() && block.first_inst == block.last_inst
523+
}
524+
519525
/// Split the block containing `before` in two.
520526
///
521527
/// Insert `new_block` after the old block and move `before` and the following instructions to
Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,86 @@
1+
//! Rewrite branch-to-unconditional-trap into conditional trap instructions.
2+
//!
3+
//! Given this instruction:
4+
//!
5+
//! ```clif
6+
//! brif v0, block1, block2
7+
//! ```
8+
//!
9+
//! If we know that `block1` does nothing but immediately trap then we can
10+
//! rewrite that `brif` into the following:
11+
//!
12+
//! ```clif
13+
//! trapz v0, <trapcode>
14+
//! jump block2
15+
//! ```
16+
//!
17+
//! (And we can do the equivalent with `trapz` if `block2` immediately traps).
18+
//!
19+
//! This transformation allows for the conditional trap instructions to be GVN'd
20+
//! and for our egraphs mid-end to generally better optimize the program. We
21+
//! additionally have better codegen in our backends for `trapz` than branches
22+
//! to unconditional traps.
23+
24+
use super::*;
25+
26+
#[derive(Default)]
27+
pub struct BranchToTrap {
28+
/// The set of blocks that contain exactly one unconditional trap
29+
/// instruction.
30+
just_trap_blocks: EntitySet<ir::Block>,
31+
}
32+
33+
impl BranchToTrap {
34+
/// Analyze the given block.
35+
///
36+
/// The `block` must be terminated by a `trap` instruction.
37+
pub fn analyze_trapping_block(&mut self, func: &ir::Function, block: ir::Block) {
38+
if func.layout.block_contains_exactly_one_inst(block) {
39+
self.just_trap_blocks.insert(block);
40+
}
41+
}
42+
43+
fn just_trap_block_code(&self, func: &ir::Function, block: ir::Block) -> ir::TrapCode {
44+
debug_assert!(self.just_trap_blocks.contains(block));
45+
debug_assert!(func.layout.block_contains_exactly_one_inst(block));
46+
let inst = func.layout.first_inst(block).unwrap();
47+
match func.dfg.insts[inst] {
48+
InstructionData::Trap { code, .. } => code,
49+
_ => unreachable!(),
50+
}
51+
}
52+
53+
/// Process a `brif` instruction, potentially performing our rewrite.
54+
///
55+
/// The `inst` must be a `brif` containing the given `arg` and `blocks`.
56+
pub fn process_brif(
57+
&self,
58+
func: &mut ir::Function,
59+
inst: ir::Inst,
60+
arg: ir::Value,
61+
blocks: [ir::BlockCall; 2],
62+
) {
63+
let consequent = blocks[0].block(&func.dfg.value_lists);
64+
let alternative = blocks[1].block(&func.dfg.value_lists);
65+
66+
if self.just_trap_blocks.contains(consequent) {
67+
let mut pos = FuncCursor::new(func);
68+
pos.goto_inst(inst);
69+
70+
let code = self.just_trap_block_code(pos.func, consequent);
71+
pos.ins().trapnz(arg, code);
72+
73+
let args: SmallVec<[_; 8]> = blocks[1].args(&pos.func.dfg.value_lists).collect();
74+
pos.func.dfg.replace(inst).jump(alternative, &args);
75+
} else if self.just_trap_blocks.contains(alternative) {
76+
let mut pos = FuncCursor::new(func);
77+
pos.goto_inst(inst);
78+
79+
let code = self.just_trap_block_code(pos.func, alternative);
80+
pos.ins().trapz(arg, code);
81+
82+
let args: SmallVec<[_; 8]> = blocks[0].args(&pos.func.dfg.value_lists).collect();
83+
pos.func.dfg.replace(inst).jump(consequent, &args);
84+
}
85+
}
86+
}

cranelift/codegen/src/legalizer/mod.rs

Lines changed: 39 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -19,9 +19,13 @@ use crate::ir::types::{self, I64, I128};
1919
use crate::ir::{self, InstBuilder, InstructionData, MemFlags, Value};
2020
use crate::isa::TargetIsa;
2121
use crate::trace;
22+
use cranelift_entity::EntitySet;
23+
use smallvec::SmallVec;
2224

25+
mod branch_to_trap;
2326
mod globalvalue;
2427

28+
use self::branch_to_trap::BranchToTrap;
2529
use self::globalvalue::expand_global_value;
2630

2731
fn imm_const(pos: &mut FuncCursor, arg: Value, imm: Imm64, is_signed: bool) -> Value {
@@ -51,34 +55,34 @@ fn imm_const(pos: &mut FuncCursor, arg: Value, imm: Imm64, is_signed: bool) -> V
5155

5256
/// A command describing how the walk over instructions should proceed.
5357
enum WalkCommand {
54-
/// Continue to the next instruction, if any.
58+
/// Continue walking to the next instruction, if any.
5559
Continue,
5660
/// Revisit the current instruction (presumably because it was legalized
5761
/// into a new instruction that may also require further legalization).
5862
Revisit,
5963
}
6064

61-
/// A simple, naive forwards walk over every instruction in every block in the
65+
/// A simple, naive backwards walk over every instruction in every block in the
6266
/// function's layout.
6367
///
64-
/// This does not guarantee any kind of post-order visitation or anything like
65-
/// that, it is just iterating over blocks in layout order, not any kind of
66-
/// control-flow graph visitation order.
68+
/// This does not guarantee any kind of reverse post-order visitation or
69+
/// anything like that, it is just iterating over blocks in reverse layout
70+
/// order, not any kind of control-flow graph visitation order.
6771
///
6872
/// The `f` visitor closure controls how the walk proceeds via its `WalkCommand`
6973
/// result.
70-
fn forward_walk(
74+
fn backward_walk(
7175
func: &mut ir::Function,
72-
mut f: impl FnMut(&mut ir::Function, ir::Inst) -> WalkCommand,
76+
mut f: impl FnMut(&mut ir::Function, ir::Block, ir::Inst) -> WalkCommand,
7377
) {
7478
let mut pos = FuncCursor::new(func);
75-
while let Some(_block) = pos.next_block() {
79+
while let Some(block) = pos.prev_block() {
7680
let mut prev_pos;
7781
while let Some(inst) = {
7882
prev_pos = pos.position();
79-
pos.next_inst()
83+
pos.prev_inst()
8084
} {
81-
match f(pos.func, inst) {
85+
match f(pos.func, block, inst) {
8286
WalkCommand::Continue => continue,
8387
WalkCommand::Revisit => pos.set_position(prev_pos),
8488
}
@@ -91,7 +95,31 @@ fn forward_walk(
9195
pub fn simple_legalize(func: &mut ir::Function, isa: &dyn TargetIsa) {
9296
trace!("Pre-legalization function:\n{}", func.display());
9397

94-
forward_walk(func, |func, inst| match func.dfg.insts[inst] {
98+
let mut branch_to_trap = BranchToTrap::default();
99+
100+
// We walk the IR backwards because in practice, given the way that
101+
// frontends tend to produce CLIF, this means we will visit in roughly
102+
// reverse post order, which is helpful for getting the most optimizations
103+
// out of the `branch-to-trap` pass that we can (it must analyze trapping
104+
// blocks before it can rewrite branches to them) but the order does not
105+
// actually affect correctness.
106+
backward_walk(func, |func, block, inst| match func.dfg.insts[inst] {
107+
InstructionData::Trap {
108+
opcode: ir::Opcode::Trap,
109+
code: _,
110+
} => {
111+
branch_to_trap.analyze_trapping_block(func, block);
112+
WalkCommand::Continue
113+
}
114+
InstructionData::Brif {
115+
opcode: ir::Opcode::Brif,
116+
arg,
117+
blocks,
118+
} => {
119+
branch_to_trap.process_brif(func, inst, arg, blocks);
120+
WalkCommand::Continue
121+
}
122+
95123
InstructionData::UnaryGlobalValue {
96124
opcode: ir::Opcode::GlobalValue,
97125
global_value,

cranelift/filetests/filetests/egraph/misc.clif

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ block0(v0: i64):
1414
; check: function %stack_load(i64) -> i64 fast {
1515
; nextln: ss0 = explicit_slot 8
1616
; check: block0(v0: i64):
17-
; nextln: v2 = stack_addr.i64 ss0
18-
; nextln: store notrap v0, v2
17+
; nextln: v3 = stack_addr.i64 ss0
18+
; nextln: store notrap v0, v3
1919
; nextln: return v0
2020
; nextln: }

cranelift/filetests/filetests/isa/x64/branches.clif

Lines changed: 6 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -638,15 +638,12 @@ block202:
638638
; ucomiss const(1), %xmm0
639639
; jp,nz label2; j label1
640640
; block1:
641-
; jmp label5
641+
; jmp label3
642642
; block2:
643643
; ucomiss const(0), %xmm0
644-
; jnp label4; j label3
644+
; jnp #trap=heap_oob
645+
; jmp label3
645646
; block3:
646-
; jmp label5
647-
; block4:
648-
; ud2 heap_oob
649-
; block5:
650647
; movq %rbp, %rsp
651648
; popq %rbp
652649
; ret
@@ -658,16 +655,15 @@ block202:
658655
; block1: ; offset 0x4
659656
; ucomiss 0x25(%rip), %xmm0
660657
; jp 0x17
661-
; je 0x26
658+
; je 0x24
662659
; block2: ; offset 0x17
663660
; ucomiss 0x22(%rip), %xmm0
664-
; jp 0x26
661+
; jnp 0x29
665662
; block3: ; offset 0x24
666-
; ud2 ; trap: heap_oob
667-
; block4: ; offset 0x26
668663
; movq %rbp, %rsp
669664
; popq %rbp
670665
; retq
666+
; ud2 ; trap: heap_oob
671667
; addb %al, (%rax)
672668
; addb %al, (%rax)
673669
; addb %al, (%rax)
Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
test legalizer
2+
target aarch64
3+
4+
function %trapnz(i64, i64) -> i64 {
5+
block0(v0: i64, v1: i64):
6+
brif v0, block1, block2(v1)
7+
block1:
8+
trap user42
9+
block2(v2: i64):
10+
return v2
11+
}
12+
13+
; check: block0(v0: i64, v1: i64):
14+
; nextln: trapnz v0, user42
15+
; nextln: jump block2(v1)
16+
17+
function %trapz(i64, i64) -> i64 {
18+
block0(v0: i64, v1: i64):
19+
brif v0, block1(v1), block2
20+
block1(v2: i64):
21+
return v2
22+
block2:
23+
trap user42
24+
}
25+
26+
; check: block0(v0: i64, v1: i64):
27+
; nextln: trapz v0, user42
28+
; nextln: jump block1(v1)
29+
30+
function %could_be_either_doesnt_matter(i64) -> i64 {
31+
block0(v0: i64):
32+
brif v0, block1, block2
33+
block1:
34+
trap user36
35+
block2:
36+
trap user42
37+
}
38+
39+
; check: block0(v0: i64):
40+
; nextln: trapnz v0, user36
41+
; nextln: jump block2

tests/disas/arith.wat

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -25,10 +25,8 @@
2525
;; @0021 v3 = iconst.i32 4
2626
;; @0023 v4 = iconst.i32 4
2727
;; @0025 v5 = isub v3, v4 ; v3 = 4, v4 = 4
28-
;; @002a brif v5, block2, block4
29-
;;
30-
;; block2:
31-
;; @002c trap user11
28+
;; trapnz v5, user11
29+
;; @002a jump block4
3230
;;
3331
;; block4:
3432
;; @002e v6 = iconst.i32 6

tests/disas/component-model/enum.wat

Lines changed: 22 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -50,34 +50,31 @@
5050
;; @0061 v3 = iconst.i32 0
5151
;; @0068 v8 = icmp eq v7, v3 ; v3 = 0
5252
;; @0068 v9 = uextend.i32 v8
53-
;; @0069 brif v9, block2, block3
54-
;;
55-
;; block2:
56-
;; @006b trap user11
53+
;; trapnz v9, user11
54+
;; @0069 jump block3
5755
;;
5856
;; block3:
5957
;; @006d v10 = load.i64 notrap aligned readonly can_move v0+80
6058
;; @006d v11 = load.i32 notrap aligned table v10
6159
;; @006f v12 = iconst.i32 2
6260
;; @0071 v13 = band v11, v12 ; v12 = 2
63-
;; v82 = iconst.i32 0
64-
;; v83 = icmp eq v13, v82 ; v82 = 0
65-
;; @0072 v15 = uextend.i32 v83
66-
;; @0073 brif v15, block4, block5
67-
;;
68-
;; block4:
69-
;; @0075 trap user11
61+
;; v80 = iconst.i32 0
62+
;; v81 = icmp eq v13, v80 ; v80 = 0
63+
;; @0072 v15 = uextend.i32 v81
64+
;; trapnz v15, user11
65+
;; @0073 jump block5
7066
;;
7167
;; block5:
68+
;; @0077 v17 = load.i32 notrap aligned table v10
7269
;; @0079 v18 = iconst.i32 -3
73-
;; @007b v19 = band.i32 v11, v18 ; v18 = -3
70+
;; @007b v19 = band v17, v18 ; v18 = -3
7471
;; @007c store notrap aligned table v19, v10
7572
;; v70 = iconst.i32 -4
76-
;; v76 = band.i32 v11, v70 ; v70 = -4
73+
;; v76 = band v17, v70 ; v70 = -4
7774
;; @0083 store notrap aligned table v76, v10
78-
;; v84 = iconst.i32 1
79-
;; v85 = bor v19, v84 ; v84 = 1
80-
;; @008a store notrap aligned table v85, v10
75+
;; v82 = iconst.i32 1
76+
;; v83 = bor v19, v82 ; v82 = 1
77+
;; @008a store notrap aligned table v83, v10
8178
;; @008c v32 = load.i64 notrap aligned readonly can_move v0+56
8279
;; @008c v33 = load.i64 notrap aligned readonly can_move v0+72
8380
;; @008c v34 = call_indirect sig0, v32(v33, v0)
@@ -88,19 +85,18 @@
8885
;; @009b v40 = iconst.i32 3
8986
;; @009d v41 = icmp ugt v34, v40 ; v40 = 3
9087
;; @009d v42 = uextend.i32 v41
91-
;; @009e brif v42, block6, block7
92-
;;
93-
;; block6:
94-
;; @00a0 trap user11
88+
;; trapnz v42, user11
89+
;; @009e jump block7
9590
;;
9691
;; block7:
97-
;; v86 = iconst.i32 1
98-
;; v87 = bor.i32 v36, v86 ; v86 = 1
99-
;; @00a9 store notrap aligned table v87, v4
92+
;; @00a4 v44 = load.i32 notrap aligned table v4
93+
;; v84 = iconst.i32 1
94+
;; v85 = bor v44, v84 ; v84 = 1
95+
;; @00a9 store notrap aligned table v85, v4
10096
;; @00ab v49 = load.i32 notrap aligned table v10
101-
;; v88 = iconst.i32 2
102-
;; v89 = bor v49, v88 ; v88 = 2
103-
;; @00b0 store notrap aligned table v89, v10
97+
;; v86 = iconst.i32 2
98+
;; v87 = bor v49, v86 ; v86 = 2
99+
;; @00b0 store notrap aligned table v87, v10
104100
;; @00b2 jump block1
105101
;;
106102
;; block1:

0 commit comments

Comments
 (0)