|
1 | | -;; rewrites for integer and floating-point arithmetic |
2 | | -;; eg: `iadd`, `isub`, `ineg`, `imul`, `fadd`, `fsub`, `fmul` |
3 | | - |
4 | | -;; For commutative instructions, we depend on cprop.isle pushing immediates to |
5 | | -;; the right, and thus only simplify patterns like `x+0`, not `0+x`. |
6 | | - |
7 | | -;; x+0 == x. |
8 | | -(rule (simplify (iadd ty |
9 | | - x |
10 | | - (iconst_u ty 0))) |
11 | | - (subsume x)) |
12 | | -;; x-0 == x. |
13 | | -(rule (simplify (isub ty |
14 | | - x |
15 | | - (iconst_u ty 0))) |
16 | | - (subsume x)) |
17 | | -;; 0-x == (ineg x). |
18 | | -(rule (simplify (isub ty |
19 | | - (iconst_u ty 0) |
20 | | - x)) |
21 | | - (ineg ty x)) |
22 | | - |
23 | | -;; x + -y == -y + x == -(y - x) == x - y |
24 | | -(rule (simplify (iadd ty x (ineg ty y))) |
25 | | - (isub ty x y)) |
26 | | -(rule (simplify (iadd ty (ineg ty y) x)) |
27 | | - (isub ty x y)) |
28 | | -(rule (simplify (ineg ty (isub ty y x))) |
29 | | - (isub ty x y)) |
30 | | -;; x - -y == x + y |
31 | | -(rule (simplify (isub ty x (ineg ty y))) |
32 | | - (iadd ty x y)) |
33 | | - |
34 | | -;; ineg(ineg(x)) == x. |
35 | | -(rule (simplify (ineg ty (ineg ty x))) (subsume x)) |
36 | | - |
37 | | -;; ineg(x) * ineg(y) == x*y. |
38 | | -(rule (simplify (imul ty (ineg ty x) (ineg ty y))) |
39 | | - (subsume (imul ty x y))) |
40 | | - |
41 | | -;; iabs(ineg(x)) == iabs(x). |
42 | | -(rule (simplify (iabs ty (ineg ty x))) |
43 | | - (iabs ty x)) |
44 | | - |
45 | | -;; iabs(iabs(x)) == iabs(x). |
46 | | -(rule (simplify (iabs ty inner @ (iabs ty x))) |
47 | | - (subsume inner)) |
48 | | - |
49 | | -;; x-x == 0. |
50 | | -(rule (simplify (isub (ty_int ty) x x)) (subsume (iconst_u ty 0))) |
51 | | - |
52 | | -;; x*1 == x. |
53 | | -(rule (simplify (imul ty |
54 | | - x |
55 | | - (iconst_u ty 1))) |
56 | | - (subsume x)) |
57 | | - |
58 | | -;; x*0 == 0. |
59 | | -(rule (simplify (imul ty |
60 | | - _ |
61 | | - zero @ (iconst_u ty 0))) |
62 | | - (subsume zero)) |
63 | | - |
64 | | -;; x*-1 == ineg(x). |
65 | | -(rule (simplify (imul ty x (iconst_s ty -1))) |
66 | | - (ineg ty x)) |
67 | | - |
68 | | -;; (!x) + 1 == ineg(x) |
69 | | -(rule (simplify (iadd ty (bnot ty x) (iconst_u ty 1))) |
70 | | - (ineg ty x)) |
71 | | - |
72 | | -;; !(x - 1) == !(x + (-1)) == ineg(x) |
73 | | -(rule (simplify (bnot ty (isub ty x (iconst_s ty 1)))) |
74 | | - (ineg ty x)) |
75 | | -(rule (simplify (bnot ty (iadd ty x (iconst_s ty -1)))) |
76 | | - (ineg ty x)) |
77 | | - |
78 | | -;; x / 1 == x. |
79 | | -(rule (simplify_skeleton (sdiv x (iconst_s ty 1))) x) |
80 | | -(rule (simplify_skeleton (udiv x (iconst_u ty 1))) x) |
81 | | - |
82 | | -;; Unsigned `x / d == x >> ilog2(d)` when d is a power of two. |
83 | | -(rule (simplify_skeleton (udiv x (iconst_u ty (u64_extract_power_of_two d)))) |
84 | | - (ushr ty x (iconst_u ty (u64_ilog2 d)))) |
85 | | - |
86 | | -;; Signed `x / d` when d is a power of two is a bit more involved... |
87 | | -(rule (simplify_skeleton (sdiv x (iconst_u ty (u64_extract_power_of_two d)))) |
88 | | - (if-let true (u64_gt d 1)) |
89 | | - ;; This rule musn't fire for the most negative number - which looks like |
90 | | - ;; a power of two (sign bit set and otherwise all zeros) |
91 | | - (if-let true (u32_lt (u64_trailing_zeros d) |
92 | | - (u32_sub (ty_bits ty) 1))) |
93 | | - (let ((k u32 (u64_trailing_zeros d)) |
94 | | - (t1 Value (sshr ty x (iconst_u ty (u32_sub k 1)))) |
95 | | - (t2 Value (ushr ty t1 (iconst_u ty (u32_sub (ty_bits ty) k)))) |
96 | | - (t3 Value (iadd ty x t2)) |
97 | | - (t4 Value (sshr ty t3 (iconst_s ty k)))) |
98 | | - t4)) |
99 | | - |
100 | | -;; And signed `x / d` when d is a negative power of two is the same, but with a |
101 | | -;; negation. |
102 | | -(rule (simplify_skeleton (sdiv x (iconst_s ty d))) |
103 | | - (if-let true (i64_is_negative_power_of_two d)) |
104 | | - (if-let true (i64_ne d -1)) |
105 | | - (let ((k u32 (i64_trailing_zeros d)) |
106 | | - (t1 Value (sshr ty x (iconst_u ty (u32_sub k 1)))) |
107 | | - (t2 Value (ushr ty t1 (iconst_u ty (u32_sub (ty_bits ty) k)))) |
108 | | - (t3 Value (iadd ty x t2)) |
109 | | - (t4 Value (sshr ty t3 (iconst_s ty k))) |
110 | | - (t5 Value (ineg ty t4))) |
111 | | - t5)) |
112 | | - |
113 | | -;; General cases for `udiv` with constant divisors. |
114 | | -(rule (simplify_skeleton (udiv x (iconst_u $I32 (u64_extract_non_zero (u32_from_u64 d))))) |
115 | | - (if-let false (u32_is_power_of_two d)) |
116 | | - (apply_div_const_magic_u32 (Opcode.Udiv) x d)) |
117 | | -(rule (simplify_skeleton (udiv x (iconst_u $I64 (u64_extract_non_zero d)))) |
118 | | - (if-let false (u64_is_power_of_two d)) |
119 | | - (apply_div_const_magic_u64 (Opcode.Udiv) x d)) |
120 | | - |
121 | | -;; General cases for `sdiv` with constant divisors. |
122 | | -(rule (simplify_skeleton (sdiv x (iconst_s $I32 (i64_extract_non_zero (i32_from_i64 d))))) |
123 | | - (if-let false (i64_is_any_sign_power_of_two d)) |
124 | | - (apply_div_const_magic_s32 (Opcode.Sdiv) x d)) |
125 | | -(rule (simplify_skeleton (sdiv x (iconst_s $I64 (i64_extract_non_zero d)))) |
126 | | - (if-let false (i64_is_any_sign_power_of_two d)) |
127 | | - (apply_div_const_magic_s64 (Opcode.Sdiv) x d)) |
128 | | - |
129 | | -;; x % 1 == 0 |
130 | | -(rule (simplify_skeleton (urem x (iconst_u ty 1))) (iconst_u ty 0)) |
131 | | -(rule (simplify_skeleton (srem x (iconst_u ty 1))) (iconst_u ty 0)) |
132 | | -(rule (simplify_skeleton (srem x (iconst_s ty -1))) (iconst_u ty 0)) |
133 | | - |
134 | | -;; Unsigned `x % d == x & ((1 << ilog2(d)) - 1)` when `d` is a power of two. |
135 | | -(rule (simplify_skeleton (urem x (iconst_u ty (u64_extract_power_of_two d)))) |
136 | | - (if-let true (u64_gt d 1)) |
137 | | - (let ((mask Value (iconst_u ty (u64_sub (u64_shl 1 (u64_ilog2 d)) 1)))) |
138 | | - (band ty x mask))) |
139 | | - |
140 | | -;; Signed `x % d` when `d` is a (possibly negative) power of two is a little |
141 | | -;; more complicated. |
142 | | -(rule (simplify_skeleton (srem x d_val @ (iconst_s ty d))) |
143 | | - ;; Interestingly, this same sequence works for both positive and negative |
144 | | - ;; powers of two. |
145 | | - (if-let true (i64_is_any_sign_power_of_two d)) |
146 | | - (if-let true (i64_ne d 1)) |
147 | | - (if-let true (i64_ne d -1)) |
148 | | - (let ((k u32 (i64_trailing_zeros d)) |
149 | | - (t1 Value (sshr ty x (iconst_u ty (u32_sub k 1)))) |
150 | | - (t2 Value (ushr ty t1 (iconst_u ty (u32_sub (ty_bits ty) k)))) |
151 | | - (t3 Value (iadd ty x t2)) |
152 | | - (t4 Value (band ty t3 (iconst_s ty (i64_wrapping_neg (i64_shl 1 k))))) |
153 | | - (t5 Value (isub ty x t4))) |
154 | | - t5)) |
155 | | - |
156 | | -;; General cases for `urem` with constant divisors. |
157 | | -(rule (simplify_skeleton (urem x (iconst_u $I32 (u64_extract_non_zero (u32_from_u64 d))))) |
158 | | - (if-let false (u32_is_power_of_two d)) |
159 | | - (apply_div_const_magic_u32 (Opcode.Urem) x d)) |
160 | | -(rule (simplify_skeleton (urem x (iconst_u $I64 (u64_extract_non_zero d)))) |
161 | | - (if-let false (u64_is_power_of_two d)) |
162 | | - (apply_div_const_magic_u64 (Opcode.Urem) x d)) |
163 | | - |
164 | | -;; General cases for `srem` with constant divisors. |
165 | | -(rule (simplify_skeleton (srem x (iconst_s $I32 (i64_extract_non_zero (i32_from_i64 d))))) |
166 | | - (if-let false (i64_is_any_sign_power_of_two d)) |
167 | | - (apply_div_const_magic_s32 (Opcode.Srem) x d)) |
168 | | -(rule (simplify_skeleton (srem x (iconst_s $I64 (i64_extract_non_zero d)))) |
169 | | - (if-let false (i64_is_any_sign_power_of_two d)) |
170 | | - (apply_div_const_magic_s64 (Opcode.Srem) x d)) |
171 | | - |
172 | | -;; x*2 == x+x. |
173 | | -(rule (simplify (imul ty x (iconst_u _ 2))) |
174 | | - (iadd ty x x)) |
175 | | - |
176 | | -;; x*c == x<<log2(c) when c is a power of two. |
177 | | -;; |
178 | | -;; Note that the type of `iconst` must be the same as the type of `imul`, |
179 | | -;; so these rules can only fire in situations where it's safe to construct an |
180 | | -;; `iconst` of that type. |
181 | | -(rule (simplify (imul ty x (iconst _ (imm64_power_of_two c)))) |
182 | | - (ishl ty x (iconst ty (imm64 c)))) |
183 | | -(rule (simplify (imul ty (iconst _ (imm64_power_of_two c)) x)) |
184 | | - (ishl ty x (iconst ty (imm64 c)))) |
185 | | - |
186 | | -;; fneg(fneg(x)) == x. |
187 | | -(rule (simplify (fneg ty (fneg ty x))) (subsume x)) |
188 | | - |
189 | | -;; If both of the multiplied arguments to an `fma` are negated then remove |
190 | | -;; both of them since they cancel out. |
191 | | -(rule (simplify (fma ty (fneg ty x) (fneg ty y) z)) |
192 | | - (fma ty x y z)) |
193 | | - |
194 | | -;; If both of the multiplied arguments to an `fmul` are negated then remove |
195 | | -;; both of them since they cancel out. |
196 | | -(rule (simplify (fmul ty (fneg ty x) (fneg ty y))) |
197 | | - (fmul ty x y)) |
198 | | - |
199 | | -;; Detect people open-coding `mulhi`: (x as big * y as big) >> bits |
200 | | -;; LLVM doesn't have an intrinsic for it, so you'll see it in code like |
201 | | -;; <https://github.com/rust-lang/rust/blob/767453eb7ca188e991ac5568c17b984dd4893e77/library/core/src/num/mod.rs#L174-L180> |
202 | | -(rule (simplify (sshr ty (imul ty (sextend _ x@(value_type half_ty)) |
203 | | - (sextend _ y@(value_type half_ty))) |
204 | | - (iconst_u _ k))) |
205 | | - (if-let true (ty_equal half_ty (ty_half_width ty))) |
206 | | - (if-let true (u64_eq k (ty_bits_u64 half_ty))) |
207 | | - (sextend ty (smulhi half_ty x y))) |
208 | | -(rule (simplify (ushr ty (imul ty (uextend _ x@(value_type half_ty)) |
209 | | - (uextend _ y@(value_type half_ty))) |
210 | | - (iconst_u _ k))) |
211 | | - (if-let true (ty_equal half_ty (ty_half_width ty))) |
212 | | - (if-let true (u64_eq k (ty_bits_u64 half_ty))) |
213 | | - (uextend ty (umulhi half_ty x y))) |
214 | | - |
215 | | -;; Cranelift's `fcvt_from_{u,s}int` instructions are polymorphic over the input |
216 | | -;; type so remove any unnecessary `uextend` or `sextend` to give backends |
217 | | -;; the chance to convert from the smallest integral type to the float. This |
218 | | -;; can help lowerings on x64 for example which has a less efficient u64-to-float |
219 | | -;; conversion than other bit widths. |
220 | | -(rule (simplify (fcvt_from_uint ty (uextend _ val))) |
221 | | - (fcvt_from_uint ty val)) |
222 | | -(rule (simplify (fcvt_from_sint ty (sextend _ val))) |
223 | | - (fcvt_from_sint ty val)) |
224 | | - |
225 | | - |
226 | | -;; or(x, C) + (-C) --> and(x, ~C) |
227 | | -(rule |
228 | | - (simplify (iadd ty |
229 | | - (bor ty x (iconst_s ty n)) |
230 | | - (iconst_s ty m))) |
231 | | - (if-let m (i64_checked_neg n)) |
232 | | - (band ty x (iconst ty (imm64_masked ty (i64_cast_unsigned (i64_not n)))))) |
233 | | - |
234 | | -;; (x + y) - (x | y) --> x & y |
235 | | -(rule (simplify (isub ty (iadd ty x y) (bor ty x y))) (band ty x y)) |
236 | | - |
237 | | -;; x * (1 << y) == x << y |
238 | | -(rule (simplify (imul ty x (ishl ty (iconst_s ty 1) y))) (ishl ty x y)) |
239 | | - |
240 | | -;; (x - y) + x --> x |
241 | | -(rule (simplify (iadd ty (isub ty x y) y)) x) |
242 | | -(rule (simplify (iadd ty y (isub ty x y))) x) |
243 | | - |
244 | | -;; (x + y) - y --> x |
245 | | -(rule (simplify (isub ty (iadd ty x y) x)) y) |
246 | | -(rule (simplify (isub ty (iadd ty x y) y)) x) |
247 | | - |
248 | | -;; (x - y) - x => -y |
249 | | -(rule (simplify (isub ty (isub ty x y) x))(ineg ty y)) |
250 | | - |
251 | | -;; (x * C) (==/!=) D --> x (==/!=) (D / C) when C is odd and divides D |
252 | | -(rule |
253 | | - (simplify (ne ty (iconst_u ty1 x) (imul ty1 y (iconst_u ty1 z)))) |
254 | | - (if-let 0 (u64_checked_rem x z)) |
255 | | - (if-let 1 (u64_rem z 2)) |
256 | | - (ne ty y (iconst ty1 (imm64 (u64_div x z))))) |
257 | | -(rule |
258 | | - (simplify (ne ty (iconst_u ty1 x) (imul ty1 (iconst_u ty1 y) z))) |
259 | | - (if-let 0 (u64_checked_rem x y)) |
260 | | - (if-let 1 (u64_rem y 2)) |
261 | | - (ne ty z (iconst ty1 (imm64 (u64_div x y))))) |
262 | | -(rule |
263 | | - (simplify (ne ty (imul ty1 x (iconst_u ty1 y)) (iconst_u ty1 z))) |
264 | | - (if-let 0 (u64_checked_rem z y)) |
265 | | - (if-let 1 (u64_rem y 2)) |
266 | | - (ne ty x (iconst ty1 (imm64 (u64_div z y))))) |
267 | | -(rule |
268 | | - (simplify (ne ty (imul ty1 (iconst_u ty1 x) y) (iconst_u ty1 z))) |
269 | | - (if-let 0 (u64_checked_rem z x)) |
270 | | - (if-let 1 (u64_rem x 2)) |
271 | | - (ne ty y (iconst ty1 (imm64 (u64_div z x))))) |
272 | | - |
273 | | - |
274 | | -(rule |
275 | | - (simplify (eq ty (iconst_u ty1 x) (imul ty1 y (iconst_u ty1 z)))) |
276 | | - (if-let 0 (u64_checked_rem x z)) |
277 | | - (if-let 1 (u64_rem z 2)) |
278 | | - (eq ty y (iconst ty1 (imm64 (u64_div x z))))) |
279 | | -(rule |
280 | | - (simplify (eq ty (iconst_u ty1 x) (imul ty1 (iconst_u ty1 y) z))) |
281 | | - (if-let 0 (u64_checked_rem x y)) |
282 | | - (if-let 1 (u64_rem y 2)) |
283 | | - (eq ty z (iconst ty1 (imm64 (u64_div x y))))) |
284 | | -(rule |
285 | | - (simplify (eq ty (imul ty1 x (iconst_u ty1 y)) (iconst_u ty1 z))) |
286 | | - (if-let 0 (u64_checked_rem z y)) |
287 | | - (if-let 1 (u64_rem y 2)) |
288 | | - (eq ty x (iconst ty1 (imm64 (u64_div z y))))) |
289 | | -(rule |
290 | | - (simplify (eq ty (imul ty1 (iconst_u ty1 x) y) (iconst_u ty1 z))) |
291 | | - (if-let 0 (u64_checked_rem z x)) |
292 | | - (if-let 1 (u64_rem x 2)) |
293 | | - (eq ty y (iconst ty1 (imm64 (u64_div z x))))) |
294 | | - |
295 | | -;; (x + y) + (-y) ==> x |
296 | | -;; and equivalent operand-order variants. |
297 | | -(rule (simplify (iadd ty (iadd ty x y) (ineg ty y))) x) |
298 | | -(rule (simplify (iadd ty (ineg ty y) (iadd ty x y))) x) |
299 | | -(rule (simplify (iadd ty (iadd ty y x) (ineg ty y))) x) |
300 | | -(rule (simplify (iadd ty (ineg ty y) (iadd ty y x))) x) |
301 | | - |
302 | | -;; (x | y) - (x & y) ==> (x ^ y) |
303 | | -(rule (simplify (isub ty (bor ty x y) (band ty x y))) (bxor ty x y)) |
304 | | -(rule (simplify (isub ty (bor ty x y) (band ty y x))) (bxor ty x y)) |
305 | | -(rule (simplify (isub ty (bor ty y x) (band ty x y))) (bxor ty x y)) |
306 | | -(rule (simplify (isub ty (bor ty y x) (band ty y x))) (bxor ty x y)) |
307 | | - |
308 | | -;; (x + y) - (x & y) ==> (x | y) |
309 | | -(rule (simplify (isub ty (iadd ty x y) (band ty x y))) (bor ty x y)) |
310 | | -(rule (simplify (isub ty (iadd ty x y) (band ty y x))) (bor ty x y)) |
311 | | -(rule (simplify (isub ty (iadd ty y x) (band ty x y))) (bor ty x y)) |
312 | | -(rule (simplify (isub ty (iadd ty y x) (band ty y x))) (bor ty x y)) |
313 | | - |
314 | | -;; (x | y) - (x ^ y) ==> (x & y) |
315 | | -(rule (simplify (isub ty (bor ty x y) (bxor ty x y))) (band ty x y)) |
316 | | -(rule (simplify (isub ty (bor ty x y) (bxor ty y x))) (band ty x y)) |
317 | | -(rule (simplify (isub ty (bor ty y x) (bxor ty x y))) (band ty x y)) |
318 | | -(rule (simplify (isub ty (bor ty y x) (bxor ty y x))) (band ty x y)) |
319 | | - |
320 | | -;; (~x) + x == -1 |
321 | | -(rule (simplify (iadd ty (bnot ty x) x)) (iconst_s ty -1)) |
322 | | -(rule (simplify (iadd ty x (bnot ty x))) (iconst_s ty -1)) |
0 commit comments