[Cranelift] Fold two `ishl`s by the same shift amount over iadd|isub|band (#11640)

bongjunj · web-flow · commit bdd8d1b73e1d · 2025-09-09T17:10:24.000Z
* [Cranelift] add opts and tests

* [Cranelift] add opts and tests

* [Cranelift] add opts and tests

* [Cranelift] update tests
diff --git a/cranelift/codegen/src/opts/shifts.isle b/cranelift/codegen/src/opts/shifts.isle
@@ -305,3 +305,8 @@
 (rule (simplify (rotl ty x (iconst_u kty k)))
       (if-let false (u64_eq k (u64_and k (ty_shift_mask ty))))
       (rotl ty x (iconst_u kty (u64_and k (ty_shift_mask ty)))))
+
+(rule (simplify (band ty (ishl ty x z) (ishl ty y z))) (ishl ty (band ty x y) z)) ;; (x << z) & (y << z) == (x & y) << z: same shift amount `z` on both sides, so two shifts become one.
+(rule (simplify (isub ty (ishl ty x z) (ishl ty y z))) (ishl ty (isub ty x y) z)) ;; (x << z) - (y << z) == (x - y) << z: same shift amount `z` on both sides, so two shifts become one.
+(rule (simplify (iadd ty (ishl ty x z) (ishl ty y z))) (ishl ty (iadd ty x y) z)) ;; (x << z) + (y << z) == (x + y) << z: same shift amount `z` on both sides, so two shifts become one.
+
diff --git a/cranelift/filetests/filetests/egraph/fold-add-shifts.clif b/cranelift/filetests/filetests/egraph/fold-add-shifts.clif
@@ -0,0 +1,64 @@
+test optimize precise-output
+set opt_level=speed
+target x86_64
+
+function %test1(i8, i8, i8) -> i8 { ;; i8: (v0 << v1) + (v3 << v1), both shifted by v1; expected to fold to (v0 + v3) << v1.
+block0(v0: i8, v1: i8, v3: i8):
+    v2 = ishl v0, v1
+    v4 = ishl v3, v1
+    v5 = iadd v2, v4
+    return v5
+}
+
+; function %test1(i8, i8, i8) -> i8 fast {
+; block0(v0: i8, v1: i8, v3: i8):
+;     v6 = iadd v0, v3
+;     v7 = ishl v6, v1
+;     return v7
+; }
+
+function %test2(i16, i16, i16) -> i16 { ;; i16: (v0 << v1) + (v3 << v1), both shifted by v1; expected to fold to (v0 + v3) << v1.
+block0(v0: i16, v1: i16, v3: i16):
+    v2 = ishl v0, v1
+    v4 = ishl v3, v1
+    v5 = iadd v2, v4
+    return v5
+}
+
+; function %test2(i16, i16, i16) -> i16 fast {
+; block0(v0: i16, v1: i16, v3: i16):
+;     v6 = iadd v0, v3
+;     v7 = ishl v6, v1
+;     return v7
+; }
+
+function %test3(i32, i32, i32) -> i32 { ;; i32: (v0 << v1) + (v3 << v1), both shifted by v1; expected to fold to (v0 + v3) << v1.
+block0(v0: i32, v1: i32, v3: i32):
+    v2 = ishl v0, v1
+    v4 = ishl v3, v1
+    v5 = iadd v2, v4
+    return v5
+}
+
+; function %test3(i32, i32, i32) -> i32 fast {
+; block0(v0: i32, v1: i32, v3: i32):
+;     v6 = iadd v0, v3
+;     v7 = ishl v6, v1
+;     return v7
+; }
+
+function %test4(i64, i64, i64) -> i64 { ;; i64: (v0 << v1) + (v3 << v1), both shifted by v1; expected to fold to (v0 + v3) << v1.
+block0(v0: i64, v1: i64, v3: i64):
+    v2 = ishl v0, v1
+    v4 = ishl v3, v1
+    v5 = iadd v2, v4
+    return v5
+}
+
+; function %test4(i64, i64, i64) -> i64 fast {
+; block0(v0: i64, v1: i64, v3: i64):
+;     v6 = iadd v0, v3
+;     v7 = ishl v6, v1
+;     return v7
+; }
+
diff --git a/cranelift/filetests/filetests/egraph/fold-and-shifts.clif b/cranelift/filetests/filetests/egraph/fold-and-shifts.clif
@@ -0,0 +1,64 @@
+test optimize precise-output
+set opt_level=speed
+target x86_64
+
+function %test1(i8, i8, i8) -> i8 { ;; i8: (v0 << v1) & (v3 << v1), both shifted by v1; expected to fold to (v0 & v3) << v1.
+block0(v0: i8, v1: i8, v3: i8):
+    v2 = ishl v0, v1
+    v4 = ishl v3, v1
+    v5 = band v2, v4
+    return v5
+}
+
+; function %test1(i8, i8, i8) -> i8 fast {
+; block0(v0: i8, v1: i8, v3: i8):
+;     v6 = band v0, v3
+;     v7 = ishl v6, v1
+;     return v7
+; }
+
+function %test2(i16, i16, i16) -> i16 { ;; i16: (v0 << v1) & (v3 << v1), both shifted by v1; expected to fold to (v0 & v3) << v1.
+block0(v0: i16, v1: i16, v3: i16):
+    v2 = ishl v0, v1
+    v4 = ishl v3, v1
+    v5 = band v2, v4
+    return v5
+}
+
+; function %test2(i16, i16, i16) -> i16 fast {
+; block0(v0: i16, v1: i16, v3: i16):
+;     v6 = band v0, v3
+;     v7 = ishl v6, v1
+;     return v7
+; }
+
+function %test3(i32, i32, i32) -> i32 { ;; i32: (v0 << v1) & (v3 << v1), both shifted by v1; expected to fold to (v0 & v3) << v1.
+block0(v0: i32, v1: i32, v3: i32):
+    v2 = ishl v0, v1
+    v4 = ishl v3, v1
+    v5 = band v2, v4
+    return v5
+}
+
+; function %test3(i32, i32, i32) -> i32 fast {
+; block0(v0: i32, v1: i32, v3: i32):
+;     v6 = band v0, v3
+;     v7 = ishl v6, v1
+;     return v7
+; }
+
+function %test4(i64, i64, i64) -> i64 { ;; i64: (v0 << v1) & (v3 << v1), both shifted by v1; expected to fold to (v0 & v3) << v1.
+block0(v0: i64, v1: i64, v3: i64):
+    v2 = ishl v0, v1
+    v4 = ishl v3, v1
+    v5 = band v2, v4
+    return v5
+}
+
+; function %test4(i64, i64, i64) -> i64 fast {
+; block0(v0: i64, v1: i64, v3: i64):
+;     v6 = band v0, v3
+;     v7 = ishl v6, v1
+;     return v7
+; }
+
diff --git a/cranelift/filetests/filetests/egraph/fold-sub-shifts.clif b/cranelift/filetests/filetests/egraph/fold-sub-shifts.clif
@@ -0,0 +1,64 @@
+test optimize precise-output
+set opt_level=speed
+target x86_64
+
+function %test1(i8, i8, i8) -> i8 { ;; i8: (v0 << v1) - (v3 << v1), both shifted by v1; expected to fold to (v0 - v3) << v1.
+block0(v0: i8, v1: i8, v3: i8):
+    v2 = ishl v0, v1
+    v4 = ishl v3, v1
+    v5 = isub v2, v4
+    return v5
+}
+
+; function %test1(i8, i8, i8) -> i8 fast {
+; block0(v0: i8, v1: i8, v3: i8):
+;     v6 = isub v0, v3
+;     v7 = ishl v6, v1
+;     return v7
+; }
+
+function %test2(i16, i16, i16) -> i16 { ;; i16: (v0 << v1) - (v3 << v1), both shifted by v1; expected to fold to (v0 - v3) << v1.
+block0(v0: i16, v1: i16, v3: i16):
+    v2 = ishl v0, v1
+    v4 = ishl v3, v1
+    v5 = isub v2, v4
+    return v5
+}
+
+; function %test2(i16, i16, i16) -> i16 fast {
+; block0(v0: i16, v1: i16, v3: i16):
+;     v6 = isub v0, v3
+;     v7 = ishl v6, v1
+;     return v7
+; }
+
+function %test3(i32, i32, i32) -> i32 { ;; i32: (v0 << v1) - (v3 << v1), both shifted by v1; expected to fold to (v0 - v3) << v1.
+block0(v0: i32, v1: i32, v3: i32):
+    v2 = ishl v0, v1
+    v4 = ishl v3, v1
+    v5 = isub v2, v4
+    return v5
+}
+
+; function %test3(i32, i32, i32) -> i32 fast {
+; block0(v0: i32, v1: i32, v3: i32):
+;     v6 = isub v0, v3
+;     v7 = ishl v6, v1
+;     return v7
+; }
+
+function %test4(i64, i64, i64) -> i64 { ;; i64: (v0 << v1) - (v3 << v1), both shifted by v1; expected to fold to (v0 - v3) << v1.
+block0(v0: i64, v1: i64, v3: i64):
+    v2 = ishl v0, v1
+    v4 = ishl v3, v1
+    v5 = isub v2, v4
+    return v5
+}
+
+; function %test4(i64, i64, i64) -> i64 fast {
+; block0(v0: i64, v1: i64, v3: i64):
+;     v6 = isub v0, v3
+;     v7 = ishl v6, v1
+;     return v7
+; }
+