Commit 7a5ba65
authored
[AArch64] optimize vselect of bitcast (llvm#180375)
Using code/ideas from the x86 backend to optimize a select on a bitcast
integer. The previous aarch64 approach was to individually extract the
bits from the mask, which is kind of terrible.
https://rust.godbolt.org/z/576sndT66
```llvm
define void @if_then_else8(ptr %out, i8 %mask, ptr %if_true, ptr %if_false) {
start:
%t = load <8 x i32>, ptr %if_true, align 4
%f = load <8 x i32>, ptr %if_false, align 4
%m = bitcast i8 %mask to <8 x i1>
%s = select <8 x i1> %m, <8 x i32> %t, <8 x i32> %f
store <8 x i32> %s, ptr %out, align 4
ret void
}
```
turned into
```asm
if_then_else8: // @if_then_else8
sub sp, sp, #16
ubfx w8, w1, #4, #1
and w11, w1, #0x1
ubfx w9, w1, #5, #1
fmov s1, w11
ubfx w10, w1, #1, #1
fmov s0, w8
ubfx w8, w1, #6, #1
ldp q5, q2, [x3]
mov v1.h[1], w10
ldp q4, q3, [x2]
mov v0.h[1], w9
ubfx w9, w1, #2, #1
mov v1.h[2], w9
ubfx w9, w1, #3, #1
mov v0.h[2], w8
ubfx w8, w1, #7, #1
mov v1.h[3], w9
mov v0.h[3], w8
ushll v1.4s, v1.4h, #0
ushll v0.4s, v0.4h, #0
shl v1.4s, v1.4s, #31
shl v0.4s, v0.4s, #31
cmlt v1.4s, v1.4s, #0
cmlt v0.4s, v0.4s, #0
bsl v1.16b, v4.16b, v5.16b
bsl v0.16b, v3.16b, v2.16b
stp q1, q0, [x0]
add sp, sp, #16
ret
```
With this PR that instead emits
```asm
if_then_else8:
adrp x8, .LCPI0_1
dup v0.4s, w1
ldr q1, [x8, :lo12:.LCPI0_1]
adrp x8, .LCPI0_0
ldr q2, [x8, :lo12:.LCPI0_0]
ldp q4, q3, [x2]
and v1.16b, v0.16b, v1.16b
and v0.16b, v0.16b, v2.16b
ldp q5, q2, [x3]
cmeq v1.4s, v1.4s, #0
cmeq v0.4s, v0.4s, #0
bsl v1.16b, v2.16b, v3.16b
bsl v0.16b, v5.16b, v4.16b
stp q0, q1, [x0]
ret
```
The new output is substantially shorter. Instead of building the mask
element-by-element, this approach (by virtue of not splitting) splats
the mask value into all vector lanes, performs a bitwise AND with
powers of 2, and compares with zero to construct the mask vector.
cc rust-lang/rust#122376
cc llvm#175769
1 parent 9e95cff commit 7a5ba65
File tree
2 files changed
+1220
-4
lines changed
- llvm
- lib/Target/AArch64
- test/CodeGen/AArch64
2 files changed
+1220
-4
lines changed

| Original file line number | Diff line number | Diff line change | |
|---|---|---|---|
| |||
24368 | 24368 | | |
24369 | 24369 | | |
24370 | 24370 | | |
| 24371 | + | |
| 24372 | + | |
| 24373 | + | |
| 24374 | + | |
| 24375 | + | |
| 24376 | + | |
| 24377 | + | |
| 24378 | + | |
| 24379 | + | |
| 24380 | + | |
| 24381 | + | |
| 24382 | + | |
| 24383 | + | |
| 24384 | + | |
| 24385 | + | |
| 24386 | + | |
| 24387 | + | |
| 24388 | + | |
| 24389 | + | |
| 24390 | + | |
| 24391 | + | |
| 24392 | + | |
| 24393 | + | |
| 24394 | + | |
| 24395 | + | |
| 24396 | + | |
| 24397 | + | |
| 24398 | + | |
| 24399 | + | |
| 24400 | + | |
| 24401 | + | |
| 24402 | + | |
| 24403 | + | |
| 24404 | + | |
| 24405 | + | |
| 24406 | + | |
| 24407 | + | |
| 24408 | + | |
| 24409 | + | |
| 24410 | + | |
| 24411 | + | |
| 24412 | + | |
| 24413 | + | |
| 24414 | + | |
| 24415 | + | |
| 24416 | + | |
| 24417 | + | |
| 24418 | + | |
| 24419 | + | |
| 24420 | + | |
| 24421 | + | |
| 24422 | + | |
| 24423 | + | |
| 24424 | + | |
| 24425 | + | |
| 24426 | + | |
| 24427 | + | |
| 24428 | + | |
| 24429 | + | |
| 24430 | + | |
| 24431 | + | |
| 24432 | + | |
| 24433 | + | |
| 24434 | + | |
| 24435 | + | |
| 24436 | + | |
| 24437 | + | |
| 24438 | + | |
| 24439 | + | |
| 24440 | + | |
| 24441 | + | |
| 24442 | + | |
| 24443 | + | |
| 24444 | + | |
| 24445 | + | |
| 24446 | + | |
| 24447 | + | |
| 24448 | + | |
| 24449 | + | |
| 24450 | + | |
| 24451 | + | |
| 24452 | + | |
| 24453 | + | |
24371 | 24454 | | |
24372 | 24455 | | |
24373 | 24456 | | |
| |||
24432 | 24515 | | |
24433 | 24516 | | |
24434 | 24517 | | |
24435 | | - | |
| 24518 | + | |
| 24519 | + | |
24436 | 24520 | | |
24437 | 24521 | | |
24438 | 24522 | | |
| |||
24455 | 24539 | | |
24456 | 24540 | | |
24457 | 24541 | | |
| 24542 | + | |
| 24543 | + | |
| 24544 | + | |
| 24545 | + | |
| 24546 | + | |
| 24547 | + | |
| 24548 | + | |
24458 | 24549 | | |
24459 | 24550 | | |
24460 | 24551 | | |
| |||
27712 | 27803 | | |
27713 | 27804 | | |
27714 | 27805 | | |
27715 | | - | |
| 27806 | + | |
| 27807 | + | |
| 27808 | + | |
| 27809 | + | |
| 27810 | + | |
27716 | 27811 | | |
27717 | 27812 | | |
27718 | 27813 | | |
| |||
27776 | 27871 | | |
27777 | 27872 | | |
27778 | 27873 | | |
| 27874 | + | |
| 27875 | + | |
| 27876 | + | |
| 27877 | + | |
| 27878 | + | |
| 27879 | + | |
| 27880 | + | |
| 27881 | + | |
| 27882 | + | |
| 27883 | + | |
| 27884 | + | |
| 27885 | + | |
| 27886 | + | |
| 27887 | + | |
27779 | 27888 | | |
27780 | 27889 | | |
27781 | 27890 | | |
| |||
29188 | 29297 | | |
29189 | 29298 | | |
29190 | 29299 | | |
29191 | | - | |
| 29300 | + | |
29192 | 29301 | | |
29193 | 29302 | | |
29194 | 29303 | | |
| |||
29200 | 29309 | | |
29201 | 29310 | | |
29202 | 29311 | | |
29203 | | - | |
| 29312 | + | |
29204 | 29313 | | |
29205 | 29314 | | |
29206 | 29315 | | |
| |||
0 commit comments