Skip to content

Commit f90ddeb

Browse files
authored
Handle some float(widening_op(uint16, uint16)) better in x86 backend (#8976)
Fixes #8913. I inspected the result manually, but there is no test: the natural place for one would be simd_op_check_x86, and there was a deliberate decision not to assert on instruction-selection outcomes for cast patterns on x86, because LLVM changes them frequently.
1 parent f3c4822 commit f90ddeb

1 file changed

Lines changed: 26 additions & 3 deletions

File tree

src/CodeGen_X86.cpp

Lines changed: 26 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -566,13 +566,36 @@ void CodeGen_X86::visit(const Cast *op) {
566566
}
567567
}
568568

569-
if (const Call *mul = Call::as_intrinsic(op->value, {Call::widening_mul})) {
570-
if (src.bits() < dst.bits() && dst.bits() <= 32) {
569+
if (const Call *widening_op = Call::as_intrinsic(op->value, {Call::widening_mul, Call::widening_add, Call::widening_sub})) {
570+
bool should_upcast_args_to_dst_type =
571+
dst.can_represent(widening_op->args[0].type()) &&
572+
dst.can_represent(widening_op->args[1].type()) &&
571573
// LLVM/x86 really doesn't like 8 -> 16 bit multiplication. If we're
572574
// widening to 32-bits after a widening multiply, LLVM prefers to see a
573575
// widening multiply directly to 32-bits. This may result in extra
574576
// casts, so simplify to remove them.
575-
value = codegen(simplify(Mul::make(Cast::make(dst, mul->args[0]), Cast::make(dst, mul->args[1]))));
577+
((widening_op->is_intrinsic(Call::widening_mul) &&
578+
src.bits() < dst.bits() &&
579+
dst.bits() <= 32) ||
580+
// x86 has no unsigned-integer-to-float conversion instructions before AVX-512
581+
(!target.has_feature(Target::AVX512) &&
582+
src.is_uint() &&
583+
src.bits() >= 32 &&
584+
dst.is_float()));
585+
586+
if (should_upcast_args_to_dst_type) {
587+
Expr arg0 = Cast::make(dst, widening_op->args[0]);
588+
Expr arg1 = Cast::make(dst, widening_op->args[1]);
589+
Expr equiv;
590+
if (widening_op->is_intrinsic(Call::widening_mul)) {
591+
equiv = arg0 * arg1;
592+
} else if (widening_op->is_intrinsic(Call::widening_add)) {
593+
equiv = arg0 + arg1;
594+
} else {
595+
internal_assert(widening_op->is_intrinsic(Call::widening_sub));
596+
equiv = arg0 - arg1;
597+
}
598+
value = codegen(simplify(equiv));
576599
return;
577600
}
578601
}

0 commit comments

Comments
 (0)