Skip to content

Commit d1eaced

Browse files
zhongjuzheIncarnation-p-lee
authored and committed
RISC-V: Disallow transformation into VLMAX AVL for cond_len_xxx when length is in range [0, 31]
Notice that we have the following situation: vsetivli zero,4,e32,m1,ta,ma vlseg4e32.v v4,(a5) vlseg4e32.v v12,(a3) vsetvli a5,zero,e32,m1,tu,ma ---> This is redundant since VLMAX AVL = 4 when it is fixed-vlmax vfadd.vf v3,v13,fa0 vfadd.vf v1,v12,fa1 vfmul.vv v17,v3,v5 vfmul.vv v16,v1,v5 The root cause is that we blindly transform COND_LEN_xxx into VLMAX AVL whenever len == NUNITS. However, we don't need to transform all of them: when len is in the range [0, 31], it does not need to consume a scalar register. After this patch: vsetivli zero,4,e32,m1,tu,ma addi a4,a5,400 vlseg4e32.v v12,(a3) vfadd.vf v3,v13,fa0 vfadd.vf v1,v12,fa1 vlseg4e32.v v4,(a4) vfadd.vf v2,v14,fa1 vfmul.vv v17,v3,v5 vfmul.vv v16,v1,v5 Tested on both RV32 and RV64 with no regressions. OK for trunk? gcc/ChangeLog: * config/riscv/riscv-v.cc (is_vlmax_len_p): New function. (expand_load_store): Disallow transformation into VLMAX when len is in range of [0,31] (expand_cond_len_op): Ditto. (expand_gather_scatter): Ditto. (expand_lanes_load_store): Ditto. (expand_fold_extract_last): Ditto. gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/autovec/post-ra-avl.c: Adapt test. * gcc.target/riscv/rvv/base/vf_avl-2.c: New test.
1 parent 7de05ad commit d1eaced

File tree

3 files changed

+37
-7
lines changed

3 files changed

+37
-7
lines changed

gcc/config/riscv/riscv-v.cc

Lines changed: 15 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,16 @@ imm_avl_p (machine_mode mode)
6868
: false;
6969
}
7070

71+
/* Return true if LEN is a constant equal to the number of units of MODE and lies outside the range [0, 31] (i.e. LEN does not satisfy constraint K). */
72+
static bool
73+
is_vlmax_len_p (machine_mode mode, rtx len)
74+
{
75+
poly_int64 value;
76+
return poly_int_rtx_p (len, &value)
77+
&& known_eq (value, GET_MODE_NUNITS (mode))
78+
&& !satisfies_constraint_K (len);
79+
}
80+
7181
/* Helper functions for insn_flags && insn_types */
7282

7383
/* Return true if caller need pass mask operand for insn pattern with
@@ -3776,7 +3786,7 @@ expand_load_store (rtx *ops, bool is_load)
37763786
rtx len = ops[3];
37773787
machine_mode mode = GET_MODE (ops[0]);
37783788

3779-
if (poly_int_rtx_p (len, &value) && known_eq (value, GET_MODE_NUNITS (mode)))
3789+
if (is_vlmax_len_p (mode, len))
37803790
{
37813791
/* If the length operand is equal to VF, it is VLMAX load/store. */
37823792
if (is_load)
@@ -3842,8 +3852,7 @@ expand_cond_len_op (unsigned icode, insn_flags op_type, rtx *ops, rtx len)
38423852
machine_mode mask_mode = GET_MODE (mask);
38433853
poly_int64 value;
38443854
bool is_dummy_mask = rtx_equal_p (mask, CONSTM1_RTX (mask_mode));
3845-
bool is_vlmax_len
3846-
= poly_int_rtx_p (len, &value) && known_eq (value, GET_MODE_NUNITS (mode));
3855+
bool is_vlmax_len = is_vlmax_len_p (mode, len);
38473856

38483857
unsigned insn_flags = HAS_DEST_P | HAS_MASK_P | HAS_MERGE_P | op_type;
38493858
if (is_dummy_mask)
@@ -4012,7 +4021,7 @@ expand_gather_scatter (rtx *ops, bool is_load)
40124021
unsigned inner_offsize = GET_MODE_BITSIZE (inner_idx_mode);
40134022
poly_int64 nunits = GET_MODE_NUNITS (vec_mode);
40144023
poly_int64 value;
4015-
bool is_vlmax = poly_int_rtx_p (len, &value) && known_eq (value, nunits);
4024+
bool is_vlmax = is_vlmax_len_p (vec_mode, len);
40164025

40174026
/* Extend the offset element to address width. */
40184027
if (inner_offsize < BITS_PER_WORD)
@@ -4199,7 +4208,7 @@ expand_lanes_load_store (rtx *ops, bool is_load)
41994208
rtx reg = is_load ? ops[0] : ops[1];
42004209
machine_mode mode = GET_MODE (ops[0]);
42014210

4202-
if (poly_int_rtx_p (len, &value) && known_eq (value, GET_MODE_NUNITS (mode)))
4211+
if (is_vlmax_len_p (mode, len))
42034212
{
42044213
/* If the length operand is equal to VF, it is VLMAX load/store. */
42054214
if (is_load)
@@ -4252,7 +4261,7 @@ expand_fold_extract_last (rtx *ops)
42524261
rtx slide_vect = gen_reg_rtx (mode);
42534262
insn_code icode;
42544263

4255-
if (poly_int_rtx_p (len, &value) && known_eq (value, GET_MODE_NUNITS (mode)))
4264+
if (is_vlmax_len_p (mode, len))
42564265
len = NULL_RTX;
42574266

42584267
/* Calculate the number of 1-bit in mask. */

gcc/testsuite/gcc.target/riscv/rvv/autovec/post-ra-avl.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,4 +13,4 @@ int foo() {
1313
return a;
1414
}
1515

16-
/* { dg-final { scan-assembler-times {vsetvli\s+[a-x0-9]+,\s*zero} 1 } } */
16+
/* { dg-final { scan-assembler-not {vsetvli\s+[a-x0-9]+,\s*zero} } } */
Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
/* { dg-do compile } */
2+
/* { dg-options "-O3 -march=rv64gcv -mabi=lp64d --param riscv-autovec-preference=fixed-vlmax" } */
3+
4+
float f[12][100];
5+
6+
/* Each loop iteration (r advances by 4) writes the four consecutive elements f[0][r] .. f[0][r+3], computed from f[1], f[2], v1 and v2. */
void bad1(float v1, float v2)
7+
{
8+
for (int r = 0; r < 100; r += 4)
9+
{
10+
int i = r + 1;
11+
f[0][r] = f[1][r] * (f[2][r] + v2) - f[1][i] * (f[2][i] + v1);
12+
f[0][i] = f[1][r] * (f[2][i] + v1) + f[1][i] * (f[2][r] + v2);
13+
f[0][r+2] = f[1][r+2] * (f[2][r+2] + v2) - f[1][i+2] * (f[2][i+2] + v1);
14+
f[0][i+2] = f[1][r+2] * (f[2][i+2] + v1) + f[1][i+2] * (f[2][r+2] + v2);
15+
}
16+
}
17+
18+
/* { dg-final { scan-assembler-times {vsetivli\s+zero,\s*4,\s*e32,\s*m1,\s*t[au],\s*m[au]} 1 } } */
19+
/* { dg-final { scan-assembler-times {vsetivli\s+zero,\s*1,\s*e32,\s*m1,\s*t[au],\s*m[au]} 1 } } */
20+
/* { dg-final { scan-assembler-times {vsetivli} 2 } } */
21+
/* { dg-final { scan-assembler-not {vsetvli} } } */

0 commit comments

Comments
 (0)