Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 9 additions & 7 deletions kernel/riscv64/gemv_n_vector.c
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define VSSEV_FLOAT RISCV_RVV(vsse32_v_f32m8)
#define VFMACCVF_FLOAT RISCV_RVV(vfmacc_vf_f32m8)
#define VFMUL_VF_FLOAT RISCV_RVV(vfmul_vf_f32m8)
#define VFILL_ZERO_FLOAT RISCV_RVV(vfsub_vv_f32m8)
#define VREINTERPRET_FLOAT RISCV_RVV(vreinterpret_v_i32m8_f32m8)
#define VFILL_INT RISCV_RVV(vmv_v_x_i32m8)
#else
#define VSETVL(n) RISCV_RVV(vsetvl_e64m4)(n)
#define FLOAT_V_T vfloat64m4_t
Expand All @@ -45,7 +46,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define VSSEV_FLOAT RISCV_RVV(vsse64_v_f64m4)
#define VFMACCVF_FLOAT RISCV_RVV(vfmacc_vf_f64m4)
#define VFMUL_VF_FLOAT RISCV_RVV(vfmul_vf_f64m4)
#define VFILL_ZERO_FLOAT RISCV_RVV(vfsub_vv_f64m4)
#define VREINTERPRET_FLOAT RISCV_RVV(vreinterpret_v_i64m4_f64m4)
#define VFILL_INT RISCV_RVV(vmv_v_x_i64m4)
#endif

int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT alpha, FLOAT *a, BLASLONG lda, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y, FLOAT *buffer)
Expand All @@ -56,7 +58,7 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT alpha, FLOAT *a, BLASLO
if(n < 0) return(0);
FLOAT *a_ptr = a;
FLOAT temp[4];
FLOAT_V_T va0, va1, vy0, vy1,vy0_temp, vy1_temp , temp_v ,va0_0 , va0_1 , va1_0 ,va1_1 ,va2_0 ,va2_1 ,va3_0 ,va3_1 ;
FLOAT_V_T va0, va1, vy0, vy1,vy0_temp, vy1_temp ,va0_0 , va0_1 , va1_0 ,va1_1 ,va2_0 ,va2_1 ,va3_0 ,va3_1 ;
unsigned int gvl = 0;
if(inc_y == 1 && inc_x == 1){
gvl = VSETVL(m);
Expand All @@ -66,8 +68,8 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT alpha, FLOAT *a, BLASLO
ix = 0;
vy0_temp = VLEV_FLOAT(&y[j], gvl);
vy1_temp = VLEV_FLOAT(&y[j+gvl], gvl);
vy0 = VFILL_ZERO_FLOAT(vy0 , vy0 , gvl);
vy1 = VFILL_ZERO_FLOAT(vy1 , vy1 , gvl);
vy0 = VREINTERPRET_FLOAT(VFILL_INT(0, gvl));
vy1 = VREINTERPRET_FLOAT(VFILL_INT(0, gvl));
int i;

int remainder = n % 4;
Expand Down Expand Up @@ -118,7 +120,7 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT alpha, FLOAT *a, BLASLO
a_ptr = a;
ix = 0;
vy0_temp = VLEV_FLOAT(&y[j], gvl);
vy0 = VFILL_ZERO_FLOAT(vy0 , vy0 , gvl);
vy0 = VREINTERPRET_FLOAT(VFILL_INT(0, gvl));
int i;

int remainder = n % 4;
Expand Down Expand Up @@ -251,4 +253,4 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT alpha, FLOAT *a, BLASLO
}
}
return(0);
}
}
2 changes: 1 addition & 1 deletion kernel/riscv64/omatcopy_cn_vector.c
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ int CNAME(BLASLONG rows, BLASLONG cols, FLOAT alpha, FLOAT *a, BLASLONG lda, FLO
FLOAT *aptr,*bptr;
size_t vl;

FLOAT_V_T va, vb,va1,vb1;
FLOAT_V_T va,va1;
if ( rows <= 0 ) return(0);
if ( cols <= 0 ) return(0);

Expand Down
1 change: 0 additions & 1 deletion kernel/riscv64/zamax_vector.c
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,6 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
FLOAT_V_T_M1 v_res;
v_res = VFMVVF_FLOAT_M1(0, 1);

MASK_T mask0, mask1;
BLASLONG stride_x = inc_x * sizeof(FLOAT) * 2;
gvl = VSETVL(n);
v_max = VFMVVF_FLOAT(0, gvl);
Expand Down
1 change: 0 additions & 1 deletion kernel/riscv64/zamin_vector.c
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,6 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
FLOAT_V_T_M1 v_res;
v_res = VFMVVF_FLOAT_M1(FLT_MAX, 1);

MASK_T mask0, mask1;
BLASLONG stride_x = inc_x * sizeof(FLOAT) * 2;
gvl = VSETVL(n);
v_min = VFMVVF_FLOAT(FLT_MAX, gvl);
Expand Down
4 changes: 1 addition & 3 deletions kernel/riscv64/zasum_vector.c
Original file line number Diff line number Diff line change
Expand Up @@ -76,14 +76,13 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
FLOAT asumf=0.0;
if (n <= 0 || inc_x <= 0) return(asumf);
unsigned int gvl = 0;
FLOAT_V_T v0, v1, v_zero,v_sum;
FLOAT_V_T v0, v1,v_sum;
FLOAT_V_T_M1 v_res;
v_res = VFMVVF_FLOAT_M1(0, 1);

if(inc_x == 1){
BLASLONG n2 = n * 2;
gvl = VSETVL(n2);
v_zero = VFMVVF_FLOAT(0, gvl);
if(gvl <= n2/2){
v_sum = VFMVVF_FLOAT(0, gvl);
for(i=0,j=0; i<n2/(gvl*2); i++){
Expand All @@ -108,7 +107,6 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
}else{
gvl = VSETVL(n);
unsigned int stride_x = inc_x * sizeof(FLOAT) * 2;
v_zero = VFMVVF_FLOAT(0, gvl);

BLASLONG inc_xv = inc_x * 2 * gvl;
v_sum = VFMVVF_FLOAT(0, gvl);
Expand Down
4 changes: 2 additions & 2 deletions kernel/riscv64/zgemv_n_vector.c
Original file line number Diff line number Diff line change
Expand Up @@ -55,8 +55,8 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT alpha_r, FLOAT alpha_i,
BLASLONG i = 0, j = 0, k = 0;
BLASLONG ix = 0, iy = 0;
FLOAT *a_ptr = a;
FLOAT temp_r = 0.0, temp_i = 0.0, temp_r1, temp_i1, temp_r2, temp_i2, temp_r3, temp_i3, temp_rr[4], temp_ii[4];
FLOAT_V_T va0, va1, vy0, vy1, vy0_new, vy1_new, va2, va3, va4, va5, va6, va7, temp_iv, temp_rv, x_v0, x_v1, temp_v1, temp_v2, temp_v3, temp_v4;
FLOAT temp_r = 0.0, temp_i = 0.0, temp_rr[4], temp_ii[4];
FLOAT_V_T va0, va1, vy0, vy1, vy0_new, vy1_new, va2, va3, va4, va5, va6, va7, temp_iv, temp_rv, x_v0, x_v1;
unsigned int gvl = 0;
BLASLONG stride_a = sizeof(FLOAT) * 2;
BLASLONG stride_y = inc_y * sizeof(FLOAT) * 2;
Expand Down
4 changes: 1 addition & 3 deletions kernel/riscv64/zsum_vector.c
Original file line number Diff line number Diff line change
Expand Up @@ -71,14 +71,13 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
FLOAT asumf=0.0;
if (n <= 0 || inc_x <= 0) return(asumf);
unsigned int gvl = 0;
FLOAT_V_T v0, v1, v_zero,v_sum;
FLOAT_V_T v0, v1,v_sum;
FLOAT_V_T_M1 v_res;
v_res = VFMVVF_FLOAT_M1(0, 1);

if(inc_x == 1){
BLASLONG n2 = n * 2;
gvl = VSETVL(n2);
v_zero = VFMVVF_FLOAT(0, gvl);
if(gvl <= n2/2){
v_sum = VFMVVF_FLOAT(0, gvl);
for(i=0,j=0; i<n2/(gvl*2); i++){
Expand All @@ -100,7 +99,6 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
}else{
gvl = VSETVL(n);
unsigned int stride_x = inc_x * sizeof(FLOAT) * 2;
v_zero = VFMVVF_FLOAT(0, gvl);

BLASLONG inc_xv = inc_x * 2 * gvl;
v_sum = VFMVVF_FLOAT(0, gvl);
Expand Down
Loading