@@ -399,7 +399,7 @@ int lapack_dgesvd(char *jobu, char *jobvt, aocl_int64_t *m, aocl_int64_t *n, dou
399399 if (wntun )
400400 {
401401 /* Path 1 (M much larger than N, JOBU='N') */
402- if ((!wntvo ) && (* m <= FLA_SVD_SMALL_SIZE_THRESH2 )
402+ if ((!wntvo ) && (* m <= FLA_SVD_SMALL_SIZE_THRESH1 )
403403 && FLA_IS_MIN_ARCH_ID (FLA_ARCH_AVX2 ))
404404 {
405405 i__2 = * n << 2 ;
@@ -575,7 +575,7 @@ int lapack_dgesvd(char *jobu, char *jobvt, aocl_int64_t *m, aocl_int64_t *n, dou
575575 {
576576 /* Path 6 (M much larger than N, JOBU='S', JOBVT='S' or */
577577 /* 'A') */
578- if (* m <= FLA_SVD_SMALL_SIZE_THRESH2 && FLA_IS_MIN_ARCH_ID (FLA_ARCH_AVX2 ))
578+ if (* m <= FLA_SVD_SMALL_SIZE_THRESH1 && FLA_IS_MIN_ARCH_ID (FLA_ARCH_AVX2 ))
579579 {
580580 i__2 = * n * 3 + * m ;
581581 maxwrk = fla_max (i__2 , bdspac );
@@ -724,9 +724,8 @@ int lapack_dgesvd(char *jobu, char *jobvt, aocl_int64_t *m, aocl_int64_t *n, dou
724724 else
725725 {
726726 /* Path 10 (M at least N, but not much larger) */
727- if (!(wntuo || wntvo ) && (((wntun && wntvn ) && (* m < FLA_SVD_SMALL_SIZE_THRESH3 )))
728- || ((wntuas || wntvas ) && (* m < FLA_SVD_SMALL_SIZE_THRESH1 ))
729- && FLA_IS_MIN_ARCH_ID (FLA_ARCH_AVX2 ))
727+ if (((wntun || wntus ) && (wntvn || wntvs ) && (* m < FLA_SVD_SMALL_SIZE_THRESH1 ))
728+ && FLA_IS_MIN_ARCH_ID (FLA_ARCH_AVX2 ))
730729 {
731730 i__2 = * n * 3 + * m ;
732731 maxwrk = fla_max (i__2 , bdspac );
@@ -782,7 +781,7 @@ int lapack_dgesvd(char *jobu, char *jobvt, aocl_int64_t *m, aocl_int64_t *n, dou
782781 if (wntvn )
783782 {
784783 /* Path 1t(N much larger than M, JOBVT='N') */
785- if ((wntun && wntvn ) && (* n <= FLA_SVD_SMALL_SIZE_THRESH2 )
784+ if ((wntun && wntvn ) && (* n <= FLA_SVD_SMALL_SIZE_THRESH2 ) && ( * m < FLA_SVD_SMALL_SIZE_THRESH0 )
786785 && FLA_IS_MIN_ARCH_ID (FLA_ARCH_AVX2 ))
787786 {
788787 i__2 = * m << 2 ;
@@ -956,7 +955,7 @@ int lapack_dgesvd(char *jobu, char *jobvt, aocl_int64_t *m, aocl_int64_t *n, dou
956955 {
957956 /* Path 6t(N much larger than M, JOBU='S' or 'A', */
958957 /* JOBVT='S') */
959- if (* n <= FLA_SVD_SMALL_SIZE_THRESH2 && FLA_IS_MIN_ARCH_ID (FLA_ARCH_AVX2 ))
958+ if (* n <= FLA_SVD_SMALL_SIZE_THRESH1 && FLA_IS_MIN_ARCH_ID (FLA_ARCH_AVX2 ))
960959 {
961960 i__2 = * m * 3 + * n ;
962961 minwrk = fla_max (i__2 , bdspac );
@@ -1793,7 +1792,7 @@ int lapack_dgesvd(char *jobu, char *jobvt, aocl_int64_t *m, aocl_int64_t *n, dou
17931792 /* Path 1 (M much larger than N, JOBU='N') */
17941793 /* No left singular vectors to be computed */
17951794#if FLA_ENABLE_AMD_OPT
1796- if ((!wntvo ) && (* m <= FLA_SVD_SMALL_SIZE_THRESH2 )
1795+ if ((!wntvo ) && (* m <= FLA_SVD_SMALL_SIZE_THRESH1 )
17971796 && FLA_IS_MIN_ARCH_ID (FLA_ARCH_AVX2 ))
17981797 {
17991798 fla_dgesvd_small6 (0 , wntvas , m , n , & a [a_offset ], lda , NULL , ldu , & s [1 ], NULL ,
@@ -2384,7 +2383,7 @@ int lapack_dgesvd(char *jobu, char *jobvt, aocl_int64_t *m, aocl_int64_t *n, dou
23842383 /* N right singular vectors to be computed in VT */
23852384 /* Computing MAX */
23862385#if FLA_ENABLE_AMD_OPT
2387- if (* m <= FLA_SVD_SMALL_SIZE_THRESH2 && FLA_IS_MIN_ARCH_ID (FLA_ARCH_AVX2 ))
2386+ if (* m <= FLA_SVD_SMALL_SIZE_THRESH1 && FLA_IS_MIN_ARCH_ID (FLA_ARCH_AVX2 ))
23882387 {
23892388 fla_dgesvd_small6 (wntus , wntvas , m , n , & a [a_offset ], lda , & a [a_offset ], lda ,
23902389 & s [1 ], & u [u_offset ], ldu , & vt [vt_offset ], ldvt , & work [1 ],
@@ -2923,7 +2922,7 @@ int lapack_dgesvd(char *jobu, char *jobvt, aocl_int64_t *m, aocl_int64_t *n, dou
29232922 /* Path 10 (M at least N, but not much larger) */
29242923 /* Reduce to bidiagonal form without QR decomposition */
29252924#if FLA_ENABLE_AMD_OPT
2926- if (((wntun || wntus ) && (wntvn || wntvs ) && (* m < FLA_SVD_SMALL_SIZE_THRESH3 ))
2925+ if (((wntun || wntus ) && (wntvn || wntvs ) && (* m < FLA_SVD_SMALL_SIZE_THRESH1 ))
29272926 && FLA_IS_MIN_ARCH_ID (FLA_ARCH_AVX2 ))
29282927 {
29292928 fla_dgesvd_xx_small10 (wntuas , wntvas , m , n , n , & a [a_offset ], lda , & s [1 ],
@@ -3048,7 +3047,7 @@ int lapack_dgesvd(char *jobu, char *jobvt, aocl_int64_t *m, aocl_int64_t *n, dou
30483047 itau = 1 ;
30493048 iwork = itau + * m ;
30503049#if FLA_ENABLE_AMD_OPT
3051- if ((wntun && wntvn ) && (* n <= FLA_SVD_SMALL_SIZE_THRESH2 )
3050+ if ((wntun && wntvn ) && (* n <= FLA_SVD_SMALL_SIZE_THRESH2 ) && ( * m < FLA_SVD_SMALL_SIZE_THRESH0 )
30523051 && FLA_IS_MIN_ARCH_ID (FLA_ARCH_AVX2 ))
30533052 {
30543053 /* Compute A=L*Q */
@@ -3636,7 +3635,7 @@ int lapack_dgesvd(char *jobu, char *jobvt, aocl_int64_t *m, aocl_int64_t *n, dou
36363635 /* M right singular vectors to be computed in VT and */
36373636 /* M left singular vectors to be computed in U */
36383637#if FLA_ENABLE_AMD_OPT
3639- if (* n <= FLA_SVD_SMALL_SIZE_THRESH2 && FLA_IS_MIN_ARCH_ID (FLA_ARCH_AVX2 )
3638+ if (* n <= FLA_SVD_SMALL_SIZE_THRESH1 && FLA_IS_MIN_ARCH_ID (FLA_ARCH_AVX2 )
36403639 && * lwork >= maxwrk )
36413640 {
36423641 iu = 1 ;
0 commit comments