@@ -62,6 +62,7 @@ function ctrsv( uplo, trans, diag, N, A, strideA1, strideA2, offsetA, x, strideX
6262 var magsq ;
6363 var remul ;
6464 var immul ;
65+ var ixend ;
6566 var isrm ;
6667 var sign ;
6768 var rex ;
@@ -119,13 +120,13 @@ function ctrsv( uplo, trans, diag, N, A, strideA1, strideA2, offsetA, x, strideX
119120 ( isrm && uplo === 'lower' && trans !== 'no-transpose' )
120121 ) {
121122 ix1 = ox + ( ( N - 1 ) * sx ) ;
123+ oa2 = oa + ( ( sa1 + sa0 ) * ( N - 1 ) ) ;
122124 for ( i1 = N - 1 ; i1 >= 0 ; i1 -- ) {
123125 rex = viewX [ ix1 ] ;
124126 imx = viewX [ ix1 + 1 ] ;
125127 if ( rex !== 0.0 || imx !== 0.0 ) {
126- oa2 = oa + ( sa1 * i1 ) ;
127128 if ( nonunit ) {
128- ia = oa2 + ( sa0 * i1 ) ;
129+ ia = oa2 ;
129130 rea = viewA [ ia ] ;
130131 ima = sign * viewA [ ia + 1 ] ;
131132 magsq = f32 ( ( rea * rea ) + ( ima * ima ) ) ;
@@ -137,10 +138,9 @@ function ctrsv( uplo, trans, diag, N, A, strideA1, strideA2, offsetA, x, strideX
137138 retmp = rex ;
138139 imtmp = imx ;
139140 }
140- ix0 = ix1 ;
141+ ia = oa2 - sa0 ;
142+ ix0 = ix1 - sx ;
141143 for ( i0 = i1 - 1 ; i0 >= 0 ; i0 -- ) {
142- ix0 -= sx ;
143- ia = oa2 + ( sa0 * i0 ) ;
144144 rea = viewA [ ia ] ;
145145 ima = sign * viewA [ ia + 1 ] ;
146146 rex = viewX [ ix0 ] ;
@@ -149,8 +149,11 @@ function ctrsv( uplo, trans, diag, N, A, strideA1, strideA2, offsetA, x, strideX
149149 immul = f32 ( ( retmp * ima ) + ( imtmp * rea ) ) ;
150150 viewX [ ix0 ] = f32 ( rex - remul ) ;
151151 viewX [ ix0 + 1 ] = f32 ( imx - immul ) ;
152+ ix0 -= sx ;
153+ ia -= sa0 ;
152154 }
153155 }
156+ oa2 -= ( sa1 + sa0 ) ;
154157 ix1 -= sx ;
155158 }
156159 return x ;
@@ -160,13 +163,13 @@ function ctrsv( uplo, trans, diag, N, A, strideA1, strideA2, offsetA, x, strideX
160163 ( isrm && uplo === 'upper' && trans !== 'no-transpose' )
161164 ) {
162165 ix1 = ox ;
166+ oa2 = oa ;
163167 for ( i1 = 0 ; i1 < N ; i1 ++ ) {
164168 rex = viewX [ ix1 ] ;
165169 imx = viewX [ ix1 + 1 ] ;
166170 if ( rex !== 0.0 || imx !== 0.0 ) {
167- oa2 = oa + ( sa1 * i1 ) ;
168171 if ( nonunit ) {
169- ia = oa2 + ( sa0 * i1 ) ;
172+ ia = oa2 ;
170173 rea = viewA [ ia ] ;
171174 ima = sign * viewA [ ia + 1 ] ;
172175 magsq = f32 ( ( rea * rea ) + ( ima * ima ) ) ;
@@ -178,10 +181,9 @@ function ctrsv( uplo, trans, diag, N, A, strideA1, strideA2, offsetA, x, strideX
178181 retmp = rex ;
179182 imtmp = imx ;
180183 }
181- ix0 = ix1 ;
184+ ia = oa2 + sa0 ;
185+ ix0 = ix1 + sx ;
182186 for ( i0 = i1 + 1 ; i0 < N ; i0 ++ ) {
183- ix0 += sx ;
184- ia = oa2 + ( sa0 * i0 ) ;
185187 rea = viewA [ ia ] ;
186188 ima = sign * viewA [ ia + 1 ] ;
187189 rex = viewX [ ix0 ] ;
@@ -190,8 +192,11 @@ function ctrsv( uplo, trans, diag, N, A, strideA1, strideA2, offsetA, x, strideX
190192 immul = f32 ( ( retmp * ima ) + ( imtmp * rea ) ) ;
191193 viewX [ ix0 ] = f32 ( rex - remul ) ;
192194 viewX [ ix0 + 1 ] = f32 ( imx - immul ) ;
195+ ia += sa0 ;
196+ ix0 += sx ;
193197 }
194198 }
199+ oa2 += ( sa1 + sa0 ) ;
195200 ix1 += sx ;
196201 }
197202 return x ;
@@ -201,25 +206,25 @@ function ctrsv( uplo, trans, diag, N, A, strideA1, strideA2, offsetA, x, strideX
201206 ( isrm && uplo === 'lower' && trans === 'no-transpose' )
202207 ) {
203208 ix1 = ox ;
209+ oa2 = oa ;
204210 for ( i1 = 0 ; i1 < N ; i1 ++ ) {
205- oa2 = oa + ( sa1 * i1 ) ;
206211 rex = viewX [ ix1 ] ;
207212 imx = viewX [ ix1 + 1 ] ;
208213 retmp = rex ;
209214 imtmp = imx ;
210215 ix0 = ox ;
216+ ia = oa2 ;
211217 for ( i0 = 0 ; i0 < i1 ; i0 ++ ) {
212- ia = oa2 + ( sa0 * i0 ) ;
213218 rea = viewA [ ia ] ;
214219 ima = sign * viewA [ ia + 1 ] ;
215220 rex = viewX [ ix0 ] ;
216221 imx = viewX [ ix0 + 1 ] ;
217222 retmp = f32 ( retmp - f32 ( ( rex * rea ) - ( imx * ima ) ) ) ;
218223 imtmp = f32 ( imtmp - f32 ( ( rex * ima ) + ( imx * rea ) ) ) ;
219224 ix0 += sx ;
225+ ia += sa0 ;
220226 }
221227 if ( nonunit ) {
222- ia = oa2 + ( sa0 * i1 ) ;
223228 rea = viewA [ ia ] ;
224229 ima = sign * viewA [ ia + 1 ] ;
225230 magsq = f32 ( ( rea * rea ) + ( ima * ima ) ) ;
@@ -231,30 +236,32 @@ function ctrsv( uplo, trans, diag, N, A, strideA1, strideA2, offsetA, x, strideX
231236 viewX [ ix1 ] = retmp ;
232237 viewX [ ix1 + 1 ] = imtmp ;
233238 ix1 += sx ;
239+ oa2 += sa1 ;
234240 }
235241 return x ;
236242 }
237243 // ( !isrm && uplo === 'lower' && trans !== 'no-transpose' ) || ( isrm && uplo === 'upper' && trans === 'no-transpose' )
238244 ix1 = ox + ( ( N - 1 ) * sx ) ;
245+ oa2 = oa + ( ( sa1 + sa0 ) * ( N - 1 ) ) ;
246+ ixend = ox + ( ( N - 1 ) * sx ) ;
239247 for ( i1 = N - 1 ; i1 >= 0 ; i1 -- ) {
240- oa2 = oa + ( sa1 * i1 ) ;
241248 rex = viewX [ ix1 ] ;
242249 imx = viewX [ ix1 + 1 ] ;
243250 retmp = rex ;
244251 imtmp = imx ;
245- ix0 = ox + ( ( N - 1 ) * sx ) ;
252+ ix0 = ixend ;
253+ ia = oa2 ;
246254 for ( i0 = N - 1 ; i0 > i1 ; i0 -- ) {
247- ia = oa2 + ( sa0 * i0 ) ;
248255 rea = viewA [ ia ] ;
249256 ima = sign * viewA [ ia + 1 ] ;
250257 rex = viewX [ ix0 ] ;
251258 imx = viewX [ ix0 + 1 ] ;
252259 retmp = f32 ( retmp - f32 ( ( rex * rea ) - ( imx * ima ) ) ) ;
253260 imtmp = f32 ( imtmp - f32 ( ( rex * ima ) + ( imx * rea ) ) ) ;
261+ ia -= sa0 ;
254262 ix0 -= sx ;
255263 }
256264 if ( nonunit ) {
257- ia = oa2 + ( sa0 * i1 ) ;
258265 rea = viewA [ ia ] ;
259266 ima = sign * viewA [ ia + 1 ] ;
260267 magsq = f32 ( ( rea * rea ) + ( ima * ima ) ) ;
@@ -266,6 +273,7 @@ function ctrsv( uplo, trans, diag, N, A, strideA1, strideA2, offsetA, x, strideX
266273 viewX [ ix1 ] = retmp ;
267274 viewX [ ix1 + 1 ] = imtmp ;
268275 ix1 -= sx ;
276+ oa2 -= sa1 ;
269277 }
270278 return x ;
271279}
0 commit comments