Skip to content

Commit 775f467

Browse files
authored
Optimize looping over the lower triangular in fat matrix cases
1 parent 41dbca9 commit 775f467

File tree

12 files changed

+136
-112
lines changed

12 files changed

+136
-112
lines changed

lapack-netlib/SRC/clacpy.f

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5,15 +5,13 @@
55
* Online html documentation available at
66
* http://www.netlib.org/lapack/explore-html/
77
*
8-
*> \htmlonly
98
*> Download CLACPY + dependencies
109
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/clacpy.f">
1110
*> [TGZ]</a>
1211
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/clacpy.f">
1312
*> [ZIP]</a>
1413
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/clacpy.f">
1514
*> [TXT]</a>
16-
*> \endhtmlonly
1715
*
1816
* Definition:
1917
* ===========
@@ -96,10 +94,11 @@
9694
*> \author Univ. of Colorado Denver
9795
*> \author NAG Ltd.
9896
*
99-
*> \ingroup complexOTHERauxiliary
97+
*> \ingroup lacpy
10098
*
10199
* =====================================================================
102100
SUBROUTINE CLACPY( UPLO, M, N, A, LDA, B, LDB )
101+
IMPLICIT NONE
103102
*
104103
* -- LAPACK auxiliary routine --
105104
* -- LAPACK is a software package provided by Univ. of Tennessee, --
@@ -135,7 +134,7 @@ SUBROUTINE CLACPY( UPLO, M, N, A, LDA, B, LDB )
135134
20 CONTINUE
136135
*
137136
ELSE IF( LSAME( UPLO, 'L' ) ) THEN
138-
DO 40 J = 1, N
137+
DO 40 J = 1, MIN( M, N )
139138
DO 30 I = J, M
140139
B( I, J ) = A( I, J )
141140
30 CONTINUE

lapack-netlib/SRC/clantr.f

Lines changed: 27 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -5,15 +5,13 @@
55
* Online html documentation available at
66
* http://www.netlib.org/lapack/explore-html/
77
*
8-
*> \htmlonly
98
*> Download CLANTR + dependencies
109
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/clantr.f">
1110
*> [TGZ]</a>
1211
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/clantr.f">
1312
*> [ZIP]</a>
1413
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/clantr.f">
1514
*> [TXT]</a>
16-
*> \endhtmlonly
1715
*
1816
* Definition:
1917
* ===========
@@ -134,11 +132,13 @@
134132
*> \author Univ. of Colorado Denver
135133
*> \author NAG Ltd.
136134
*
137-
*> \ingroup complexOTHERauxiliary
135+
*> \ingroup lantr
138136
*
139137
* =====================================================================
140-
REAL FUNCTION CLANTR( NORM, UPLO, DIAG, M, N, A, LDA,
138+
REAL FUNCTION CLANTR( NORM, UPLO, DIAG, M, N, A,
139+
$ LDA,
141140
$ WORK )
141+
IMPLICIT NONE
142142
*
143143
* -- LAPACK auxiliary routine --
144144
* -- LAPACK is a software package provided by Univ. of Tennessee, --
@@ -188,14 +188,16 @@ REAL FUNCTION CLANTR( NORM, UPLO, DIAG, M, N, A, LDA,
188188
DO 20 J = 1, N
189189
DO 10 I = 1, MIN( M, J-1 )
190190
SUM = ABS( A( I, J ) )
191-
IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM
191+
IF( VALUE .LT. SUM .OR.
192+
$ SISNAN( SUM ) ) VALUE = SUM
192193
10 CONTINUE
193194
20 CONTINUE
194195
ELSE
195-
DO 40 J = 1, N
196+
DO 40 J = 1, MIN( M, N )
196197
DO 30 I = J + 1, M
197198
SUM = ABS( A( I, J ) )
198-
IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM
199+
IF( VALUE .LT. SUM .OR.
200+
$ SISNAN( SUM ) ) VALUE = SUM
199201
30 CONTINUE
200202
40 CONTINUE
201203
END IF
@@ -205,14 +207,16 @@ REAL FUNCTION CLANTR( NORM, UPLO, DIAG, M, N, A, LDA,
205207
DO 60 J = 1, N
206208
DO 50 I = 1, MIN( M, J )
207209
SUM = ABS( A( I, J ) )
208-
IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM
210+
IF( VALUE .LT. SUM .OR.
211+
$ SISNAN( SUM ) ) VALUE = SUM
209212
50 CONTINUE
210213
60 CONTINUE
211214
ELSE
212-
DO 80 J = 1, N
215+
DO 80 J = 1, MIN( M, N )
213216
DO 70 I = J, M
214217
SUM = ABS( A( I, J ) )
215-
IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM
218+
IF( VALUE .LT. SUM .OR.
219+
$ SISNAN( SUM ) ) VALUE = SUM
216220
70 CONTINUE
217221
80 CONTINUE
218222
END IF
@@ -239,7 +243,7 @@ REAL FUNCTION CLANTR( NORM, UPLO, DIAG, M, N, A, LDA,
239243
IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM
240244
110 CONTINUE
241245
ELSE
242-
DO 140 J = 1, N
246+
DO 140 J = 1, MIN( M, N )
243247
IF( UDIAG ) THEN
244248
SUM = ONE
245249
DO 120 I = J + 1, M
@@ -286,7 +290,7 @@ REAL FUNCTION CLANTR( NORM, UPLO, DIAG, M, N, A, LDA,
286290
DO 220 I = N + 1, M
287291
WORK( I ) = ZERO
288292
220 CONTINUE
289-
DO 240 J = 1, N
293+
DO 240 J = 1, MIN( M, N )
290294
DO 230 I = J + 1, M
291295
WORK( I ) = WORK( I ) + ABS( A( I, J ) )
292296
230 CONTINUE
@@ -295,7 +299,7 @@ REAL FUNCTION CLANTR( NORM, UPLO, DIAG, M, N, A, LDA,
295299
DO 250 I = 1, M
296300
WORK( I ) = ZERO
297301
250 CONTINUE
298-
DO 270 J = 1, N
302+
DO 270 J = 1, MIN( M, N )
299303
DO 260 I = J, M
300304
WORK( I ) = WORK( I ) + ABS( A( I, J ) )
301305
260 CONTINUE
@@ -307,36 +311,39 @@ REAL FUNCTION CLANTR( NORM, UPLO, DIAG, M, N, A, LDA,
307311
SUM = WORK( I )
308312
IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM
309313
280 CONTINUE
310-
ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN
314+
ELSE IF( ( LSAME( NORM, 'F' ) ) .OR.
315+
$ ( LSAME( NORM, 'E' ) ) ) THEN
311316
*
312317
* Find normF(A).
313318
*
314319
IF( LSAME( UPLO, 'U' ) ) THEN
315320
IF( LSAME( DIAG, 'U' ) ) THEN
316321
SCALE = ONE
317-
SUM = MIN( M, N )
322+
SUM = REAL( MIN( M, N ) )
318323
DO 290 J = 2, N
319-
CALL CLASSQ( MIN( M, J-1 ), A( 1, J ), 1, SCALE, SUM )
324+
CALL CLASSQ( MIN( M, J-1 ), A( 1, J ), 1, SCALE,
325+
$ SUM )
320326
290 CONTINUE
321327
ELSE
322328
SCALE = ZERO
323329
SUM = ONE
324330
DO 300 J = 1, N
325-
CALL CLASSQ( MIN( M, J ), A( 1, J ), 1, SCALE, SUM )
331+
CALL CLASSQ( MIN( M, J ), A( 1, J ), 1, SCALE,
332+
$ SUM )
326333
300 CONTINUE
327334
END IF
328335
ELSE
329336
IF( LSAME( DIAG, 'U' ) ) THEN
330337
SCALE = ONE
331-
SUM = MIN( M, N )
332-
DO 310 J = 1, N
338+
SUM = REAL( MIN( M, N ) )
339+
DO 310 J = 1, MIN( M, N )
333340
CALL CLASSQ( M-J, A( MIN( M, J+1 ), J ), 1, SCALE,
334341
$ SUM )
335342
310 CONTINUE
336343
ELSE
337344
SCALE = ZERO
338345
SUM = ONE
339-
DO 320 J = 1, N
346+
DO 320 J = 1, MIN( M, N )
340347
CALL CLASSQ( M-J+1, A( J, J ), 1, SCALE, SUM )
341348
320 CONTINUE
342349
END IF

lapack-netlib/SRC/clascl.f

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -5,15 +5,13 @@
55
* Online html documentation available at
66
* http://www.netlib.org/lapack/explore-html/
77
*
8-
*> \htmlonly
98
*> Download CLASCL + dependencies
109
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/clascl.f">
1110
*> [TGZ]</a>
1211
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/clascl.f">
1312
*> [ZIP]</a>
1413
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/clascl.f">
1514
*> [TXT]</a>
16-
*> \endhtmlonly
1715
*
1816
* Definition:
1917
* ===========
@@ -136,10 +134,12 @@
136134
*> \author Univ. of Colorado Denver
137135
*> \author NAG Ltd.
138136
*
139-
*> \ingroup complexOTHERauxiliary
137+
*> \ingroup lascl
140138
*
141139
* =====================================================================
142-
SUBROUTINE CLASCL( TYPE, KL, KU, CFROM, CTO, M, N, A, LDA, INFO )
140+
SUBROUTINE CLASCL( TYPE, KL, KU, CFROM, CTO, M, N, A, LDA,
141+
$ INFO )
142+
IMPLICIT NONE
143143
*
144144
* -- LAPACK auxiliary routine --
145145
* -- LAPACK is a software package provided by Univ. of Tennessee, --
@@ -291,7 +291,7 @@ SUBROUTINE CLASCL( TYPE, KL, KU, CFROM, CTO, M, N, A, LDA, INFO )
291291
*
292292
* Lower triangular matrix
293293
*
294-
DO 50 J = 1, N
294+
DO 50 J = 1, MIN( M, N )
295295
DO 40 I = J, M
296296
A( I, J ) = A( I, J )*MUL
297297
40 CONTINUE

lapack-netlib/SRC/dlacpy.f

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5,15 +5,13 @@
55
* Online html documentation available at
66
* http://www.netlib.org/lapack/explore-html/
77
*
8-
*> \htmlonly
98
*> Download DLACPY + dependencies
109
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dlacpy.f">
1110
*> [TGZ]</a>
1211
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dlacpy.f">
1312
*> [ZIP]</a>
1413
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dlacpy.f">
1514
*> [TXT]</a>
16-
*> \endhtmlonly
1715
*
1816
* Definition:
1917
* ===========
@@ -96,10 +94,11 @@
9694
*> \author Univ. of Colorado Denver
9795
*> \author NAG Ltd.
9896
*
99-
*> \ingroup OTHERauxiliary
97+
*> \ingroup lacpy
10098
*
10199
* =====================================================================
102100
SUBROUTINE DLACPY( UPLO, M, N, A, LDA, B, LDB )
101+
IMPLICIT NONE
103102
*
104103
* -- LAPACK auxiliary routine --
105104
* -- LAPACK is a software package provided by Univ. of Tennessee, --
@@ -134,7 +133,7 @@ SUBROUTINE DLACPY( UPLO, M, N, A, LDA, B, LDB )
134133
10 CONTINUE
135134
20 CONTINUE
136135
ELSE IF( LSAME( UPLO, 'L' ) ) THEN
137-
DO 40 J = 1, N
136+
DO 40 J = 1, MIN( M, N )
138137
DO 30 I = J, M
139138
B( I, J ) = A( I, J )
140139
30 CONTINUE

0 commit comments

Comments
 (0)