diff --git a/.cirrus.yml b/.cirrus.yml index bc7fbdf77d..7481d4bfd2 100644 --- a/.cirrus.yml +++ b/.cirrus.yml @@ -89,14 +89,16 @@ task: type: text/plain macos_instance: - image: ghcr.io/cirruslabs/macos-sonoma-xcode:latest + image: ghcr.io/cirruslabs/macos-tahoe-xcode:latest task: name: AppleM1/LLVM armv7-androidndk xbuild compile_script: - brew install --cask android-ndk - export ANDROID_NDK_HOME="/opt/homebrew/share/android-ndk" - export CC=/opt/homebrew/share/android-ndk/toolchains/llvm/prebuilt/darwin-x86_64/bin/armv7a-linux-androideabi23-clang - - make TARGET=ARMV7 ARM_SOFTFP_ABI=1 NUM_THREADS=32 HOSTCC=clang NOFORTRAN=1 RANLIB="ls -l" + - export AR=/opt/homebrew/share/android-ndk/toolchains/llvm/prebuilt/darwin-x86_64/bin/llvm-ar + - export RANLIB=/opt/homebrew/share/android-ndk/toolchains/llvm/prebuilt/darwin-x86_64/bin/llvm-ranlib + - make TARGET=ARMV7 ARM_SOFTFP_ABI=1 NUM_THREADS=32 HOSTCC=clang NOFORTRAN=1 always: config_artifacts: path: "*conf*" diff --git a/CMakeLists.txt b/CMakeLists.txt index 57374448a9..8559edef27 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -309,8 +309,8 @@ if (USE_OPENMP) endif() # Fix "Argument list too long" for macOS - mostly seen with older OS versions on POWERPC or Intel CPUs -if(APPLE) - # Use response files +if(APPLE AND "${CMAKE_GENERATOR}" MATCHES ".*Makefiles") + # Use response files to get around the ARG_MAX limit, unless using the Ninja generator set(CMAKE_C_USE_RESPONSE_FILE_FOR_OBJECTS 1) # Always build static library first if(BUILD_STATIC_LIBS) @@ -333,7 +333,7 @@ if(APPLE) endif() if(NOT NOFORTRAN) set(CMAKE_Fortran_USE_RESPONSE_FILE_FOR_OBJECTS 1) - set(CMAKE_Fortran_CREATE_STATIC_LIBRARY ${CREATE_STATIC_LIBRARY_COMMAND}) + set(CMAKE_Fortran_CREATE_STATIC_LIBRARY ${CREATE_STATIC_LIBRARY_COMMAND}) if(BUILD_SHARED_LIBS) set(CMAKE_Fortran_CREATE_SHARED_LIBRARY "sh -c 'echo \"\" | ${CMAKE_Fortran_COMPILER} -o dummy.o -c -x f95-cpp-input - '" diff --git a/CONTRIBUTORS.md b/CONTRIBUTORS.md index d1424b2777..cea42764bc 100644 --- a/CONTRIBUTORS.md +++ b/CONTRIBUTORS.md @@ -275,3 +275,6 @@ In chronological order: * Fadi Arafeh * [2026-03-05] Accelerate SVE128 SBGEMM/BGEMM + +* Nathan Sircombe + * [2026-04-16] Add CPU ID for Neoverse V3 diff --git a/Changelog.txt b/Changelog.txt index 20c76ff522..ee7701de1d 100644 --- a/Changelog.txt +++ b/Changelog.txt @@ -1,4 +1,54 @@ OpenBLAS ChangeLog +==================================================================== +Version 0.3.33 +23-Apr-2026 + +general: + - fixed an incorrect cast in the SBGEMM test case that could lead to spurious test failures + - fixed an invalid memory access in the converted C version of the CBLAS tests + - made the BIGNUMA setting automatic when the number of cores exceeds 256 + - Imported recent updates from Reference-LAPACK to realign with its upcoming 3.13.0 release: + - Implement ?LARF1F and ?ORM2R (Reference-LAPACK PRs 1019,1020,1196,1257) + - Change loop order in ?GETC2 to improve performance (Reference-LAPACK PR 1023) + - Change WORK array dimension in ?GELQS/?GEQRS (Reference-LAPACK PR 1094) + - Add NaN checks for input matrix A in ?GEEV (Reference-LAPACK PR 1136) + - Fix support for jobu/v in LAPACKE_?GESVDQ_WORK (Reference-LAPACK PRs 1146,1221) + - Fix display of version number in LAPACK testsuite (Reference-LAPACK PR 1149) + - Fix DGGES test seed to avoid bad matrix cases (Reference-LAPACK PR 1187) + - Fix truncation of large WORK array sizes in ZHE (Reference-LAPACK PR 1195) + - Fix overwriting of LDSWORK parameter in ?TRSYL3 (Reference-LAPACK PR 1206) + - Fix overwriting of error states in some EIG tests (Reference-LAPACK PR 1207) + - Remove unused parameter in DORBDB3/ZUNBDB3 (Reference-LAPACK PR 1209) + - Re-enable testing of ?BB and ?GG driver functions (Reference-LAPACK PR 1211) + - Fix workspace size calculation in ?TGSEN (Reference-LAPACK PR 774) + - Fix typos in the EIG DMD tests and initialized the cutoff variable (PR 1212,1228) + - Optimized looping in ?LACPY/?LASCL/?LANTR with fat matrix and UPLO=L (PR 1251) + +arm64: + - worked around a serious miscompilation of the DDOT kernel by GCC15, affecting + most non-SVE targets, and SVE targets in the case of non-unit array stride) + - fixed an accuracy issue in the GEMV kernel for Neoverse V1 and other SVE targets + - fixed broken STRMM and SSYMM in DYNAMIC_ARCH builds when running on non-SME hardware + - added an optimized SHGEMM kernel for Neoverse N2 + - fixed DYNAMIC_ARCH builds under Windows on Arm + - Added autodetection of Cortex A75/A76 in DYNAMIC_ARCH builds + - Added autodetection of Neoverse V3, currently supported through V2 kernels + - Re-added support for the "VORTEX" target in DYNAMIC_ARCH builds with DYNAMIC_LIST + - Fixed CMake-based builds that use the "Ninja" generator + +loongarch64: + - fixed a build failure due to missing support for the new half-precision float type + - fixed a long-standing bug in asserting 64bit capability in the c_check helper script + +x86_64: + - added a workaround for miscompilation of the AVX512 GEMM kernels by LLVM on Windows + - fixed a build failure in the LAED3 code when compiling with MinGW on Windows + - fixed CMake-based compilation with the NVIDIA HPC compiler + - Fixed CMake-based builds that use the "Ninja" generator + +wasm: + - added optimized kernels for STRSM and DTRSM + ==================================================================== Version 0.3.32 23-Mar-2026 diff --git a/Makefile.rule b/Makefile.rule index b327fc0c2d..ac22da851e 100644 --- a/Makefile.rule +++ b/Makefile.rule @@ -3,7 +3,7 @@ # # This library's version -VERSION = 0.3.32 +VERSION = 0.3.32.dev # If you set this prefix, the library name will be lib$(LIBNAMESUFFIX)openblas.a # and lib$(LIBNAMESUFFIX)openblas.so, with a matching soname in the shared library diff --git a/README.md b/README.md index 3fc1971ea4..803ae1b2c3 100644 --- a/README.md +++ b/README.md @@ -176,7 +176,7 @@ Please read `GotoBLAS_01Readme.txt` for older CPU models already supported by th - **Cortex-A53**: same as ARMV8 (different cpu specifications) - **Cortex-A55**: same as ARMV8 (different cpu specifications) - **Cortex A57**: Optimized Level-3 and Level-2 functions -- **Cortex A72**: same as A57 ( different cpu specifications) +- **Cortex A72**: same as A57 (different cpu specifications) - **Cortex A73**: same as A57 (different cpu specifications) - **Cortex A76**: same as A57 (different cpu specifications) - **Falkor**: same as A57 (different cpu specifications) @@ -189,6 +189,8 @@ Please read `GotoBLAS_01Readme.txt` for older CPU models already supported by th - **Neoverse V1**: (AWS Graviton3) optimized Level-3 BLAS - **Neoverse N2**: preliminary support - **Neoverse V2**: preliminary support +- **Neoverse V3**: preliminary support +- **Neoverse V3AE**: preliminary support - **Apple Vortex**: preliminary support based on ThunderX2/3 - **Apple VortexM4**: preliminary support based on ThunderX2/3, SME kernels for SGEMM,SSYMM,STRMM,SSYRK,SSYR2K - **A64FX**: preliminary support, optimized Level-3 BLAS diff --git a/c_check b/c_check index 169440c3ab..246c3295be 100755 --- a/c_check +++ b/c_check @@ -131,7 +131,7 @@ case "$architecture" in defined=1 ;; arm|arm64) defined=1 ;; - zarch|e2k|alpha|ia64|riscv64|loonarch64|wasm) + zarch|e2k|alpha|ia64|riscv64|loongarch64|wasm) defined=1 BINARY=64 ;; diff --git a/cmake/arch.cmake b/cmake/arch.cmake index ef0b63654a..faa776343b 100644 --- a/cmake/arch.cmake +++ b/cmake/arch.cmake @@ -48,9 +48,11 @@ if (DYNAMIC_ARCH) if (${CMAKE_C_COMPILER_VERSION} VERSION_GREATER_EQUAL 11) # SVE ACLE supported in LLVM >= 11 set(DYNAMIC_CORE ${DYNAMIC_CORE} NEOVERSEV1 NEOVERSEN2 ARMV8SVE A64FX) endif () + if (NOT ${CMAKE_SYSTEM_NAME} STREQUAL "Windows") if (${CMAKE_C_COMPILER_VERSION} VERSION_GREATER_EQUAL 19 OR (${CMAKE_C_COMPILER_ID} MATCHES AppleClang AND ${CMAKE_C_COMPILER_VERSION} VERSION_GREATER_EQUAL 17) ) # SME ACLE supported in LLVM >= 19 and AppleClang >= 17 set(DYNAMIC_CORE ${DYNAMIC_CORE} ARMV9SME VORTEXM4) endif() + endif() endif () if (DYNAMIC_LIST) set(DYNAMIC_CORE ARMV8 ${DYNAMIC_LIST}) diff --git a/cmake/cc.cmake b/cmake/cc.cmake index 27afd357a5..bacdbef5af 100644 --- a/cmake/cc.cmake +++ b/cmake/cc.cmake @@ -113,6 +113,7 @@ if (${CMAKE_C_COMPILER_ID} STREQUAL "PGI") endif () if (${CMAKE_C_COMPILER_ID} STREQUAL "NVHPC") + set (GCC_VERSION 100) if (POWER) set(CCOMMON_OPT "${CCOMMON_OPT} -tp pwr8") elseif (X86_64) diff --git a/cmake/lapack.cmake b/cmake/lapack.cmake index a8d1c601c8..04dd49cfbe 100644 --- a/cmake/lapack.cmake +++ b/cmake/lapack.cmake @@ -71,7 +71,7 @@ set(SLASRC slaqr0.f slaqr1.f slaqr2.f slaqr3.f slaqr4.f slaqr5.f slaqtr.f slar1v.f slar2v.f ilaslr.f ilaslc.f slarf.f slarfb.f slarfb_gett.f slarfg.f slarfgp.f slarft.f slarfx.f slarfy.f slargv.f - slarrv.f slartv.f + slarf1f.f slarf1l.f slarrv.f slartv.f slarz.f slarzb.f slarzt.f slasy2.f slasyf.f slasyf_rook.f slasyf_rk.f slasyf_aa.f slatbs.f slatdf.f slatps.f slatrd.f slatrs.f slatrz.f @@ -178,6 +178,7 @@ set(CLASRC claqz0.f claqz1.f claqz2.f claqz3.f claqsp.f claqsy.f clar1v.f clar2v.f ilaclr.f ilaclc.f clarf.f clarfb.f clarfb_gett.f clarfg.f clarfgp.f clarft.f + clarf1f.f clarf1l.f clarfx.f clarfy.f clargv.f clarnv.f clarrv.f clartg.f90 clartv.f clarz.f clarzb.f clarzt.f clascl.f claset.f clasr.f classq.f90 clasyf.f clasyf_rook.f clasyf_rk.f clasyf_aa.f @@ -262,7 +263,7 @@ set(DLASRC dlaqr0.f dlaqr1.f dlaqr2.f dlaqr3.f dlaqr4.f dlaqr5.f dlaqtr.f dlar1v.f dlar2v.f iladlr.f iladlc.f dlarf.f dlarfb.f dlarfb_gett.f dlarfg.f dlarfgp.f dlarft.f dlarfx.f dlarfy.f - dlargv.f dlarrv.f dlartv.f + dlarf1f.f dlarf1l.f dlargv.f dlarrv.f dlartv.f dlarz.f dlarzb.f dlarzt.f dlasy2.f dlasyf.f dlasyf_rook.f dlasyf_rk.f dlasyf_aa.f dlatbs.f dlatdf.f dlatps.f dlatrd.f dlatrs.f dlatrz.f @@ -371,7 +372,7 @@ set(ZLASRC zlaqr0.f zlaqr1.f zlaqr2.f zlaqr3.f zlaqr4.f zlaqr5.f zlaqsp.f zlaqsy.f zlar1v.f zlar2v.f ilazlr.f ilazlc.f zlarcm.f zlarf.f zlarfb.f zlarfb_gett.f - zlarfg.f zlarfgp.f zlarft.f + zlarfg.f zlarfgp.f zlarft.f zlarf1f.f zlarf1l.f zlarfx.f zlarfy.f zlargv.f zlarnv.f zlarrv.f zlartg.f90 zlartv.f zlarz.f zlarzb.f zlarzt.f zlascl.f zlaset.f zlasr.f zlassq.f90 zlasyf.f zlasyf_rook.f zlasyf_rk.f zlasyf_aa.f @@ -575,7 +576,7 @@ set(SLASRC slaqr0.c slaqr1.c slaqr2.c slaqr3.c slaqr4.c slaqr5.c slaqtr.c slar1v.c slar2v.c ilaslr.c ilaslc.c slarf.c slarfb.c slarfb_gett.c slarfg.c slarfgp.c slarft.c slarfx.c slarfy.c slargv.c - slarrv.c slartv.c + slarf1f.c slarf1l.c slarrv.c slartv.c slarz.c slarzb.c slarzt.c slasy2.c slasyf.c slasyf_rook.c slasyf_rk.c slasyf_aa.c slatbs.c slatdf.c slatps.c slatrd.c slatrs.c slatrz.c @@ -681,6 +682,7 @@ set(CLASRC claqr0.c claqr1.c claqr2.c claqr3.c claqr4.c claqr5.c claqsp.c claqsy.c clar1v.c clar2v.c ilaclr.c ilaclc.c clarf.c clarfb.c clarfb_gett.c clarfg.c clarfgp.c clarft.c + clarf1f.c clarf1l.c clarfx.c clarfy.c clargv.c clarnv.c clarrv.c clartg.c clartv.c clarz.c clarzb.c clarzt.c clascl.c claset.c clasr.c classq.c clasyf.c clasyf_rook.c clasyf_rk.c clasyf_aa.c @@ -764,7 +766,7 @@ set(DLASRC dlaqr0.c dlaqr1.c dlaqr2.c dlaqr3.c dlaqr4.c dlaqr5.c dlaqtr.c dlar1v.c dlar2v.c iladlr.c iladlc.c dlarf.c dlarfb.c dlarfb_gett.c dlarfg.c dlarfgp.c dlarft.c dlarfx.c dlarfy.c - dlargv.c dlarrv.c dlartv.c + dlarf1f.c dlarf1l.c dlargv.c dlarrv.c dlartv.c dlarz.c dlarzb.c dlarzt.c dlasy2.c dlasyf.c dlasyf_rook.c dlasyf_rk.c dlasyf_aa.c dlatbs.c dlatdf.c dlatps.c dlatrd.c dlatrs.c dlatrz.c @@ -871,7 +873,7 @@ set(ZLASRC zlaqhb.c zlaqhe.c zlaqhp.c zlaqp2.c zlaqp2rk.c zlaqp3rk.c zlaqps.c zlaqsb.c zlaqr0.c zlaqr1.c zlaqr2.c zlaqr3.c zlaqr4.c zlaqr5.c zlaqsp.c zlaqsy.c zlar1v.c zlar2v.c ilazlr.c ilazlc.c - zlarcm.c zlarf.c zlarfb.c zlarfb_gett.c + zlarcm.c zlarf.c zlarfb.c zlarfb_gett.c zlarf1f.c zlarf1l.c zlarfg.c zlarfgp.c zlarft.c zlarfx.c zlarfy.c zlargv.c zlarnv.c zlarrv.c zlartg.c zlartv.c zlarz.c zlarzb.c zlarzt.c zlascl.c zlaset.c zlasr.c diff --git a/cpuid_arm64.c b/cpuid_arm64.c index c7af3295fc..c5dcaf6843 100644 --- a/cpuid_arm64.c +++ b/cpuid_arm64.c @@ -321,9 +321,10 @@ int detect(void) return CPU_CORTEXX2; else if (strstr(cpu_part, "0xd4e")) //X3 return CPU_CORTEXX2; - else if (strstr(cpu_part, "0xd4f")) //NVIDIA Grace et al. + else if (strstr(cpu_part, "0xd4f")) return CPU_NEOVERSEV2; - else if (strstr(cpu_part, "0xd87") || strstr(cpu_part, "0xd85") || strstr(cpu_part, "0xd83")) // X925/A725 + else if (strstr(cpu_part, "0xd87") || strstr(cpu_part, "0xd85") // A725,X925 + || strstr(cpu_part, "0xd84") || strstr(cpu_part, "0xd83")) // V3,V3AE return CPU_NEOVERSEV2; else if (strstr(cpu_part, "0xd0b")) return CPU_CORTEXA76; diff --git a/cpuid_mips.c b/cpuid_mips.c index 77567a2e5f..3944934471 100644 --- a/cpuid_mips.c +++ b/cpuid_mips.c @@ -1,5 +1,5 @@ /***************************************************************************** -Copyright (c) 2011-2014, The OpenBLAS Project +Copyright (c) 2011-2026, The OpenBLAS Project All rights reserved. Redistribution and use in source and binary forms, with or without @@ -13,9 +13,9 @@ modification, are permitted provided that the following conditions are notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - 3. Neither the name of the OpenBLAS project nor the names of - its contributors may be used to endorse or promote products - derived from this software without specific prior written + 3. Neither the name of the OpenBLAS project nor the names of + its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" @@ -109,7 +109,7 @@ int detect(void){ return CPU_1004K; } else if (strstr(p, " 24K")) { return CPU_24K; - } else + } else return CPU_UNKNOWN; } #endif @@ -136,6 +136,40 @@ void get_subdirname(void){ printf("mips"); } +int get_feature(char *search) { + +#ifdef __linux + FILE *infile; + char buffer[2048], *p, *t; + p = (char *)NULL; + + infile = fopen("/proc/cpuinfo", "r"); + + while (fgets(buffer, sizeof(buffer), infile)) { + + if (!strncmp("Features", buffer, 8) || + !strncmp("ASEs implemented", buffer, 16)) { + p = strchr(buffer, ':') + 2; + break; + } + } + + fclose(infile); + + if (p == NULL) + return 0; + + t = strtok(p, " "); + while (t = strtok(NULL, " ")) { + if (strstr(t, search)) { + return (1); + } + } + +#endif + return (0); +} + void get_cpuconfig(void){ if(detect()==CPU_P5600){ printf("#define P5600\n"); @@ -165,7 +199,7 @@ void get_cpuconfig(void){ }else{ printf("#define UNKNOWN\n"); } -#ifndef NO_MSA +#ifndef NO_MSA if (get_feature("msa")) printf("#define HAVE_MSA\n"); #endif } @@ -181,38 +215,3 @@ void get_libname(void){ printf("mips\n"); } } - -int get_feature(char *search) -{ - -#ifdef __linux - FILE *infile; - char buffer[2048], *p,*t; - p = (char *) NULL ; - - infile = fopen("/proc/cpuinfo", "r"); - - while (fgets(buffer, sizeof(buffer), infile)) - { - - if (!strncmp("Features", buffer, 8) || !strncmp("ASEs implemented", buffer, 16)) - { - p = strchr(buffer, ':') + 2; - break; - } - } - - fclose(infile); - - if( p == NULL ) return 0; - - t = strtok(p," "); - while( t = strtok(NULL," ")) - { - if (strstr(t, search)) { return(1); } - } - -#endif - return(0); -} - diff --git a/cpuid_mips64.c b/cpuid_mips64.c index 8895cb1578..69921265b0 100644 --- a/cpuid_mips64.c +++ b/cpuid_mips64.c @@ -1,5 +1,5 @@ /***************************************************************************** -Copyright (c) 2011-2014, The OpenBLAS Project +Copyright (c) 2011-2026, The OpenBLAS Project All rights reserved. Redistribution and use in source and binary forms, with or without @@ -13,9 +13,9 @@ modification, are permitted provided that the following conditions are notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - 3. Neither the name of the OpenBLAS project nor the names of - its contributors may be used to endorse or promote products - derived from this software without specific prior written + 3. Neither the name of the OpenBLAS project nor the names of + its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" @@ -145,13 +145,47 @@ void get_subarchitecture(void){ printf("SICORTEX"); }else{ printf("MIPS64_GENERIC"); - } + } } void get_subdirname(void){ printf("mips64"); } +int get_feature(char *search) { + +#ifdef __linux + FILE *infile; + char buffer[2048], *p, *t; + p = (char *)NULL; + + infile = fopen("/proc/cpuinfo", "r"); + + while (fgets(buffer, sizeof(buffer), infile)) { + + if (!strncmp("Features", buffer, 8) || + !strncmp("ASEs implemented", buffer, 16)) { + p = strchr(buffer, ':') + 2; + break; + } + } + + fclose(infile); + + if (p == NULL) + return 0; + + t = strtok(p, " "); + while (t = strtok(NULL, " ")) { + if (strstr(t, search)) { + return (1); + } + } + +#endif + return (0); +} + void get_cpuconfig(void){ if(detect()==CPU_LOONGSON3R3) { printf("#define LOONGSON3R3\n"); @@ -228,38 +262,3 @@ void get_libname(void){ printf("mips64_generic\n"); } } - -int get_feature(char *search) -{ - -#ifdef __linux - FILE *infile; - char buffer[2048], *p,*t; - p = (char *) NULL ; - - infile = fopen("/proc/cpuinfo", "r"); - - while (fgets(buffer, sizeof(buffer), infile)) - { - - if (!strncmp("Features", buffer, 8) || !strncmp("ASEs implemented", buffer, 16)) - { - p = strchr(buffer, ':') + 2; - break; - } - } - - fclose(infile); - - if( p == NULL ) return 0; - - t = strtok(p," "); - while( t = strtok(NULL," ")) - { - if (strstr(t, search)) { return(1); } - } - -#endif - return(0); -} - diff --git a/ctest/c_sblat3c.c b/ctest/c_sblat3c.c index e42d67c36f..7e703a427d 100644 --- a/ctest/c_sblat3c.c +++ b/ctest/c_sblat3c.c @@ -509,7 +509,7 @@ static logical c_false = FALSE_; if (i__1 < 2) { goto L60; } - for (i__ = 1; i__ <= 9; ++i__) { + for (i__ = 1; i__ <= 6; ++i__) { if (s_cmp(snamet, snames[i__ - 1] , (ftnlen)12, (ftnlen)12) == 0) { goto L50; diff --git a/docs/install.md b/docs/install.md index 5e31b50661..1c3f27ee26 100644 --- a/docs/install.md +++ b/docs/install.md @@ -443,6 +443,10 @@ To then use the built OpenBLAS shared library in Visual Studio: ### Windows on Arm +If you want to use a regular x64 Windows build of OpenBLAS with x64 software in the Prism emulator, be sure to use the latest version of Prism, and to check the box +to "Disable floating point optimization" in the Emulation settings. (Right-click on the executable to open "Properties", then on the "Compatibility" tab click on +"Change emulation settings"). + A fully functional native OpenBLAS for WoA that can be built as both a static and dynamic library using LLVM toolchain and Visual Studio 2022. Before starting to build, make sure that you have installed Visual Studio 2022 on your ARM device, including the "Desktop Development with C++" component (that contains the cmake tool). (Note that you can use the free "Visual Studio 2022 Community Edition" for this task. In principle it would be possible to build with VisualStudio alone, but using the LLVM toolchain enables native compilation of the Fortran sources of LAPACK and of all the optimized assembly files, which VisualStudio cannot handle on its own) diff --git a/driver/level3/level3_thread.c b/driver/level3/level3_thread.c index 83403aef70..2657bbcfbe 100644 --- a/driver/level3/level3_thread.c +++ b/driver/level3/level3_thread.c @@ -48,6 +48,7 @@ #endif #ifdef DYNAMIC_ARCH +#undef GEMM_PREFERRED_SIZE #define GEMM_PREFERRED_SIZE gotoblas->preferred_size #endif #ifndef GEMM_PREFERRED_SIZE diff --git a/driver/others/Makefile b/driver/others/Makefile index fbd016e6b0..0a1bcff9d1 100644 --- a/driver/others/Makefile +++ b/driver/others/Makefile @@ -127,10 +127,10 @@ endif xerbla.$(SUFFIX) : xerbla.c $(CC) $(CFLAGS) -c $< -o $(@F) -dynamic.$(SUFFIX) : dynamic.c +dynamic%$(SUFFIX) : dynamic%c $(CC) $(CFLAGS) -c $< -o $(@F) -dynamic.$(PSUFFIX) : dynamic.c +dynamic%$(PSUFFIX) : dynamic%c $(CC) $(PFLAGS) -c $< -o $(@F) parameter.$(SUFFIX) : parameter.c ../../param.h diff --git a/driver/others/dynamic_arm64.c b/driver/others/dynamic_arm64.c index 1d3d7957d7..bdbcfc45e3 100644 --- a/driver/others/dynamic_arm64.c +++ b/driver/others/dynamic_arm64.c @@ -1,6 +1,6 @@ /*********************************************************************/ /* Copyright 2009, 2010 The University of Texas at Austin. */ -/* Copyright 2023-2024 The OpenBLAS Project */ +/* Copyright 2023-2024, 2026 The OpenBLAS Project */ /* All rights reserved. */ /* */ /* Redistribution and use in source and binary forms, with or */ @@ -129,10 +129,16 @@ extern gotoblas_t gotoblas_ARMV9SME; #else #define gotoblas_ARMV9SME gotoblas_ARMV8 #endif +#ifdef DYN_VORTEX +extern gotoblas_t gotoblas_VORTEX; +#elif defined(DYN_NEOVERSEN1) +#define gotoblas_VORTEX gotoblas_NEOVERSEN1 +#else +#define gotoblas_VORTEX gotoblas_ARMV8 +#endif #ifdef DYN_VORTEXM4 extern gotoblas_t gotoblas_VORTEXM4; #else -#error "dont have vortexm4" #define gotoblas_VORTEXM4 gotoblas_ARMV8 #endif #ifdef DYN_CORTEXA55 @@ -145,7 +151,7 @@ extern gotoblas_t gotoblas_A64FX; #else #define gotoblas_A64FX gotoblas_ARMV8 #endif -#else +#else //not a user-specified dynamic_list extern gotoblas_t gotoblas_CORTEXA53; #define gotoblas_CORTEXA55 gotoblas_CORTEXA53 extern gotoblas_t gotoblas_CORTEXA57; @@ -157,6 +163,7 @@ extern gotoblas_t gotoblas_THUNDERX2T99; extern gotoblas_t gotoblas_TSV110; extern gotoblas_t gotoblas_EMAG8180; extern gotoblas_t gotoblas_NEOVERSEN1; +#define gotoblas_VORTEX gotoblas_NEOVERSEN1 #ifndef NO_SVE extern gotoblas_t gotoblas_NEOVERSEV1; extern gotoblas_t gotoblas_NEOVERSEN2; @@ -192,7 +199,7 @@ extern void openblas_warning(int verbose, const char * msg); #define FALLBACK_VERBOSE 1 #define NEOVERSEN1_FALLBACK "OpenBLAS : Your OS does not support SVE instructions. OpenBLAS is using Neoverse N1 kernels as a fallback, which may give poorer performance.\n" -#define NUM_CORETYPES 20 +#define NUM_CORETYPES 21 /* * In case asm/hwcap.h is outdated on the build system, make sure @@ -232,6 +239,7 @@ static char *corename[] = { "armv8sve", "a64fx", "armv9sme", + "vortex", "vortexm4", "unknown" }; @@ -256,7 +264,8 @@ char *gotoblas_corename(void) { if (gotoblas == &gotoblas_ARMV8SVE) return corename[16]; if (gotoblas == &gotoblas_A64FX) return corename[17]; if (gotoblas == &gotoblas_ARMV9SME) return corename[18]; - if (gotoblas == &gotoblas_VORTEXM4) return corename[19]; + if (gotoblas == &gotoblas_VORTEX) return corename[19]; + if (gotoblas == &gotoblas_VORTEXM4) return corename[20]; return corename[NUM_CORETYPES]; } @@ -295,7 +304,8 @@ static gotoblas_t *force_coretype(char *coretype) { case 16: return (&gotoblas_ARMV8SVE); case 17: return (&gotoblas_A64FX); case 18: return (&gotoblas_ARMV9SME); - case 19: return (&gotoblas_VORTEXM4); + case 19: return (&gotoblas_VORTEX); + case 20: return (&gotoblas_VORTEXM4); } snprintf(message, 128, "Core not found: %s\n", coretype); openblas_warning(1, message); @@ -312,7 +322,7 @@ static gotoblas_t *get_coretype(void) { return &gotoblas_VORTEXM4; } #endif - return &gotoblas_NEOVERSEN1; + return &gotoblas_VORTEX; #endif #if (!defined OS_LINUX && !defined OS_ANDROID) @@ -397,6 +407,8 @@ static gotoblas_t *get_coretype(void) { case 0xd08: // Cortex A72 return &gotoblas_CORTEXA72; case 0xd09: // Cortex A73 + case 0xd0a: // Cortex A75 + case 0xd0b: // Cortex A76 return &gotoblas_CORTEXA73; case 0xd0c: // Neoverse N1 return &gotoblas_NEOVERSEN1; @@ -413,10 +425,11 @@ static gotoblas_t *get_coretype(void) { return &gotoblas_NEOVERSEN1; }else return &gotoblas_NEOVERSEV1; - case 0xd4f: - case 0xd83: - case 0xd85: - case 0xd87: + case 0xd4f: // Neoverse V2 + case 0xd83: // Neoverse V3AE + case 0xd84: // Neoverse V3 + case 0xd85: // Cortex X925 + case 0xd87: // Cortex A725 if (!(getauxval(AT_HWCAP) & HWCAP_SVE)) { openblas_warning(FALLBACK_VERBOSE, NEOVERSEN1_FALLBACK); return &gotoblas_NEOVERSEN1; @@ -486,7 +499,7 @@ static gotoblas_t *get_coretype(void) { break; case 0x61: // Apple if (support_sme1()) return &gotoblas_VORTEXM4; - return &gotoblas_NEOVERSEN1; + return &gotoblas_VORTEX; break; default: snprintf(coremsg, 128, "Unknown CPU model - implementer %x part %x\n",implementer,part); diff --git a/driver/others/init.c b/driver/others/init.c index f27955184b..b11e10d381 100644 --- a/driver/others/init.c +++ b/driver/others/init.c @@ -72,6 +72,12 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "common.h" +#if (MAX_CPU_NUMBER > 256) +#ifndef BIGNUMA +#define BIGNUMA +#endif +#endif + #if defined(OS_LINUX) && defined(SMP) #define _GNU_SOURCE diff --git a/driver/others/openblas_get_config.c b/driver/others/openblas_get_config.c index ff52cfba8f..4a424b49ec 100644 --- a/driver/others/openblas_get_config.c +++ b/driver/others/openblas_get_config.c @@ -80,9 +80,9 @@ int openblas_get_parallel(void); char* CNAME(void) { char tmpstr[20]; - strcpy(tmp_config_str, openblas_config_str); + strncpy(tmp_config_str, openblas_config_str, 255-40); #ifdef DYNAMIC_ARCH - strcat(tmp_config_str, gotoblas_corename()); + strncat(tmp_config_str, gotoblas_corename(),20); #endif if (openblas_get_parallel() == 0) sprintf(tmpstr, " SINGLE_THREADED"); diff --git a/interface/symm.c b/interface/symm.c index ea00217050..4f51f9faf7 100644 --- a/interface/symm.c +++ b/interface/symm.c @@ -374,6 +374,7 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_SIDE Side, enum CBLAS_UPLO Uplo, return; } + if (args.m == 0 || args.n == 0) return; #if !defined(COMPLEX) && !defined(DOUBLE) && !defined(BFLOAT16) && !defined(HFLOAT16) #if defined(ARCH_ARM64) && (defined(USE_SSYMM_KERNEL_DIRECT)||defined(DYNAMIC_ARCH)) #if defined(DYNAMIC_ARCH) @@ -383,7 +384,6 @@ if (strcmp(gotoblas_corename(), "armv9sme") == 0 #endif ) #endif - if (args.m == 0 || args.n == 0) return; if (order == CblasRowMajor && m == lda && n == ldb && n == ldc) { if (Side == CblasLeft && Uplo == CblasUpper) { @@ -398,8 +398,6 @@ if (strcmp(gotoblas_corename(), "armv9sme") == 0 #endif - if (args.m == 0 || args.n == 0) return; - IDEBUG_START; FUNCTION_PROFILE_START(); diff --git a/interface/trsm.c b/interface/trsm.c index 6584bb4d75..ae35d0d3b7 100644 --- a/interface/trsm.c +++ b/interface/trsm.c @@ -359,6 +359,8 @@ void CNAME(enum CBLAS_ORDER order, return; } + if (args.m == 0 || args.n == 0) return; + #if !defined(COMPLEX) && !defined(DOUBLE) && !defined(BFLOAT16) && !defined(HFLOAT16) #if defined(ARCH_ARM64) && (defined(USE_STRMM_KERNEL_DIRECT)||defined(DYNAMIC_ARCH)) #if defined(DYNAMIC_ARCH) @@ -368,7 +370,6 @@ if (strcmp(gotoblas_corename(), "armv9sme") == 0 #endif ) #endif - if (args.m == 0 || args.n == 0) return; if (order == CblasRowMajor && Diag == CblasNonUnit && Side == CblasLeft && m == lda && n == ldb) { if (Trans == CblasNoTrans) { (Uplo == CblasUpper ? STRMM_DIRECT_LNUN : STRMM_DIRECT_LNLN)(m, n, alpha, a, lda, b, ldb); @@ -382,8 +383,6 @@ if (strcmp(gotoblas_corename(), "armv9sme") == 0 #endif - if ((args.m == 0) || (args.n == 0)) return; - IDEBUG_START; FUNCTION_PROFILE_START(); diff --git a/kernel/arm/dot.c b/kernel/arm/dot.c index 46a84ad189..8feed71353 100644 --- a/kernel/arm/dot.c +++ b/kernel/arm/dot.c @@ -1,5 +1,5 @@ /*************************************************************************** -Copyright (c) 2013, The OpenBLAS Project +Copyright (c) 2013-2026, The OpenBLAS Project All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are @@ -50,8 +50,11 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y) while(i < n) { - - dot += y[iy] * x[ix] ; +#if defined(DSDOT) + dot += (double)y[iy] * (double)x[ix] ; +#else + dot += y[iy] * x[ix]; +#endif ix += inc_x ; iy += inc_y ; i++ ; diff --git a/kernel/arm64/KERNEL.NEOVERSEN2 b/kernel/arm64/KERNEL.NEOVERSEN2 index 8269812347..e70486768e 100644 --- a/kernel/arm64/KERNEL.NEOVERSEN2 +++ b/kernel/arm64/KERNEL.NEOVERSEN2 @@ -204,6 +204,25 @@ BGEMMOTCOPYOBJ = bgemm_otcopy$(TSUFFIX).$(SUFFIX) BGEMVTKERNEL = sbgemv_t_bfdot.c BGEMVNKERNEL = bgemv_n_sve_v3x4.c +ifeq ($(BUILD_HFLOAT16), 1) +SHGEMMKERNEL = shgemm_kernel_$(SHGEMM_UNROLL_M)x$(SHGEMM_UNROLL_N)_neoversen2.c +SHGEMMINCOPY = shgemm_ncopy_$(SHGEMM_UNROLL_M)_neoversen2.c +SHGEMMITCOPY = shgemm_tcopy_$(SHGEMM_UNROLL_M)_neoversen2.c +ifneq ($(SHGEMM_UNROLL_M), $(SHGEMM_UNROLL_N)) + SHGEMMINCOPY = ../generic/gemm_ncopy_$(SHGEMM_UNROLL_M).c + SHGEMMITCOPY = ../generic/gemm_tcopy_$(SHGEMM_UNROLL_M).c +endif +SHGEMMONCOPY = shgemm_ncopy_$(SHGEMM_UNROLL_N)_neoversen2.c +SHGEMMOTCOPY = shgemm_tcopy_$(SHGEMM_UNROLL_N)_neoversen2.c +SHGEMMINCOPYOBJ = shgemm_incopy$(TSUFFIX).$(SUFFIX) +SHGEMMITCOPYOBJ = shgemm_itcopy$(TSUFFIX).$(SUFFIX) +SHGEMMONCOPYOBJ = shgemm_oncopy$(TSUFFIX).$(SUFFIX) +SHGEMMOTCOPYOBJ = shgemm_otcopy$(TSUFFIX).$(SUFFIX) +ifndef SHGEMM_BETA +SHGEMM_BETA = sbgemm_beta_neoversen2.c +endif +endif + SBGEMM_BETA = sbgemm_beta_neoversen2.c SBGEMMKERNEL = sbgemm_kernel_$(SBGEMM_UNROLL_M)x$(SBGEMM_UNROLL_N)_neoversen2.c ifneq ($(SBGEMM_UNROLL_M), $(SBGEMM_UNROLL_N)) diff --git a/kernel/arm64/dot_kernel_asimd.c b/kernel/arm64/dot_kernel_asimd.c index 27f557c705..a1ae01f2d8 100644 --- a/kernel/arm64/dot_kernel_asimd.c +++ b/kernel/arm64/dot_kernel_asimd.c @@ -262,7 +262,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. static RETURN_TYPE dot_kernel_asimd(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y) { - volatile RETURN_TYPE dot = 0.0; +#ifndef DOUBLE + volatile +#endif + RETURN_TYPE dot = 0.0; BLASLONG j = 0; __asm__ __volatile__ ( diff --git a/kernel/arm64/shgemm_kernel_8x8_neoversen2.c b/kernel/arm64/shgemm_kernel_8x8_neoversen2.c new file mode 100644 index 0000000000..cc52ea3366 --- /dev/null +++ b/kernel/arm64/shgemm_kernel_8x8_neoversen2.c @@ -0,0 +1,887 @@ +/*************************************************************************** + * Copyright (c) 2026 The OpenBLAS Project + * All rights reserved. + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * 3. Neither the name of the OpenBLAS project nor the names of + * its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * *****************************************************************************/ + +#include + +#include "common.h" + +static inline void kernel_8x8(BLASLONG K, const float16_t *A, const float16_t *B, FLOAT *C, BLASLONG ldc, FLOAT alpha) { + float32x4_t c0_low = vdupq_n_f32(0.0f); + float32x4_t c0_high = vdupq_n_f32(0.0f); + float32x4_t c1_low = vdupq_n_f32(0.0f); + float32x4_t c1_high = vdupq_n_f32(0.0f); + float32x4_t c2_low = vdupq_n_f32(0.0f); + float32x4_t c2_high = vdupq_n_f32(0.0f); + float32x4_t c3_low = vdupq_n_f32(0.0f); + float32x4_t c3_high = vdupq_n_f32(0.0f); + float32x4_t c4_low = vdupq_n_f32(0.0f); + float32x4_t c4_high = vdupq_n_f32(0.0f); + float32x4_t c5_low = vdupq_n_f32(0.0f); + float32x4_t c5_high = vdupq_n_f32(0.0f); + float32x4_t c6_low = vdupq_n_f32(0.0f); + float32x4_t c6_high = vdupq_n_f32(0.0f); + float32x4_t c7_low = vdupq_n_f32(0.0f); + float32x4_t c7_high = vdupq_n_f32(0.0f); + + for (BLASLONG k = 0; k < K; ++k) { + float16x8_t a_f16 = vld1q_f16(A); + float32x4_t a_low = vcvt_f32_f16(vget_low_f16(a_f16)); + float32x4_t a_high = vcvt_f32_f16(vget_high_f16(a_f16)); + + float16x8_t b_f16 = vld1q_f16(B); + float32x4_t b_low = vcvt_f32_f16(vget_low_f16(b_f16)); + float32x4_t b_high = vcvt_f32_f16(vget_high_f16(b_f16)); + + float32_t b0_lane0 = vgetq_lane_f32(b_low, 0); + c0_low = vfmaq_n_f32(c0_low, a_low, b0_lane0); + c0_high = vfmaq_n_f32(c0_high, a_high, b0_lane0); + + float32_t b0_lane1 = vgetq_lane_f32(b_low, 1); + c1_low = vfmaq_n_f32(c1_low, a_low, b0_lane1); + c1_high = vfmaq_n_f32(c1_high, a_high, b0_lane1); + + float32_t b0_lane2 = vgetq_lane_f32(b_low, 2); + c2_low = vfmaq_n_f32(c2_low, a_low, b0_lane2); + c2_high = vfmaq_n_f32(c2_high, a_high, b0_lane2); + + float32_t b0_lane3 = vgetq_lane_f32(b_low, 3); + c3_low = vfmaq_n_f32(c3_low, a_low, b0_lane3); + c3_high = vfmaq_n_f32(c3_high, a_high, b0_lane3); + + float32_t b1_lane0 = vgetq_lane_f32(b_high, 0); + c4_low = vfmaq_n_f32(c4_low, a_low, b1_lane0); + c4_high = vfmaq_n_f32(c4_high, a_high, b1_lane0); + + float32_t b1_lane1 = vgetq_lane_f32(b_high, 1); + c5_low = vfmaq_n_f32(c5_low, a_low, b1_lane1); + c5_high = vfmaq_n_f32(c5_high, a_high, b1_lane1); + + float32_t b1_lane2 = vgetq_lane_f32(b_high, 2); + c6_low = vfmaq_n_f32(c6_low, a_low, b1_lane2); + c6_high = vfmaq_n_f32(c6_high, a_high, b1_lane2); + + float32_t b1_lane3 = vgetq_lane_f32(b_high, 3); + c7_low = vfmaq_n_f32(c7_low, a_low, b1_lane3); + c7_high = vfmaq_n_f32(c7_high, a_high, b1_lane3); + + A += 8; + B += 8; + } + + FLOAT *col_0 = C + 0 * ldc; + FLOAT *col_1 = C + 1 * ldc; + FLOAT *col_2 = C + 2 * ldc; + FLOAT *col_3 = C + 3 * ldc; + FLOAT *col_4 = C + 4 * ldc; + FLOAT *col_5 = C + 5 * ldc; + FLOAT *col_6 = C + 6 * ldc; + FLOAT *col_7 = C + 7 * ldc; + + float32x4_t t0_l = vld1q_f32(col_0); + float32x4_t t0_h = vld1q_f32(col_0 + 4); + t0_l = vaddq_f32(t0_l, vmulq_n_f32(c0_low, alpha)); + t0_h = vaddq_f32(t0_h, vmulq_n_f32(c0_high, alpha)); + vst1q_f32(col_0, t0_l); + vst1q_f32(col_0 + 4, t0_h); + + float32x4_t t1_l = vld1q_f32(col_1); + float32x4_t t1_h = vld1q_f32(col_1 + 4); + t1_l = vaddq_f32(t1_l, vmulq_n_f32(c1_low, alpha)); + t1_h = vaddq_f32(t1_h, vmulq_n_f32(c1_high, alpha)); + vst1q_f32(col_1, t1_l); + vst1q_f32(col_1 + 4, t1_h); + + float32x4_t t2_l = vld1q_f32(col_2); + float32x4_t t2_h = vld1q_f32(col_2 + 4); + t2_l = vaddq_f32(t2_l, vmulq_n_f32(c2_low, alpha)); + t2_h = vaddq_f32(t2_h, vmulq_n_f32(c2_high, alpha)); + vst1q_f32(col_2, t2_l); + vst1q_f32(col_2 + 4, t2_h); + + float32x4_t t3_l = vld1q_f32(col_3); + float32x4_t t3_h = vld1q_f32(col_3 + 4); + t3_l = vaddq_f32(t3_l, vmulq_n_f32(c3_low, alpha)); + t3_h = vaddq_f32(t3_h, vmulq_n_f32(c3_high, alpha)); + vst1q_f32(col_3, t3_l); + vst1q_f32(col_3 + 4, t3_h); + + float32x4_t t4_l = vld1q_f32(col_4); + float32x4_t t4_h = vld1q_f32(col_4 + 4); + t4_l = vaddq_f32(t4_l, vmulq_n_f32(c4_low, alpha)); + t4_h = vaddq_f32(t4_h, vmulq_n_f32(c4_high, alpha)); + vst1q_f32(col_4, t4_l); + vst1q_f32(col_4 + 4, t4_h); + + float32x4_t t5_l = vld1q_f32(col_5); + float32x4_t t5_h = vld1q_f32(col_5 + 4); + t5_l = vaddq_f32(t5_l, vmulq_n_f32(c5_low, alpha)); + t5_h = vaddq_f32(t5_h, vmulq_n_f32(c5_high, alpha)); + vst1q_f32(col_5, t5_l); + vst1q_f32(col_5 + 4, t5_h); + + float32x4_t t6_l = vld1q_f32(col_6); + float32x4_t t6_h = vld1q_f32(col_6 + 4); + t6_l = vaddq_f32(t6_l, vmulq_n_f32(c6_low, alpha)); + t6_h = vaddq_f32(t6_h, vmulq_n_f32(c6_high, alpha)); + vst1q_f32(col_6, t6_l); + vst1q_f32(col_6 + 4, t6_h); + + float32x4_t t7_l = vld1q_f32(col_7); + float32x4_t t7_h = vld1q_f32(col_7 + 4); + t7_l = vaddq_f32(t7_l, vmulq_n_f32(c7_low, alpha)); + t7_h = vaddq_f32(t7_h, vmulq_n_f32(c7_high, alpha)); + vst1q_f32(col_7, t7_l); + vst1q_f32(col_7 + 4, t7_h); +} + +static inline void kernel_4x8(BLASLONG K, const float16_t *A, const float16_t *B, FLOAT *C, BLASLONG ldc, FLOAT alpha) { + float32x4_t c0 = vdupq_n_f32(0.0f); + float32x4_t c1 = vdupq_n_f32(0.0f); + float32x4_t c2 = vdupq_n_f32(0.0f); + float32x4_t c3 = vdupq_n_f32(0.0f); + float32x4_t c4 = vdupq_n_f32(0.0f); + float32x4_t c5 = vdupq_n_f32(0.0f); + float32x4_t c6 = vdupq_n_f32(0.0f); + float32x4_t c7 = vdupq_n_f32(0.0f); + + for (BLASLONG k = 0; k < K; ++k) { + float32x4_t a_f16 = vcvt_f32_f16(vld1_f16(A)); + + float16x8_t b_f16 = vld1q_f16(B); + float32x4_t b_low = vcvt_f32_f16(vget_low_f16(b_f16)); + float32x4_t b_high = vcvt_f32_f16(vget_high_f16(b_f16)); + + float32_t b0_lane0 = vgetq_lane_f32(b_low, 0); + c0 = vfmaq_n_f32(c0, a_f16, b0_lane0); + + float32_t b0_lane1 = vgetq_lane_f32(b_low, 1); + c1 = vfmaq_n_f32(c1, a_f16, b0_lane1); + + float32_t b0_lane2 = vgetq_lane_f32(b_low, 2); + c2 = vfmaq_n_f32(c2, a_f16, b0_lane2); + + float32_t b0_lane3 = vgetq_lane_f32(b_low, 3); + c3 = vfmaq_n_f32(c3, a_f16, b0_lane3); + + float32_t b1_lane0 = vgetq_lane_f32(b_high, 0); + c4 = vfmaq_n_f32(c4, a_f16, b1_lane0); + + float32_t b1_lane1 = vgetq_lane_f32(b_high, 1); + c5 = vfmaq_n_f32(c5, a_f16, b1_lane1); + + float32_t b1_lane2 = vgetq_lane_f32(b_high, 2); + c6 = vfmaq_n_f32(c6, a_f16, b1_lane2); + + float32_t b1_lane3 = vgetq_lane_f32(b_high, 3); + c7 = vfmaq_n_f32(c7, a_f16, b1_lane3); + + A += 4; + B += 8; + } + + FLOAT *col_0 = C + 0 * ldc; + FLOAT *col_1 = C + 1 * ldc; + FLOAT *col_2 = C + 2 * ldc; + FLOAT *col_3 = C + 3 * ldc; + FLOAT *col_4 = C + 4 * ldc; + FLOAT *col_5 = C + 5 * ldc; + FLOAT *col_6 = C + 6 * ldc; + FLOAT *col_7 = C + 7 * ldc; + + float32x4_t t0 = vld1q_f32(col_0); + t0 = vaddq_f32(t0, vmulq_n_f32(c0, alpha)); + vst1q_f32(col_0, t0); + + float32x4_t t1 = vld1q_f32(col_1); + t1 = vaddq_f32(t1, vmulq_n_f32(c1, alpha)); + vst1q_f32(col_1, t1); + + float32x4_t t2 = vld1q_f32(col_2); + t2 = vaddq_f32(t2, vmulq_n_f32(c2, alpha)); + vst1q_f32(col_2, t2); + + float32x4_t t3 = vld1q_f32(col_3); + t3 = vaddq_f32(t3, vmulq_n_f32(c3, alpha)); + vst1q_f32(col_3, t3); + + float32x4_t t4 = vld1q_f32(col_4); + t4 = vaddq_f32(t4, vmulq_n_f32(c4, alpha)); + vst1q_f32(col_4, t4); + + float32x4_t t5 = vld1q_f32(col_5); + t5 = vaddq_f32(t5, vmulq_n_f32(c5, alpha)); + vst1q_f32(col_5, t5); + + float32x4_t t6 = vld1q_f32(col_6); + t6 = vaddq_f32(t6, vmulq_n_f32(c6, alpha)); + vst1q_f32(col_6, t6); + + float32x4_t t7 = vld1q_f32(col_7); + t7 = vaddq_f32(t7, vmulq_n_f32(c7, alpha)); + vst1q_f32(col_7, t7); +} + +static inline void kernel_2x8(BLASLONG K, const float16_t *A, const float16_t *B, FLOAT *C, BLASLONG ldc, FLOAT alpha) { + float32x2_t c0 = vdup_n_f32(0.0f); + float32x2_t c1 = vdup_n_f32(0.0f); + float32x2_t c2 = vdup_n_f32(0.0f); + float32x2_t c3 = vdup_n_f32(0.0f); + float32x2_t c4 = vdup_n_f32(0.0f); + float32x2_t c5 = vdup_n_f32(0.0f); + float32x2_t c6 = vdup_n_f32(0.0f); + float32x2_t c7 = vdup_n_f32(0.0f); + + for (BLASLONG k = 0; k < K; ++k) { + float32x4_t a_f32 = vcvt_f32_f16(vld1_f16(A)); + float32x2_t a_low = vget_low_f32(a_f32); + + float16x8_t b_f16 = vld1q_f16(B); + float32x4_t b_low = vcvt_f32_f16(vget_low_f16(b_f16)); + float32x4_t b_high = vcvt_f32_f16(vget_high_f16(b_f16)); + + float32_t b0_lane0 = vgetq_lane_f32(b_low, 0); + c0 = vfma_n_f32(c0, a_low, b0_lane0); + + float32_t b0_lane1 = vgetq_lane_f32(b_low, 1); + c1 = vfma_n_f32(c1, a_low, b0_lane1); + + float32_t b0_lane2 = vgetq_lane_f32(b_low, 2); + c2 = vfma_n_f32(c2, a_low, b0_lane2); + + float32_t b0_lane3 = vgetq_lane_f32(b_low, 3); + c3 = vfma_n_f32(c3, a_low, b0_lane3); + + float32_t b1_lane0 = vgetq_lane_f32(b_high, 0); + c4 = vfma_n_f32(c4, a_low, b1_lane0); + + float32_t b1_lane1 = vgetq_lane_f32(b_high, 1); + c5 = vfma_n_f32(c5, a_low, b1_lane1); + + float32_t b1_lane2 = vgetq_lane_f32(b_high, 2); + c6 = vfma_n_f32(c6, a_low, b1_lane2); + + float32_t b1_lane3 = vgetq_lane_f32(b_high, 3); + c7 = vfma_n_f32(c7, a_low, b1_lane3); + + A += 2; + B += 8; + } + + FLOAT *col_0 = C + 0 * ldc; + FLOAT *col_1 = C + 1 * ldc; + FLOAT *col_2 = C + 2 * ldc; + FLOAT *col_3 = C + 3 * ldc; + FLOAT *col_4 = C + 4 * ldc; + FLOAT *col_5 = C + 5 * ldc; + FLOAT *col_6 = C + 6 * ldc; + FLOAT *col_7 = C + 7 * ldc; + + float32x2_t t0 = vld1_f32(col_0); + t0 = vadd_f32(t0, vmul_n_f32(c0, alpha)); + vst1_f32(col_0, t0); + + float32x2_t t1 = vld1_f32(col_1); + t1 = vadd_f32(t1, vmul_n_f32(c1, alpha)); + vst1_f32(col_1, t1); + + float32x2_t t2 = vld1_f32(col_2); + t2 = vadd_f32(t2, vmul_n_f32(c2, alpha)); + vst1_f32(col_2, t2); + + float32x2_t t3 = vld1_f32(col_3); + t3 = vadd_f32(t3, vmul_n_f32(c3, alpha)); + vst1_f32(col_3, t3); + + float32x2_t t4 = vld1_f32(col_4); + t4 = vadd_f32(t4, vmul_n_f32(c4, alpha)); + vst1_f32(col_4, t4); + + float32x2_t t5 = vld1_f32(col_5); + t5 = vadd_f32(t5, vmul_n_f32(c5, alpha)); + vst1_f32(col_5, t5); + + float32x2_t t6 = vld1_f32(col_6); + t6 = vadd_f32(t6, vmul_n_f32(c6, alpha)); + vst1_f32(col_6, t6); + + float32x2_t t7 = vld1_f32(col_7); + t7 = vadd_f32(t7, vmul_n_f32(c7, alpha)); + vst1_f32(col_7, t7); +} + +static inline void kernel_1x8(BLASLONG K, const float16_t *A, const float16_t *B, FLOAT *C, BLASLONG ldc, FLOAT alpha) { + FLOAT c0 = 0, c1 = 0, c2 = 0, c3 = 0, c4 = 0, c5 = 0, c6 = 0, c7 = 0; + + for (BLASLONG k = 0; k < K; ++k) { + FLOAT a = A[0]; + c0 += a * B[0]; + c1 += a * B[1]; + c2 += a * B[2]; + c3 += a * B[3]; + c4 += a * B[4]; + c5 += a * B[5]; + c6 += a * B[6]; + c7 += a * B[7]; + + A += 1; + B += 8; + } + + C[0 * ldc] += alpha * c0; + C[1 * ldc] += alpha * c1; + C[2 * ldc] += alpha * c2; + C[3 * ldc] += alpha * c3; + C[4 * ldc] += alpha * c4; + C[5 * ldc] += alpha * c5; + C[6 * ldc] += alpha * c6; + C[7 * ldc] += alpha * c7; +} + +static inline void kernel_8x4(BLASLONG K, const float16_t *A, const float16_t *B, FLOAT *C, BLASLONG ldc, FLOAT alpha) { + float32x4_t c0_low = vdupq_n_f32(0.0f); + float32x4_t c0_high = vdupq_n_f32(0.0f); + float32x4_t c1_low = vdupq_n_f32(0.0f); + float32x4_t c1_high = vdupq_n_f32(0.0f); + float32x4_t c2_low = vdupq_n_f32(0.0f); + float32x4_t c2_high = vdupq_n_f32(0.0f); + float32x4_t c3_low = vdupq_n_f32(0.0f); + float32x4_t c3_high = vdupq_n_f32(0.0f); + + for (BLASLONG k = 0; k < K; ++k) { + float16x8_t a_f16 = vld1q_f16(A); + float32x4_t a_low = vcvt_f32_f16(vget_low_f16(a_f16)); + float32x4_t a_high = vcvt_f32_f16(vget_high_f16(a_f16)); + + float32x4_t b_f32 = vcvt_f32_f16(vld1_f16(B)); + + float32_t b0_lane0 = vgetq_lane_f32(b_f32, 0); + c0_low = vfmaq_n_f32(c0_low, a_low, b0_lane0); + c0_high = vfmaq_n_f32(c0_high, a_high, b0_lane0); + + float32_t b0_lane1 = vgetq_lane_f32(b_f32, 1); + c1_low = vfmaq_n_f32(c1_low, a_low, b0_lane1); + c1_high = vfmaq_n_f32(c1_high, a_high, b0_lane1); + + float32_t b0_lane2 = vgetq_lane_f32(b_f32, 2); + c2_low = vfmaq_n_f32(c2_low, a_low, b0_lane2); + c2_high = vfmaq_n_f32(c2_high, a_high, b0_lane2); + + float32_t b0_lane3 = vgetq_lane_f32(b_f32, 3); + c3_low = vfmaq_n_f32(c3_low, a_low, b0_lane3); + c3_high = vfmaq_n_f32(c3_high, a_high, b0_lane3); + + A += 8; + B += 4; + } + + FLOAT *col_0 = C + 0 * ldc; + FLOAT *col_1 = C + 1 * ldc; + FLOAT *col_2 = C + 2 * ldc; + FLOAT *col_3 = C + 3 * ldc; + + float32x4_t t0_l = vld1q_f32(col_0); + float32x4_t t0_h = vld1q_f32(col_0 + 4); + t0_l = vaddq_f32(t0_l, vmulq_n_f32(c0_low, alpha)); + t0_h = vaddq_f32(t0_h, vmulq_n_f32(c0_high, alpha)); + vst1q_f32(col_0, t0_l); + vst1q_f32(col_0 + 4, t0_h); + + float32x4_t t1_l = vld1q_f32(col_1); + float32x4_t t1_h = vld1q_f32(col_1 + 4); + t1_l = vaddq_f32(t1_l, vmulq_n_f32(c1_low, alpha)); + t1_h = vaddq_f32(t1_h, vmulq_n_f32(c1_high, alpha)); + vst1q_f32(col_1, t1_l); + vst1q_f32(col_1 + 4, t1_h); + + float32x4_t t2_l = vld1q_f32(col_2); + float32x4_t t2_h = vld1q_f32(col_2 + 4); + t2_l = vaddq_f32(t2_l, vmulq_n_f32(c2_low, alpha)); + t2_h = vaddq_f32(t2_h, vmulq_n_f32(c2_high, alpha)); + vst1q_f32(col_2, t2_l); + vst1q_f32(col_2 + 4, t2_h); + + float32x4_t t3_l = vld1q_f32(col_3); + float32x4_t t3_h = vld1q_f32(col_3 + 4); + t3_l = vaddq_f32(t3_l, vmulq_n_f32(c3_low, alpha)); + t3_h = vaddq_f32(t3_h, vmulq_n_f32(c3_high, alpha)); + vst1q_f32(col_3, t3_l); + vst1q_f32(col_3 + 4, t3_h); +} + +static inline void kernel_4x4(BLASLONG K, const float16_t *A, const float16_t *B, FLOAT *C, BLASLONG ldc, FLOAT alpha) { + float32x4_t c0 = vdupq_n_f32(0.0f); + float32x4_t c1 = vdupq_n_f32(0.0f); + float32x4_t c2 = vdupq_n_f32(0.0f); + float32x4_t c3 = vdupq_n_f32(0.0f); + + for (BLASLONG k = 0; k < K; ++k) { + float32x4_t a_f32 = vcvt_f32_f16(vld1_f16(A)); + float32x4_t b_f32 = vcvt_f32_f16(vld1_f16(B)); + + float32_t b0_lane0 = vgetq_lane_f32(b_f32, 0); + c0 = vfmaq_n_f32(c0, a_f32, b0_lane0); + + float32_t b0_lane1 = vgetq_lane_f32(b_f32, 1); + c1 = vfmaq_n_f32(c1, a_f32, b0_lane1); + + float32_t b0_lane2 = vgetq_lane_f32(b_f32, 2); + c2 = vfmaq_n_f32(c2, a_f32, b0_lane2); + + float32_t b0_lane3 = vgetq_lane_f32(b_f32, 3); + c3 = vfmaq_n_f32(c3, a_f32, b0_lane3); + + A += 4; + B += 4; + } + + FLOAT *col_0 = C + 0 * ldc; + FLOAT *col_1 = C + 1 * ldc; + FLOAT *col_2 = C + 2 * ldc; + FLOAT *col_3 = C + 3 * ldc; + + float32x4_t t0 = vld1q_f32(col_0); + t0 = vaddq_f32(t0, vmulq_n_f32(c0, alpha)); + vst1q_f32(col_0, t0); + + float32x4_t t1 = vld1q_f32(col_1); + t1 = vaddq_f32(t1, vmulq_n_f32(c1, alpha)); + vst1q_f32(col_1, t1); + + float32x4_t t2 = vld1q_f32(col_2); + t2 = vaddq_f32(t2, vmulq_n_f32(c2, alpha)); + vst1q_f32(col_2, t2); + + float32x4_t t3 = vld1q_f32(col_3); + t3 = vaddq_f32(t3, vmulq_n_f32(c3, alpha)); + vst1q_f32(col_3, t3); +} + +static inline void kernel_2x4(BLASLONG K, const float16_t *A, const float16_t *B, FLOAT *C, BLASLONG ldc, FLOAT alpha) { + float32x2_t c0 = vdup_n_f32(0.0f); + float32x2_t c1 = vdup_n_f32(0.0f); + float32x2_t c2 = vdup_n_f32(0.0f); + float32x2_t c3 = vdup_n_f32(0.0f); + + for (BLASLONG k = 0; k < K; ++k) { + float32x4_t a_f32 = vcvt_f32_f16(vld1_f16(A)); + float32x2_t a_low = vget_low_f32(a_f32); + + float32x4_t b_f32 = vcvt_f32_f16(vld1_f16(B)); + + float32_t b0_lane0 = vgetq_lane_f32(b_f32, 0); + c0 = vfma_n_f32(c0, a_low, b0_lane0); + + float32_t b0_lane1 = vgetq_lane_f32(b_f32, 1); + c1 = vfma_n_f32(c1, a_low, b0_lane1); + + float32_t b0_lane2 = vgetq_lane_f32(b_f32, 2); + c2 = vfma_n_f32(c2, a_low, b0_lane2); + + float32_t b0_lane3 = vgetq_lane_f32(b_f32, 3); + c3 = vfma_n_f32(c3, a_low, b0_lane3); + A += 2; + B += 4; + } + + FLOAT *col_0 = C + 0 * ldc; + FLOAT *col_1 = C + 1 * ldc; + FLOAT *col_2 = C + 2 * ldc; + FLOAT *col_3 = C + 3 * ldc; + + float32x2_t t0 = vld1_f32(col_0); + t0 = vadd_f32(t0, vmul_n_f32(c0, alpha)); + vst1_f32(col_0, t0); + + float32x2_t t1 = vld1_f32(col_1); + t1 = vadd_f32(t1, vmul_n_f32(c1, alpha)); + vst1_f32(col_1, t1); + + float32x2_t t2 = vld1_f32(col_2); + t2 = vadd_f32(t2, vmul_n_f32(c2, alpha)); + vst1_f32(col_2, t2); + + float32x2_t t3 = vld1_f32(col_3); + t3 = vadd_f32(t3, vmul_n_f32(c3, alpha)); + vst1_f32(col_3, t3); +} + +static inline void kernel_1x4(BLASLONG K, const float16_t *A, const float16_t *B, FLOAT *C, BLASLONG ldc, FLOAT alpha) { + FLOAT c0 = 0, c1 = 0, c2 = 0, c3 = 0; + for (BLASLONG k = 0; k < K; ++k) { + FLOAT a = A[0]; + c0 += a * B[0]; + c1 += a * B[1]; + c2 += a * B[2]; + c3 += a * B[3]; + + A += 1; + B += 4; + } + + C[0 * ldc] += alpha * c0; + C[1 * ldc] += alpha * c1; + C[2 * ldc] += alpha * c2; + C[3 * ldc] += alpha * c3; +} + +static inline void kernel_8x2(BLASLONG K, const float16_t *A, const float16_t *B, FLOAT *C, BLASLONG ldc, FLOAT alpha) { + float32x4_t c0_low = vdupq_n_f32(0.0f); + float32x4_t c0_high = vdupq_n_f32(0.0f); + float32x4_t c1_low = vdupq_n_f32(0.0f); + float32x4_t c1_high = vdupq_n_f32(0.0f); + + for (BLASLONG k = 0; k < K; ++k) { + float16x8_t a_f16 = vld1q_f16(A); + float32x4_t a_low = vcvt_f32_f16(vget_low_f16(a_f16)); + float32x4_t a_high = vcvt_f32_f16(vget_high_f16(a_f16)); + + float32x4_t b_f32 = vcvt_f32_f16(vld1_f16(B)); + + float32_t b0_lane0 = vgetq_lane_f32(b_f32, 0); + c0_low = vfmaq_n_f32(c0_low, a_low, b0_lane0); + c0_high = vfmaq_n_f32(c0_high, a_high, b0_lane0); + + float32_t b0_lane1 = vgetq_lane_f32(b_f32, 1); + c1_low = vfmaq_n_f32(c1_low, a_low, b0_lane1); + c1_high = vfmaq_n_f32(c1_high, a_high, b0_lane1); + + A += 8; + B += 2; + } + + FLOAT *col_0 = C + 0 * ldc; + FLOAT *col_1 = C + 1 * ldc; + + float32x4_t t0_l = vld1q_f32(col_0); + float32x4_t t0_h = vld1q_f32(col_0 + 4); + t0_l = vaddq_f32(t0_l, vmulq_n_f32(c0_low, alpha)); + t0_h = vaddq_f32(t0_h, vmulq_n_f32(c0_high, alpha)); + vst1q_f32(col_0, t0_l); + vst1q_f32(col_0 + 4, t0_h); + + float32x4_t t1_l = vld1q_f32(col_1); + float32x4_t t1_h = vld1q_f32(col_1 + 4); + t1_l = vaddq_f32(t1_l, vmulq_n_f32(c1_low, alpha)); + t1_h = vaddq_f32(t1_h, vmulq_n_f32(c1_high, alpha)); + vst1q_f32(col_1, t1_l); + vst1q_f32(col_1 + 4, t1_h); +} + +static inline void kernel_4x2(BLASLONG K, const float16_t *A, const float16_t *B, FLOAT *C, BLASLONG ldc, FLOAT alpha) { + float32x4_t c0 = vdupq_n_f32(0.0f); + float32x4_t c1 = vdupq_n_f32(0.0f); + + for (BLASLONG k = 0; k < K; ++k) { + float32x4_t a_f32 = vcvt_f32_f16(vld1_f16(A)); + float32x4_t b_f32 = vcvt_f32_f16(vld1_f16(B)); + + float32_t b0_lane0 = vgetq_lane_f32(b_f32, 0); + c0 = vfmaq_n_f32(c0, a_f32, b0_lane0); + + float32_t b0_lane1 = vgetq_lane_f32(b_f32, 1); + c1 = vfmaq_n_f32(c1, a_f32, b0_lane1); + + A += 4; + B += 2; + } + + FLOAT *col_0 = C + 0 * ldc; + FLOAT *col_1 = C + 1 * ldc; + + float32x4_t t0 = vld1q_f32(col_0); + t0 = vaddq_f32(t0, vmulq_n_f32(c0, alpha)); + vst1q_f32(col_0, t0); + + float32x4_t t1 = vld1q_f32(col_1); + t1 = vaddq_f32(t1, vmulq_n_f32(c1, alpha)); + vst1q_f32(col_1, t1); +} + +static inline void kernel_2x2(BLASLONG K, const float16_t *A, const float16_t *B, FLOAT *C, BLASLONG ldc, FLOAT alpha) { + float32x2_t c0 = vdup_n_f32(0.0f); + float32x2_t c1 = vdup_n_f32(0.0f); + + for (BLASLONG k = 0; k < K; ++k) { + float32x4_t a_f32 = vcvt_f32_f16(vld1_f16(A)); + float32x2_t a_low = vget_low_f32(a_f32); + + float32x4_t b_f32 = vcvt_f32_f16(vld1_f16(B)); + + float32_t b0_lane0 = vgetq_lane_f32(b_f32, 0); + c0 = vfma_n_f32(c0, a_low, b0_lane0); + + float32_t b0_lane1 = vgetq_lane_f32(b_f32, 1); + c1 = vfma_n_f32(c1, a_low, b0_lane1); + ; + + A += 2; + B += 2; + } + + FLOAT *col_0 = C + 0 * ldc; + FLOAT *col_1 = C + 1 * ldc; + + float32x2_t t0 = vld1_f32(col_0); + t0 = vadd_f32(t0, vmul_n_f32(c0, alpha)); + vst1_f32(col_0, t0); + + float32x2_t t1 = vld1_f32(col_1); + t1 = vadd_f32(t1, vmul_n_f32(c1, alpha)); + vst1_f32(col_1, t1); +} + +static inline void kernel_1x2(BLASLONG K, const float16_t *A, const float16_t *B, FLOAT *C, BLASLONG ldc, FLOAT alpha) { + FLOAT c0 = 0, c1 = 0; + for (BLASLONG k = 0; k < K; ++k) { + FLOAT a = A[0]; + c0 += a * B[0]; + c1 += a * B[1]; + + A += 1; + B += 2; + } + + C[0 * ldc] += alpha * c0; + C[1 * ldc] += alpha * c1; +} + +static inline void kernel_8x1(BLASLONG K, const float16_t *A, const float16_t *B, FLOAT *C, FLOAT alpha) { + float32x4_t c0_low = vdupq_n_f32(0.0f); + float32x4_t c0_high = vdupq_n_f32(0.0f); + + for (BLASLONG k = 0; k < K; ++k) { + float16x8_t a_f16 = vld1q_f16(A); + float32x4_t a_low = vcvt_f32_f16(vget_low_f16(a_f16)); + float32x4_t a_high = vcvt_f32_f16(vget_high_f16(a_f16)); + + float b_scalar = (float)B[0]; + + c0_low = vfmaq_n_f32(c0_low, a_low, b_scalar); + c0_high = vfmaq_n_f32(c0_high, a_high, b_scalar); + + A += 8; + B += 1; + } + + FLOAT *col_0 = C; + + float32x4_t t0_l = vld1q_f32(col_0); + float32x4_t t0_h = vld1q_f32(col_0 + 4); + t0_l = vaddq_f32(t0_l, vmulq_n_f32(c0_low, alpha)); + t0_h = vaddq_f32(t0_h, vmulq_n_f32(c0_high, alpha)); + vst1q_f32(col_0, t0_l); + vst1q_f32(col_0 + 4, t0_h); +} + +static inline void kernel_4x1(BLASLONG K, const float16_t *A, const float16_t *B, FLOAT *C, FLOAT alpha) { + float32x4_t c0 = vdupq_n_f32(0.0f); + + for (BLASLONG k = 0; k < K; ++k) { + float32x4_t a_f32 = vcvt_f32_f16(vld1_f16(A)); + float b_scalar = (float)B[0]; + c0 = vfmaq_n_f32(c0, a_f32, b_scalar); + + A += 4; + B += 1; + } + + FLOAT *col_0 = C; + float32x4_t t0 = vld1q_f32(col_0); + t0 = vaddq_f32(t0, vmulq_n_f32(c0, alpha)); + vst1q_f32(col_0, t0); +} + +static inline void kernel_2x1(BLASLONG K, const float16_t *A, const float16_t *B, FLOAT *C, FLOAT alpha) { + float32x2_t c0 = vdup_n_f32(0.0f); + + for (BLASLONG k = 0; k < K; ++k) { + float32x4_t a_f32 = vcvt_f32_f16(vld1_f16(A)); + float32x2_t a_low = vget_low_f32(a_f32); + + float b_scalar = (float)B[0]; + c0 = vfma_n_f32(c0, a_low, b_scalar); + + A += 2; + B += 1; + } + + FLOAT *col_0 = C; + float32x2_t t0 = vld1_f32(col_0); + t0 = vadd_f32(t0, vmul_n_f32(c0, alpha)); + vst1_f32(col_0, t0); +} + +static inline void kernel_1x1(BLASLONG K, const float16_t *A, const float16_t *B, FLOAT *C, FLOAT alpha) { + FLOAT sum = 0.0f; + for (BLASLONG k = 0; k < K; ++k) { + sum += A[0] * B[0]; + A += 1; + B += 1; + } + + C[0] += alpha * sum; +} + +int CNAME(BLASLONG M, BLASLONG N, BLASLONG K, FLOAT alpha, IFLOAT *A, IFLOAT *B, FLOAT *C, BLASLONG ldc) { + float16_t *A_base = (float16_t *)A; + float16_t *B_base = (float16_t *)B; + + FLOAT *Ccol = C; + BLASLONG m_rem1, m_rem2, m_rem3, m_rem4; + + while (N >= 8) { + const float16_t *Aptr = A_base; + const float16_t *Bptr = B_base; + FLOAT *Crow = Ccol; + + m_rem1 = M; + + while (m_rem1 >= 8) { + kernel_8x8(K, Aptr, Bptr, Crow, ldc, alpha); + Aptr += K * 8; + Crow += 8; + m_rem1 -= 8; + } + if (m_rem1 >= 4) { + kernel_4x8(K, Aptr, Bptr, Crow, ldc, alpha); + Aptr += K * 4; + Crow += 4; + m_rem1 -= 4; + } + if (m_rem1 >= 2) { + kernel_2x8(K, Aptr, Bptr, Crow, ldc, alpha); + Aptr += K * 2; + Crow += 2; + m_rem1 -= 2; + } + if (m_rem1 >= 1) { + kernel_1x8(K, Aptr, Bptr, Crow, ldc, alpha); + } + + B_base += K * 8; + Ccol += ldc * 8; + N -= 8; + } + + if (N >= 4) { + const float16_t *Aptr = A_base; + const float16_t *Bptr = B_base; + FLOAT *Crow = Ccol; + + m_rem2 = M; + while (m_rem2 >= 8) { + kernel_8x4(K, Aptr, Bptr, Crow, ldc, alpha); + Aptr += K * 8; + Crow += 8; + m_rem2 -= 8; + } + if (m_rem2 >= 4) { + kernel_4x4(K, Aptr, Bptr, Crow, ldc, alpha); + Aptr += K * 4; + Crow += 4; + m_rem2 -= 4; + } + if (m_rem2 >= 2) { + kernel_2x4(K, Aptr, Bptr, Crow, ldc, alpha); + Aptr += K * 2; + Crow += 2; + m_rem2 -= 2; + } + if (m_rem2 >= 1) { + kernel_1x4(K, Aptr, Bptr, Crow, ldc, alpha); + } + + B_base += K * 4; + Ccol += ldc * 4; + N -= 4; + } + + if (N >= 2) { + const float16_t *Aptr = A_base; + const float16_t *Bptr = B_base; + FLOAT *Crow = Ccol; + + m_rem3 = M; + while (m_rem3 >= 8) { + kernel_8x2(K, Aptr, Bptr, Crow, ldc, alpha); + Aptr += K * 8; + Crow += 8; + m_rem3 -= 8; + } + if (m_rem3 >= 4) { + kernel_4x2(K, Aptr, Bptr, Crow, ldc, alpha); + Aptr += K * 4; + Crow += 4; + m_rem3 -= 4; + } + if (m_rem3 >= 2) { + kernel_2x2(K, Aptr, Bptr, Crow, ldc, alpha); + Aptr += K * 2; + Crow += 2; + m_rem3 -= 2; + } + if (m_rem3 >= 1) { + kernel_1x2(K, Aptr, Bptr, Crow, ldc, alpha); + } + + B_base += K * 2; + Ccol += ldc * 2; + N -= 2; + } + + if (N >= 1) { + const float16_t *Aptr = A_base; + const float16_t *Bptr = B_base; + FLOAT *Crow = Ccol; + + m_rem4 = M; + while (m_rem4 >= 8) { + kernel_8x1(K, Aptr, Bptr, Crow, alpha); + Aptr += K * 8; + Crow += 8; + m_rem4 -= 8; + } + if (m_rem4 >= 4) { + kernel_4x1(K, Aptr, Bptr, Crow, alpha); + Aptr += K * 4; + Crow += 4; + m_rem4 -= 4; + } + if (m_rem4 >= 2) { + kernel_2x1(K, Aptr, Bptr, Crow, alpha); + Aptr += K * 2; + Crow += 2; + m_rem4 -= 2; + } + if (m_rem4 >= 1) { + kernel_1x1(K, Aptr, Bptr, Crow, alpha); + } + } + + return 0; +} \ No newline at end of file diff --git a/kernel/arm64/shgemm_ncopy_8_neoversen2.c b/kernel/arm64/shgemm_ncopy_8_neoversen2.c new file mode 100644 index 0000000000..3229a22f24 --- /dev/null +++ b/kernel/arm64/shgemm_ncopy_8_neoversen2.c @@ -0,0 +1,258 @@ +/*************************************************************************** + * Copyright (c) 2026, The OpenBLAS Project + * All rights reserved. + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * 3. Neither the name of the OpenBLAS project nor the names of + * its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * *****************************************************************************/ + +#include + +#include "common.h" + +static inline void transpose8x8(float16x8_t *rows, float16x8_t *cols) { + float64x2_t b0 = vtrn1q_f64(vreinterpretq_f64_f16(rows[0]), vreinterpretq_f64_f16(rows[4])); + float64x2_t b1 = vtrn1q_f64(vreinterpretq_f64_f16(rows[1]), vreinterpretq_f64_f16(rows[5])); + float64x2_t b2 = vtrn1q_f64(vreinterpretq_f64_f16(rows[2]), vreinterpretq_f64_f16(rows[6])); + float64x2_t b3 = vtrn1q_f64(vreinterpretq_f64_f16(rows[3]), vreinterpretq_f64_f16(rows[7])); + float64x2_t b4 = vtrn2q_f64(vreinterpretq_f64_f16(rows[0]), vreinterpretq_f64_f16(rows[4])); + float64x2_t b5 = vtrn2q_f64(vreinterpretq_f64_f16(rows[1]), vreinterpretq_f64_f16(rows[5])); + float64x2_t b6 = vtrn2q_f64(vreinterpretq_f64_f16(rows[2]), vreinterpretq_f64_f16(rows[6])); + float64x2_t b7 = vtrn2q_f64(vreinterpretq_f64_f16(rows[3]), vreinterpretq_f64_f16(rows[7])); + + float32x4_t c0 = vtrn1q_f32(vreinterpretq_f32_f64(b0), vreinterpretq_f32_f64(b2)); + float32x4_t c1 = vtrn1q_f32(vreinterpretq_f32_f64(b1), vreinterpretq_f32_f64(b3)); + float32x4_t c2 = vtrn2q_f32(vreinterpretq_f32_f64(b0), vreinterpretq_f32_f64(b2)); + float32x4_t c3 = vtrn2q_f32(vreinterpretq_f32_f64(b1), vreinterpretq_f32_f64(b3)); + float32x4_t c4 = vtrn1q_f32(vreinterpretq_f32_f64(b4), vreinterpretq_f32_f64(b6)); + float32x4_t c5 = vtrn1q_f32(vreinterpretq_f32_f64(b5), vreinterpretq_f32_f64(b7)); + float32x4_t c6 = vtrn2q_f32(vreinterpretq_f32_f64(b4), vreinterpretq_f32_f64(b6)); + float32x4_t c7 = vtrn2q_f32(vreinterpretq_f32_f64(b5), vreinterpretq_f32_f64(b7)); + + float16x8_t d0 = vtrn1q_f16(vreinterpretq_f16_f32(c0), vreinterpretq_f16_f32(c1)); + float16x8_t d1 = vtrn2q_f16(vreinterpretq_f16_f32(c0), vreinterpretq_f16_f32(c1)); + float16x8_t d2 = vtrn1q_f16(vreinterpretq_f16_f32(c2), vreinterpretq_f16_f32(c3)); + float16x8_t d3 = vtrn2q_f16(vreinterpretq_f16_f32(c2), vreinterpretq_f16_f32(c3)); + float16x8_t d4 = vtrn1q_f16(vreinterpretq_f16_f32(c4), vreinterpretq_f16_f32(c5)); + float16x8_t d5 = vtrn2q_f16(vreinterpretq_f16_f32(c4), vreinterpretq_f16_f32(c5)); + float16x8_t d6 = vtrn1q_f16(vreinterpretq_f16_f32(c6), vreinterpretq_f16_f32(c7)); + float16x8_t d7 = vtrn2q_f16(vreinterpretq_f16_f32(c6), vreinterpretq_f16_f32(c7)); + + cols[0] = d0; + cols[1] = d1; + cols[2] = d2; + cols[3] = d3; + cols[4] = d4; + cols[5] = d5; + cols[6] = d6; + cols[7] = d7; +} + +static inline void transpose_4x4(float16x4_t *rows, float16x4_t *cols) { + float16x8_t t0 = vcombine_f16(rows[0], vdup_n_f16(0.0f)); + float16x8_t t1 = vcombine_f16(rows[1], vdup_n_f16(0.0f)); + float16x8_t t2 = vcombine_f16(rows[2], vdup_n_f16(0.0f)); + float16x8_t t3 = vcombine_f16(rows[3], vdup_n_f16(0.0f)); + + float16x8_t t02 = vzip1q_f16(t0, t2); + float16x8_t t13 = vzip1q_f16(t1, t3); + + float16x8x2_t t0123 = vzipq_f16(t02, t13); + + cols[0] = vget_low_f16(t0123.val[0]); + cols[1] = vget_high_f16(t0123.val[0]); + cols[2] = vget_low_f16(t0123.val[1]); + cols[3] = vget_high_f16(t0123.val[1]); +} + +int CNAME(BLASLONG m, BLASLONG n, IFLOAT *a, BLASLONG lda, IFLOAT *b) { + BLASLONG i, j; + IFLOAT *a_offset = a; + IFLOAT *b_offset = b; + + float16x8_t v0, v1, v2, v3, v4, v5, v6, v7; + float16x4_t v8, v9, v10, v11; + + BLASLONG n8 = n >> 3; + + for (j = 0; j < n8; j++) { + IFLOAT *a0 = a_offset; + IFLOAT *a1 = a0 + lda; + IFLOAT *a2 = a1 + lda; + IFLOAT *a3 = a2 + lda; + IFLOAT *a4 = a3 + lda; + IFLOAT *a5 = a4 + lda; + IFLOAT *a6 = a5 + lda; + IFLOAT *a7 = a6 + lda; + a_offset += 8 * lda; + + BLASLONG m8 = m >> 3; + for (i = 0; i < m8; i++) { + v0 = vld1q_f16((float16_t *)a0); + v1 = vld1q_f16((float16_t *)a1); + v2 = vld1q_f16((float16_t *)a2); + v3 = vld1q_f16((float16_t *)a3); + v4 = vld1q_f16((float16_t *)a4); + v5 = vld1q_f16((float16_t *)a5); + v6 = vld1q_f16((float16_t *)a6); + v7 = vld1q_f16((float16_t *)a7); + + float16x8_t rows[8] = {v0, v1, v2, v3, v4, v5, v6, v7}; + float16x8_t cols[8]; + transpose8x8(rows, cols); + + vst1q_f16((float16_t *)b_offset, cols[0]); + vst1q_f16((float16_t *)b_offset + 8, cols[1]); + vst1q_f16((float16_t *)b_offset + 16, cols[2]); + vst1q_f16((float16_t *)b_offset + 24, cols[3]); + vst1q_f16((float16_t *)b_offset + 32, cols[4]); + vst1q_f16((float16_t *)b_offset + 40, cols[5]); + vst1q_f16((float16_t *)b_offset + 48, cols[6]); + vst1q_f16((float16_t *)b_offset + 56, cols[7]); + + a0 += 8; + a1 += 8; + a2 += 8; + a3 += 8; + a4 += 8; + a5 += 8; + a6 += 8; + a7 += 8; + b_offset += 64; + } + + BLASLONG i = (m & 7); + if (i > 0) { + for (BLASLONG k = 0; k < i; k++) { + *(b_offset + 0) = *a0; + *(b_offset + 1) = *a1; + *(b_offset + 2) = *a2; + *(b_offset + 3) = *a3; + *(b_offset + 4) = *a4; + *(b_offset + 5) = *a5; + *(b_offset + 6) = *a6; + *(b_offset + 7) = *a7; + + a0++; + a1++; + a2++; + a3++; + a4++; + a5++; + a6++; + a7++; + + b_offset += 8; + } + } + } + + if (n & 4) { + IFLOAT *a0 = a_offset; + IFLOAT *a1 = a0 + lda; + IFLOAT *a2 = a1 + lda; + IFLOAT *a3 = a2 + lda; + a_offset += 4 * lda; + + BLASLONG m4 = m >> 2; + for (i = 0; i < m4; i++) { + v8 = vld1_f16((float16_t *)a0); + v9 = vld1_f16((float16_t *)a1); + v10 = vld1_f16((float16_t *)a2); + v11 = vld1_f16((float16_t *)a3); + + float16x4_t rows[4] = {v8, v9, v10, v11}; + float16x4_t cols[4]; + transpose_4x4(rows, cols); + + vst1_f16((float16_t *)b_offset, cols[0]); + vst1_f16((float16_t *)b_offset + 4, cols[1]); + vst1_f16((float16_t *)b_offset + 8, cols[2]); + vst1_f16((float16_t *)b_offset + 12, cols[3]); + + a0 += 4; + a1 += 4; + a2 += 4; + a3 += 4; + b_offset += 16; + } + + BLASLONG i = (m & 3); + if (i > 0) { + for (BLASLONG k = 0; k < i; k++) { + *(b_offset + 0) = *a0; + *(b_offset + 1) = *a1; + *(b_offset + 2) = *a2; + *(b_offset + 3) = *a3; + + a0++; + a1++; + a2++; + a3++; + + b_offset += 4; + } + } + } + + if (n & 2) { + IFLOAT *a0 = a_offset; + IFLOAT *a1 = a0 + lda; + a_offset += 2 * lda; + + BLASLONG m2 = m >> 1; + for (i = 0; i < m2; i++) { + + v8 = vld1_f16((float16_t *)a0); + v9 = vld1_f16((float16_t *)a1); + + float16_t col0[2] = {vget_lane_f16(v8, 0), vget_lane_f16(v9, 0)}; + float16_t col1[2] = {vget_lane_f16(v8, 1), vget_lane_f16(v9, 1)}; + + b_offset[0] = col0[0]; + b_offset[1] = col0[1]; + b_offset[2] = col1[0]; + b_offset[3] = col1[1]; + + a0 += 2; + a1 += 2; + b_offset += 4; + } + + if (m & 1) { + b_offset[0] = *a0; + b_offset[1] = *a1; + b_offset += 2; + } + } + + if (n & 1) { + IFLOAT *a0 = a_offset; + for (i = 0; i < m; i++) { + *b_offset++ = *a0; + a0++; + } + } + + return 0; +} \ No newline at end of file diff --git a/kernel/arm64/shgemm_tcopy_8_neoversen2.c b/kernel/arm64/shgemm_tcopy_8_neoversen2.c new file mode 100644 index 0000000000..275abf124f --- /dev/null +++ b/kernel/arm64/shgemm_tcopy_8_neoversen2.c @@ -0,0 +1,87 @@ +/*************************************************************************** + * Copyright (c) 2026, The OpenBLAS Project + * All rights reserved. + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * 3. Neither the name of the OpenBLAS project nor the names of + * its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * *****************************************************************************/ + +#include + +#include "common.h" + +int CNAME(BLASLONG m, BLASLONG n, IFLOAT *a, BLASLONG lda, IFLOAT *b) { + BLASLONG i, j; + IFLOAT *aoffset, *aoffset1; + IFLOAT *boffset, *boffset1; + IFLOAT *boffset2, *boffset3, *boffset4; + + aoffset = a; + boffset = b; + + boffset2 = b + m * (n & ~7); + boffset3 = b + m * (n & ~3); + boffset4 = b + m * (n & ~1); + + svbool_t pg8 = svwhilelt_b16(0, 8); + svbool_t pg4 = svwhilelt_b16(0, 4); + + for (j = 0; j < m; j++) { + aoffset1 = aoffset; + boffset1 = boffset; + + aoffset += lda; + boffset += 8; + + for (i = 0; i < (n >> 3); i++) { + svfloat16_t v0 = svld1_f16(pg8, (float16_t *)aoffset1); + svst1_f16(pg8, (float16_t *)boffset1, v0); + + aoffset1 += 8; + boffset1 += 8 * m; + } + + if (n & 4) { + svfloat16_t v0 = svld1_f16(pg4, (float16_t *)aoffset1); + svst1_f16(pg4, (float16_t *)boffset2, v0); + + aoffset1 += 4; + boffset2 += 4; + } + + if (n & 2) { + boffset3[0] = aoffset1[0]; + boffset3[1] = aoffset1[1]; + aoffset1 += 2; + boffset3 += 2; + } + + if (n & 1) { + boffset4[0] = aoffset1[0]; + aoffset1 += 1; + boffset4 += 1; + } + } + + return 0; +} \ No newline at end of file diff --git a/kernel/generic/gemmkernel_2x2.c b/kernel/generic/gemmkernel_2x2.c index 07da2cbc87..94dcaea5f3 100644 --- a/kernel/generic/gemmkernel_2x2.c +++ b/kernel/generic/gemmkernel_2x2.c @@ -30,6 +30,12 @@ #include "conversion_macros.h" +#ifdef BGEMM +#define C_TO_F32 TO_F32 +#else +#define C_TO_F32 +#endif + int CNAME(BLASLONG bm,BLASLONG bn,BLASLONG bk,FLOAT alpha,IFLOAT* ba,IFLOAT* bb,FLOAT* C,BLASLONG ldc #ifdef TRMMKERNEL ,BLASLONG offset @@ -108,13 +114,13 @@ int CNAME(BLASLONG bm,BLASLONG bn,BLASLONG bk,FLOAT alpha,IFLOAT* ba,IFLOAT* bb, ptrbb = ptrbb+2; } res0 = res0*ALPHA; - C0[0] = TO_OUTPUT(TO_F32(C0[0])+res0); + C0[0] = TO_OUTPUT(C_TO_F32(C0[0])+res0); res1 = res1*ALPHA; - C0[1] = TO_OUTPUT(TO_F32(C0[1])+res1); + C0[1] = TO_OUTPUT(C_TO_F32(C0[1])+res1); res2 = res2*ALPHA; - C1[0] = TO_OUTPUT(TO_F32(C1[0])+res2); + C1[0] = TO_OUTPUT(C_TO_F32(C1[0])+res2); res3 = res3*ALPHA; - C1[1] = TO_OUTPUT(TO_F32(C1[1])+res3); + C1[1] = TO_OUTPUT(C_TO_F32(C1[1])+res3); C0 = C0+2; C1 = C1+2; } @@ -134,9 +140,9 @@ int CNAME(BLASLONG bm,BLASLONG bn,BLASLONG bk,FLOAT alpha,IFLOAT* ba,IFLOAT* bb, ptrbb = ptrbb+2; } res0 = res0*ALPHA; - C0[0] = TO_OUTPUT(TO_F32(C0[0])+res0); + C0[0] = TO_OUTPUT(C_TO_F32(C0[0])+res0); res1 = res1*ALPHA; - C1[0] = TO_OUTPUT(TO_F32(C1[0])+res1); + C1[0] = TO_OUTPUT(C_TO_F32(C1[0])+res1); C0 = C0+1; C1 = C1+1; } @@ -165,9 +171,9 @@ int CNAME(BLASLONG bm,BLASLONG bn,BLASLONG bk,FLOAT alpha,IFLOAT* ba,IFLOAT* bb, ptrbb = ptrbb+1; } res0 = res0*ALPHA; - C0[0] = TO_OUTPUT(TO_F32(C0[0])+res0); + C0[0] = TO_OUTPUT(C_TO_F32(C0[0])+res0); res1 = res1*ALPHA; - C0[1] = TO_OUTPUT(TO_F32(C0[1])+res1); + C0[1] = TO_OUTPUT(C_TO_F32(C0[1])+res1); C0 = C0+2; } for (i=0; i<(bm&1); i+=1) @@ -183,7 +189,7 @@ int CNAME(BLASLONG bm,BLASLONG bn,BLASLONG bk,FLOAT alpha,IFLOAT* ba,IFLOAT* bb, ptrbb = ptrbb+1; } res0 = res0*ALPHA; - C0[0] = TO_OUTPUT(TO_F32(C0[0])+res0); + C0[0] = TO_OUTPUT(C_TO_F32(C0[0])+res0); C0 = C0+1; } k = (bk<<0); diff --git a/kernel/mips/KERNEL b/kernel/mips/KERNEL index a6ad0bf028..1a11e0e920 100644 --- a/kernel/mips/KERNEL +++ b/kernel/mips/KERNEL @@ -1,17 +1,17 @@ ifndef SNRM2KERNEL -SNRM2KERNEL = nrm2.c +SNRM2KERNEL = ../arm/nrm2.c endif ifndef DNRM2KERNEL -DNRM2KERNEL = nrm2.c +DNRM2KERNEL = ../arm/nrm2.c endif ifndef CNRM2KERNEL -CNRM2KERNEL = znrm2.c +CNRM2KERNEL = ../arm/znrm2.c endif ifndef ZNRM2KERNEL -ZNRM2KERNEL = znrm2.c +ZNRM2KERNEL = ../arm/znrm2.c endif ifndef SCABS_KERNEL diff --git a/kernel/mips/KERNEL.P5600 b/kernel/mips/KERNEL.P5600 index c37b88adbe..72400f79eb 100644 --- a/kernel/mips/KERNEL.P5600 +++ b/kernel/mips/KERNEL.P5600 @@ -1,50 +1,13 @@ -SAMAXKERNEL = ../mips/amax.c -DAMAXKERNEL = ../mips/amax.c -CAMAXKERNEL = ../mips/zamax.c -ZAMAXKERNEL = ../mips/zamax.c - -SAMINKERNEL = ../mips/amin.c -DAMINKERNEL = ../mips/amin.c -CAMINKERNEL = ../mips/zamin.c -ZAMINKERNEL = ../mips/zamin.c - -SMAXKERNEL = ../mips/max.c -DMAXKERNEL = ../mips/max.c - -SMINKERNEL = ../mips/min.c -DMINKERNEL = ../mips/min.c - -ISAMAXKERNEL = ../mips/iamax.c -IDAMAXKERNEL = ../mips/iamax.c -ICAMAXKERNEL = ../mips/izamax.c -IZAMAXKERNEL = ../mips/izamax.c - -ISAMINKERNEL = ../mips/iamin.c -IDAMINKERNEL = ../mips/iamin.c -ICAMINKERNEL = ../mips/izamin.c -IZAMINKERNEL = ../mips/izamin.c - -ISMAXKERNEL = ../mips/imax.c -IDMAXKERNEL = ../mips/imax.c - -ISMINKERNEL = ../mips/imin.c -IDMINKERNEL = ../mips/imin.c - -SSUMKERNEL = ../mips/sum.c -DSUMKERNEL = ../mips/sum.c -CSUMKERNEL = ../mips/zsum.c -ZSUMKERNEL = ../mips/zsum.c - ifndef NO_MSA SASUMKERNEL = ../mips/sasum_msa.c DASUMKERNEL = ../mips/dasum_msa.c CASUMKERNEL = ../mips/casum_msa.c ZASUMKERNEL = ../mips/zasum_msa.c else -SASUMKERNEL = ../mips/asum.c -DASUMKERNEL = ../mips/asum.c -CASUMKERNEL = ../mips/zasum.c -ZASUMKERNEL = ../mips/zasum.c +SASUMKERNEL = ../arm/asum.c +DASUMKERNEL = ../arm/asum.c +CASUMKERNEL = ../arm/zasum.c +ZASUMKERNEL = ../arm/zasum.c endif ifndef NO_MSA @@ -53,10 +16,10 @@ DAXPYKERNEL = ../mips/daxpy_msa.c CAXPYKERNEL = ../mips/caxpy_msa.c ZAXPYKERNEL = ../mips/zaxpy_msa.c else -SAXPYKERNEL = ../mips/axpy.c -DAXPYKERNEL = ../mips/axpy.c -CAXPYKERNEL = ../mips/zaxpy.c -ZAXPYKERNEL = ../mips/zaxpy.c +SAXPYKERNEL = ../arm/axpy.c +DAXPYKERNEL = ../arm/axpy.c +CAXPYKERNEL = ../arm/zaxpy.c +ZAXPYKERNEL = ../arm/zaxpy.c endif ifndef NO_MSA @@ -65,10 +28,10 @@ DCOPYKERNEL = ../mips/dcopy_msa.c CCOPYKERNEL = ../mips/ccopy_msa.c ZCOPYKERNEL = ../mips/zcopy_msa.c else -SCOPYKERNEL = ../mips/copy.c -DCOPYKERNEL = ../mips/copy.c -CCOPYKERNEL = ../mips/zcopy.c -ZCOPYKERNEL = ../mips/zcopy.c +SCOPYKERNEL = ../arm/copy.c +DCOPYKERNEL = ../arm/copy.c +CCOPYKERNEL = ../arm/zcopy.c +ZCOPYKERNEL = ../arm/zcopy.c endif ifndef NO_MSA @@ -77,16 +40,16 @@ DDOTKERNEL = ../mips/ddot_msa.c CDOTKERNEL = ../mips/cdot_msa.c ZDOTKERNEL = ../mips/zdot_msa.c else -SDOTKERNEL = ../mips/dot.c -DDOTKERNEL = ../mips/dot.c -CDOTKERNEL = ../mips/zdot.c -ZDOTKERNEL = ../mips/zdot.c +SDOTKERNEL = ../arm/dot.c +DDOTKERNEL = ../arm/dot.c +CDOTKERNEL = ../arm/zdot.c +ZDOTKERNEL = ../arm/zdot.c endif -SNRM2KERNEL = ../mips/nrm2.c -DNRM2KERNEL = ../mips/nrm2.c -CNRM2KERNEL = ../mips/znrm2.c -ZNRM2KERNEL = ../mips/znrm2.c +SNRM2KERNEL = ../arm/nrm2.c +DNRM2KERNEL = ../arm/nrm2.c +CNRM2KERNEL = ../arm/znrm2.c +ZNRM2KERNEL = ../arm/znrm2.c ifndef NO_MSA SROTKERNEL = ../mips/srot_msa.c @@ -94,10 +57,10 @@ DROTKERNEL = ../mips/drot_msa.c CROTKERNEL = ../mips/crot_msa.c ZROTKERNEL = ../mips/zrot_msa.c else -SROTKERNEL = ../mips/rot.c -DROTKERNEL = ../mips/rot.c -CROTKERNEL = ../mips/zrot.c -ZROTKERNEL = ../mips/zrot.c +SROTKERNEL = ../arm/rot.c +DROTKERNEL = ../arm/rot.c +CROTKERNEL = ../arm/zrot.c +ZROTKERNEL = ../arm/zrot.c endif ifndef NO_MSA @@ -105,13 +68,13 @@ SSCALKERNEL = ../mips/sscal_msa.c DSCALKERNEL = ../mips/dscal_msa.c #CSCALKERNEL = ../mips/cscal_msa.c #ZSCALKERNEL = ../mips/zscal_msa.c -CSCALKERNEL = ../mips/zscal.c -ZSCALKERNEL = ../mips/zscal.c +CSCALKERNEL = ../arm/zscal.c +ZSCALKERNEL = ../arm/zscal.c else -SSCALKERNEL = ../mips/scal.c -DSCALKERNEL = ../mips/scal.c -CSCALKERNEL = ../mips/zscal.c -ZSCALKERNEL = ../mips/zscal.c +SSCALKERNEL = ../arm/scal.c +DSCALKERNEL = ../arm/scal.c +CSCALKERNEL = ../arm/zscal.c +ZSCALKERNEL = ../arm/zscal.c endif ifndef NO_MSA @@ -120,10 +83,10 @@ DSWAPKERNEL = ../mips/dswap_msa.c CSWAPKERNEL = ../mips/cswap_msa.c ZSWAPKERNEL = ../mips/zswap_msa.c else -SSWAPKERNEL = ../mips/swap.c -DSWAPKERNEL = ../mips/swap.c -CSWAPKERNEL = ../mips/zswap.c -ZSWAPKERNEL = ../mips/zswap.c +SSWAPKERNEL = ../arm/swap.c +DSWAPKERNEL = ../arm/swap.c +CSWAPKERNEL = ../arm/zswap.c +ZSWAPKERNEL = ../arm/zswap.c endif ifndef NO_MSA @@ -132,10 +95,10 @@ DGEMVNKERNEL = ../mips/dgemv_n_msa.c CGEMVNKERNEL = ../mips/cgemv_n_msa.c ZGEMVNKERNEL = ../mips/zgemv_n_msa.c else -SGEMVNKERNEL = ../mips/gemv_n.c -DGEMVNKERNEL = ../mips/gemv_n.c -CGEMVNKERNEL = ../mips/zgemv_n.c -ZGEMVNKERNEL = ../mips/zgemv_n.c +SGEMVNKERNEL = ../arm/gemv_n.c +DGEMVNKERNEL = ../arm/gemv_n.c +CGEMVNKERNEL = ../arm/zgemv_n.c +ZGEMVNKERNEL = ../arm/zgemv_n.c endif ifndef NO_MSA @@ -144,25 +107,24 @@ DGEMVTKERNEL = ../mips/dgemv_t_msa.c CGEMVTKERNEL = ../mips/cgemv_t_msa.c ZGEMVTKERNEL = ../mips/zgemv_t_msa.c else -SGEMVTKERNEL = ../mips/gemv_t.c -DGEMVTKERNEL = ../mips/gemv_t.c -CGEMVTKERNEL = ../mips/zgemv_t.c -ZGEMVTKERNEL = ../mips/zgemv_t.c +SGEMVTKERNEL = ../arm/gemv_t.c +DGEMVTKERNEL = ../arm/gemv_t.c +CGEMVTKERNEL = ../arm/zgemv_t.c +ZGEMVTKERNEL = ../arm/zgemv_t.c endif ifndef NO_MSA SGEMMKERNEL = ../mips/sgemm_kernel_8x8_msa.c SGEMMONCOPY = ../mips/sgemm_ncopy_8_msa.c SGEMMOTCOPY = ../mips/sgemm_tcopy_8_msa.c -SGEMMONCOPYOBJ = sgemm_oncopy.o -SGEMMOTCOPYOBJ = sgemm_otcopy.o else SGEMMKERNEL = ../generic/gemmkernel_2x2.c SGEMMONCOPY = ../generic/gemm_ncopy_2.c SGEMMOTCOPY = ../generic/gemm_tcopy_2.c +endif + SGEMMONCOPYOBJ = sgemm_oncopy.o SGEMMOTCOPYOBJ = sgemm_otcopy.o -endif ifndef NO_MSA DGEMMKERNEL = ../mips/dgemm_kernel_8x4_msa.c @@ -172,15 +134,14 @@ DGEMMONCOPY = ../mips/dgemm_ncopy_4_msa.c DGEMMOTCOPY = ../mips/dgemm_tcopy_4_msa.c DGEMMINCOPYOBJ = dgemm_incopy.o DGEMMITCOPYOBJ = dgemm_itcopy.o -DGEMMONCOPYOBJ = dgemm_oncopy.o -DGEMMOTCOPYOBJ = dgemm_otcopy.o else DGEMMKERNEL = ../generic/gemmkernel_2x2.c DGEMMONCOPY = ../generic/gemm_ncopy_2.c DGEMMOTCOPY = ../generic/gemm_tcopy_2.c +endif + DGEMMONCOPYOBJ = dgemm_oncopy.o DGEMMOTCOPYOBJ = dgemm_otcopy.o -endif ifndef NO_MSA CGEMMKERNEL = ../mips/cgemm_kernel_8x4_msa.c @@ -190,29 +151,27 @@ CGEMMONCOPY = ../mips/cgemm_ncopy_4_msa.c CGEMMOTCOPY = ../mips/cgemm_tcopy_4_msa.c CGEMMINCOPYOBJ = cgemm_incopy.o CGEMMITCOPYOBJ = cgemm_itcopy.o -CGEMMONCOPYOBJ = cgemm_oncopy.o -CGEMMOTCOPYOBJ = cgemm_otcopy.o else CGEMMKERNEL = ../generic/zgemmkernel_2x2.c CGEMMONCOPY = ../generic/zgemm_ncopy_2.c CGEMMOTCOPY = ../generic/zgemm_tcopy_2.c +endif + CGEMMONCOPYOBJ = cgemm_oncopy.o CGEMMOTCOPYOBJ = cgemm_otcopy.o -endif ifndef NO_MSA ZGEMMKERNEL = ../mips/zgemm_kernel_4x4_msa.c ZGEMMONCOPY = ../mips/zgemm_ncopy_4_msa.c ZGEMMOTCOPY = ../mips/zgemm_tcopy_4_msa.c -ZGEMMONCOPYOBJ = zgemm_oncopy.o -ZGEMMOTCOPYOBJ = zgemm_otcopy.o else ZGEMMKERNEL = ../generic/zgemmkernel_2x2.c ZGEMMONCOPY = ../generic/zgemm_ncopy_2.c ZGEMMOTCOPY = ../generic/zgemm_tcopy_2.c +endif + ZGEMMONCOPYOBJ = zgemm_oncopy.o ZGEMMOTCOPYOBJ = zgemm_otcopy.o -endif ifndef NO_MSA STRSMKERNEL_LN = ../mips/strsm_kernel_LN_8x8_msa.c @@ -261,3 +220,41 @@ ZTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c ZTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c ZTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c endif + +#Pure C for other kernels +SAMAXKERNEL = ../arm/amax.c +DAMAXKERNEL = ../arm/amax.c +CAMAXKERNEL = ../arm/zamax.c +ZAMAXKERNEL = ../arm/zamax.c + +SAMINKERNEL = ../arm/amin.c +DAMINKERNEL = ../arm/amin.c +CAMINKERNEL = ../arm/zamin.c +ZAMINKERNEL = ../arm/zamin.c + +SMAXKERNEL = ../arm/max.c +DMAXKERNEL = ../arm/max.c + +SMINKERNEL = ../arm/min.c +DMINKERNEL = ../arm/min.c + +ISAMAXKERNEL = ../arm/iamax.c +IDAMAXKERNEL = ../arm/iamax.c +ICAMAXKERNEL = ../arm/izamax.c +IZAMAXKERNEL = ../arm/izamax.c + +ISAMINKERNEL = ../arm/iamin.c +IDAMINKERNEL = ../arm/iamin.c +ICAMINKERNEL = ../arm/izamin.c +IZAMINKERNEL = ../arm/izamin.c + +ISMAXKERNEL = ../arm/imax.c +IDMAXKERNEL = ../arm/imax.c + +ISMINKERNEL = ../arm/imin.c +IDMINKERNEL = ../arm/imin.c + +SSUMKERNEL = ../arm/sum.c +DSUMKERNEL = ../arm/sum.c +CSUMKERNEL = ../arm/zsum.c +ZSUMKERNEL = ../arm/zsum.c diff --git a/kernel/mips/KERNEL.generic b/kernel/mips/KERNEL.generic index 1f03c65942..f246811c34 100644 --- a/kernel/mips/KERNEL.generic +++ b/kernel/mips/KERNEL.generic @@ -53,92 +53,92 @@ ZTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c ZTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c #Pure C for other kernels -SAMAXKERNEL = ../mips/amax.c -DAMAXKERNEL = ../mips/amax.c -CAMAXKERNEL = ../mips/zamax.c -ZAMAXKERNEL = ../mips/zamax.c - -SAMINKERNEL = ../mips/amin.c -DAMINKERNEL = ../mips/amin.c -CAMINKERNEL = ../mips/zamin.c -ZAMINKERNEL = ../mips/zamin.c - -SMAXKERNEL = ../mips/max.c -DMAXKERNEL = ../mips/max.c - -SMINKERNEL = ../mips/min.c -DMINKERNEL = ../mips/min.c - -ISAMAXKERNEL = ../mips/iamax.c -IDAMAXKERNEL = ../mips/iamax.c -ICAMAXKERNEL = ../mips/izamax.c -IZAMAXKERNEL = ../mips/izamax.c - -ISAMINKERNEL = ../mips/iamin.c -IDAMINKERNEL = ../mips/iamin.c -ICAMINKERNEL = ../mips/izamin.c -IZAMINKERNEL = ../mips/izamin.c - -ISMAXKERNEL = ../mips/imax.c -IDMAXKERNEL = ../mips/imax.c - -ISMINKERNEL = ../mips/imin.c -IDMINKERNEL = ../mips/imin.c - -SASUMKERNEL = ../mips/asum.c -DASUMKERNEL = ../mips/asum.c -CASUMKERNEL = ../mips/zasum.c -ZASUMKERNEL = ../mips/zasum.c - -SSUMKERNEL = ../mips/sum.c -DSUMKERNEL = ../mips/sum.c -CSUMKERNEL = ../mips/zsum.c -ZSUMKERNEL = ../mips/zsum.c - -SAXPYKERNEL = ../mips/axpy.c -DAXPYKERNEL = ../mips/axpy.c -CAXPYKERNEL = ../mips/zaxpy.c -ZAXPYKERNEL = ../mips/zaxpy.c - -SCOPYKERNEL = ../mips/copy.c -DCOPYKERNEL = ../mips/copy.c -CCOPYKERNEL = ../mips/zcopy.c -ZCOPYKERNEL = ../mips/zcopy.c - -SDOTKERNEL = ../mips/dot.c -DDOTKERNEL = ../mips/dot.c -CDOTKERNEL = ../mips/zdot.c -ZDOTKERNEL = ../mips/zdot.c - -SNRM2KERNEL = ../mips/nrm2.c -DNRM2KERNEL = ../mips/nrm2.c -CNRM2KERNEL = ../mips/znrm2.c -ZNRM2KERNEL = ../mips/znrm2.c - -SROTKERNEL = ../mips/rot.c -DROTKERNEL = ../mips/rot.c -CROTKERNEL = ../mips/zrot.c -ZROTKERNEL = ../mips/zrot.c - -SSCALKERNEL = ../mips/scal.c -DSCALKERNEL = ../mips/scal.c -CSCALKERNEL = ../mips/zscal.c -ZSCALKERNEL = ../mips/zscal.c - -SSWAPKERNEL = ../mips/swap.c -DSWAPKERNEL = ../mips/swap.c -CSWAPKERNEL = ../mips/zswap.c -ZSWAPKERNEL = ../mips/zswap.c - -SGEMVNKERNEL = ../mips/gemv_n.c -DGEMVNKERNEL = ../mips/gemv_n.c -CGEMVNKERNEL = ../mips/zgemv_n.c -ZGEMVNKERNEL = ../mips/zgemv_n.c - -SGEMVTKERNEL = ../mips/gemv_t.c -DGEMVTKERNEL = ../mips/gemv_t.c -CGEMVTKERNEL = ../mips/zgemv_t.c -ZGEMVTKERNEL = ../mips/zgemv_t.c +SAMAXKERNEL = ../arm/amax.c +DAMAXKERNEL = ../arm/amax.c +CAMAXKERNEL = ../arm/zamax.c +ZAMAXKERNEL = ../arm/zamax.c + +SAMINKERNEL = ../arm/amin.c +DAMINKERNEL = ../arm/amin.c +CAMINKERNEL = ../arm/zamin.c +ZAMINKERNEL = ../arm/zamin.c + +SMAXKERNEL = ../arm/max.c +DMAXKERNEL = ../arm/max.c + +SMINKERNEL = ../arm/min.c +DMINKERNEL = ../arm/min.c + +ISAMAXKERNEL = ../arm/iamax.c +IDAMAXKERNEL = ../arm/iamax.c +ICAMAXKERNEL = ../arm/izamax.c +IZAMAXKERNEL = ../arm/izamax.c + +ISAMINKERNEL = ../arm/iamin.c +IDAMINKERNEL = ../arm/iamin.c +ICAMINKERNEL = ../arm/izamin.c +IZAMINKERNEL = ../arm/izamin.c + +ISMAXKERNEL = ../arm/imax.c +IDMAXKERNEL = ../arm/imax.c + +ISMINKERNEL = ../arm/imin.c +IDMINKERNEL = ../arm/imin.c + +SASUMKERNEL = ../arm/asum.c +DASUMKERNEL = ../arm/asum.c +CASUMKERNEL = ../arm/zasum.c +ZASUMKERNEL = ../arm/zasum.c + +SSUMKERNEL = ../arm/sum.c +DSUMKERNEL = ../arm/sum.c +CSUMKERNEL = ../arm/zsum.c +ZSUMKERNEL = ../arm/zsum.c + +SAXPYKERNEL = ../arm/axpy.c +DAXPYKERNEL = ../arm/axpy.c +CAXPYKERNEL = ../arm/zaxpy.c +ZAXPYKERNEL = ../arm/zaxpy.c + +SCOPYKERNEL = ../arm/copy.c +DCOPYKERNEL = ../arm/copy.c +CCOPYKERNEL = ../arm/zcopy.c +ZCOPYKERNEL = ../arm/zcopy.c + +SDOTKERNEL = ../arm/dot.c +DDOTKERNEL = ../arm/dot.c +CDOTKERNEL = ../arm/zdot.c +ZDOTKERNEL = ../arm/zdot.c + +SNRM2KERNEL = ../arm/nrm2.c +DNRM2KERNEL = ../arm/nrm2.c +CNRM2KERNEL = ../arm/znrm2.c +ZNRM2KERNEL = ../arm/znrm2.c + +SROTKERNEL = ../arm/rot.c +DROTKERNEL = ../arm/rot.c +CROTKERNEL = ../arm/zrot.c +ZROTKERNEL = ../arm/zrot.c + +SSCALKERNEL = ../arm/scal.c +DSCALKERNEL = ../arm/scal.c +CSCALKERNEL = ../arm/zscal.c +ZSCALKERNEL = ../arm/zscal.c + +SSWAPKERNEL = ../arm/swap.c +DSWAPKERNEL = ../arm/swap.c +CSWAPKERNEL = ../arm/zswap.c +ZSWAPKERNEL = ../arm/zswap.c + +SGEMVNKERNEL = ../arm/gemv_n.c +DGEMVNKERNEL = ../arm/gemv_n.c +CGEMVNKERNEL = ../arm/zgemv_n.c +ZGEMVNKERNEL = ../arm/zgemv_n.c + +SGEMVTKERNEL = ../arm/gemv_t.c +DGEMVTKERNEL = ../arm/gemv_t.c +CGEMVTKERNEL = ../arm/zgemv_t.c +ZGEMVTKERNEL = ../arm/zgemv_t.c SSYMV_U_KERNEL = ../generic/symv_k.c SSYMV_L_KERNEL = ../generic/symv_k.c diff --git a/kernel/mips/amax.c b/kernel/mips/amax.c deleted file mode 100644 index ad14081f5c..0000000000 --- a/kernel/mips/amax.c +++ /dev/null @@ -1,66 +0,0 @@ -/*************************************************************************** -Copyright (c) 2016, The OpenBLAS Project -All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are -met: -1. Redistributions of source code must retain the above copyright -notice, this list of conditions and the following disclaimer. -2. Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in -the documentation and/or other materials provided with the -distribution. -3. Neither the name of the OpenBLAS project nor the names of -its contributors may be used to endorse or promote products -derived from this software without specific prior written permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE -USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*****************************************************************************/ - -#include "common.h" -#include - -#if defined(DOUBLE) - -#define ABS fabs - -#else - -#define ABS fabsf - -#endif - - -FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) -{ - BLASLONG i=0; - BLASLONG ix=0; - FLOAT maxf=0.0; - - if (n <= 0 || inc_x <= 0) return(maxf); - - maxf=ABS(x[0]); - ix += inc_x; - i++; - - while(i < n) - { - if( ABS(x[ix]) > maxf ) - { - maxf = ABS(x[ix]); - } - ix += inc_x; - i++; - } - return(maxf); -} - - diff --git a/kernel/mips/amin.c b/kernel/mips/amin.c deleted file mode 100644 index 8079450ff5..0000000000 --- a/kernel/mips/amin.c +++ /dev/null @@ -1,66 +0,0 @@ -/*************************************************************************** -Copyright (c) 2016, The OpenBLAS Project -All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are -met: -1. Redistributions of source code must retain the above copyright -notice, this list of conditions and the following disclaimer. -2. Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in -the documentation and/or other materials provided with the -distribution. -3. Neither the name of the OpenBLAS project nor the names of -its contributors may be used to endorse or promote products -derived from this software without specific prior written permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE -USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*****************************************************************************/ - -#include "common.h" -#include - -#if defined(DOUBLE) - -#define ABS fabs - -#else - -#define ABS fabsf - -#endif - - -FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) -{ - BLASLONG i=0; - BLASLONG ix=0; - FLOAT minf=0.0; - - if (n <= 0 || inc_x <= 0) return(minf); - - minf=ABS(x[0]); - ix += inc_x; - i++; - - while(i < n) - { - if( ABS(x[ix]) < minf ) - { - minf = ABS(x[ix]); - } - ix += inc_x; - i++; - } - return(minf); -} - - diff --git a/kernel/mips/asum.c b/kernel/mips/asum.c deleted file mode 100644 index d221464de0..0000000000 --- a/kernel/mips/asum.c +++ /dev/null @@ -1,57 +0,0 @@ -/*************************************************************************** -Copyright (c) 2016, The OpenBLAS Project -All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are -met: -1. Redistributions of source code must retain the above copyright -notice, this list of conditions and the following disclaimer. -2. Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in -the documentation and/or other materials provided with the -distribution. -3. Neither the name of the OpenBLAS project nor the names of -its contributors may be used to endorse or promote products -derived from this software without specific prior written permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE -USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*****************************************************************************/ - -#include "common.h" -#include - -#if defined(DOUBLE) - -#define ABS fabs - -#else - -#define ABS fabsf - -#endif - - -FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) -{ - BLASLONG i=0; - FLOAT sumf = 0.0; - if (n <= 0 || inc_x <= 0) return(sumf); - - n *= inc_x; - while(i < n) - { - sumf += ABS(x[i]); - i += inc_x; - } - return(sumf); -} - - diff --git a/kernel/mips/axpby.c b/kernel/mips/axpby.c deleted file mode 100644 index af4fccde21..0000000000 --- a/kernel/mips/axpby.c +++ /dev/null @@ -1,95 +0,0 @@ -/*************************************************************************** -Copyright (c) 2016, The OpenBLAS Project -All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are -met: -1. Redistributions of source code must retain the above copyright -notice, this list of conditions and the following disclaimer. -2. Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in -the documentation and/or other materials provided with the -distribution. -3. Neither the name of the OpenBLAS project nor the names of -its contributors may be used to endorse or promote products -derived from this software without specific prior written permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE -USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*****************************************************************************/ - -#include "common.h" - -int CNAME(BLASLONG n, FLOAT alpha, FLOAT *x, BLASLONG inc_x, FLOAT beta, FLOAT *y, BLASLONG inc_y) -{ - BLASLONG i=0; - BLASLONG ix,iy; - - if ( n < 0 ) return(0); - - ix = 0; - iy = 0; - - if ( beta == 0.0 ) - { - - if ( alpha == 0.0 ) - { - while(i < n) - { - y[iy] = 0.0 ; - iy += inc_y ; - i++ ; - } - } - else - { - while(i < n) - { - y[iy] = alpha * x[ix] ; - ix += inc_x ; - iy += inc_y ; - i++ ; - } - - - } - - } - else - { - - if ( alpha == 0.0 ) - { - while(i < n) - { - y[iy] = beta * y[iy] ; - iy += inc_y ; - i++ ; - } - } - else - { - while(i < n) - { - y[iy] = alpha * x[ix] + beta * y[iy] ; - ix += inc_x ; - iy += inc_y ; - i++ ; - } - } - - } - - return(0); - -} - - diff --git a/kernel/mips/axpy.c b/kernel/mips/axpy.c deleted file mode 100644 index 42f181ee13..0000000000 --- a/kernel/mips/axpy.c +++ /dev/null @@ -1,54 +0,0 @@ -/*************************************************************************** -Copyright (c) 2016, The OpenBLAS Project -All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are -met: -1. Redistributions of source code must retain the above copyright -notice, this list of conditions and the following disclaimer. -2. Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in -the documentation and/or other materials provided with the -distribution. -3. Neither the name of the OpenBLAS project nor the names of -its contributors may be used to endorse or promote products -derived from this software without specific prior written permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE -USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*****************************************************************************/ - -#include "common.h" - -int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y, FLOAT *dummy, BLASLONG dummy2) -{ - BLASLONG i=0; - BLASLONG ix,iy; - - if ( n < 0 ) return(0); - if ( da == 0.0 ) return(0); - - ix = 0; - iy = 0; - - while(i < n) - { - - y[iy] += da * x[ix] ; - ix += inc_x ; - iy += inc_y ; - i++ ; - - } - return(0); - -} - - diff --git a/kernel/mips/copy.c b/kernel/mips/copy.c deleted file mode 100644 index 9f488ddb38..0000000000 --- a/kernel/mips/copy.c +++ /dev/null @@ -1,50 +0,0 @@ -/*************************************************************************** -Copyright (c) 2016, The OpenBLAS Project -All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are -met: -1. Redistributions of source code must retain the above copyright -notice, this list of conditions and the following disclaimer. -2. Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in -the documentation and/or other materials provided with the -distribution. -3. Neither the name of the OpenBLAS project nor the names of -its contributors may be used to endorse or promote products -derived from this software without specific prior written permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE -USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*****************************************************************************/ - -#include "common.h" - -int CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y) -{ - BLASLONG i=0; - BLASLONG ix=0,iy=0; - - if ( n < 0 ) return(0); - - while(i < n) - { - - y[iy] = x[ix] ; - ix += inc_x ; - iy += inc_y ; - i++ ; - - } - return(0); - -} - - diff --git a/kernel/mips/dot.c b/kernel/mips/dot.c deleted file mode 100644 index 89c9f80f6b..0000000000 --- a/kernel/mips/dot.c +++ /dev/null @@ -1,58 +0,0 @@ -/*************************************************************************** -Copyright (c) 2016, The OpenBLAS Project -All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are -met: -1. Redistributions of source code must retain the above copyright -notice, this list of conditions and the following disclaimer. -2. Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in -the documentation and/or other materials provided with the -distribution. -3. Neither the name of the OpenBLAS project nor the names of -its contributors may be used to endorse or promote products -derived from this software without specific prior written permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE -USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*****************************************************************************/ - -#include "common.h" - -#if defined(DSDOT) -double CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y) -#else -FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y) -#endif -{ - BLASLONG i=0; - BLASLONG ix=0,iy=0; - double dot = 0.0 ; - - if ( n < 0 ) return(dot); - - while(i < n) - { -#if defined(DSDOT) - dot += (double)y[iy] * (double)x[ix] ; -#else - dot += y[iy] * x[ix]; -#endif - ix += inc_x ; - iy += inc_y ; - i++ ; - - } - return(dot); - -} - - diff --git a/kernel/mips/gemv_n.c b/kernel/mips/gemv_n.c deleted file mode 100644 index 4cc1772097..0000000000 --- a/kernel/mips/gemv_n.c +++ /dev/null @@ -1,56 +0,0 @@ -/*************************************************************************** -Copyright (c) 2016, The OpenBLAS Project -All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are -met: -1. Redistributions of source code must retain the above copyright -notice, this list of conditions and the following disclaimer. -2. Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in -the documentation and/or other materials provided with the -distribution. -3. Neither the name of the OpenBLAS project nor the names of -its contributors may be used to endorse or promote products -derived from this software without specific prior written permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE -USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*****************************************************************************/ - -#include "common.h" - -int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT alpha, FLOAT *a, BLASLONG lda, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y, FLOAT *buffer) -{ - BLASLONG i; - BLASLONG ix,iy; - BLASLONG j; - FLOAT *a_ptr; - FLOAT temp; - - ix = 0; - a_ptr = a; - - for (j=0; j - -#if defined(DOUBLE) - -#define ABS fabs - -#else - -#define ABS fabsf - -#endif - - -BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) -{ - BLASLONG i=0; - BLASLONG ix=0; - FLOAT maxf=0.0; - BLASLONG max=0; - - if (n <= 0 || inc_x <= 0) return(max); - - maxf=ABS(x[0]); - ix += inc_x; - i++; - - while(i < n) - { - if( ABS(x[ix]) > maxf ) - { - max = i; - maxf = ABS(x[ix]); - } - ix += inc_x; - i++; - } - return(max+1); -} - - diff --git a/kernel/mips/iamin.c b/kernel/mips/iamin.c deleted file mode 100644 index 7f1c4d9057..0000000000 --- a/kernel/mips/iamin.c +++ /dev/null @@ -1,68 +0,0 @@ -/*************************************************************************** -Copyright (c) 2016, The OpenBLAS Project -All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are -met: -1. Redistributions of source code must retain the above copyright -notice, this list of conditions and the following disclaimer. -2. Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in -the documentation and/or other materials provided with the -distribution. -3. Neither the name of the OpenBLAS project nor the names of -its contributors may be used to endorse or promote products -derived from this software without specific prior written permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE -USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*****************************************************************************/ - -#include "common.h" -#include - -#if defined(DOUBLE) - -#define ABS fabs - -#else - -#define ABS fabsf - -#endif - - -BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) -{ - BLASLONG i=0; - BLASLONG ix=0; - FLOAT minf=0.0; - BLASLONG min=0; - - if (n <= 0 || inc_x <= 0) return(min); - - minf=ABS(x[0]); - ix += inc_x; - i++; - - while(i < n) - { - if( ABS(x[ix]) < ABS(minf) ) - { - min = i; - minf = ABS(x[ix]); - } - ix += inc_x; - i++; - } - return(min+1); -} - - diff --git a/kernel/mips/imax.c b/kernel/mips/imax.c deleted file mode 100644 index 744bfc0d9b..0000000000 --- a/kernel/mips/imax.c +++ /dev/null @@ -1,59 +0,0 @@ -/*************************************************************************** -Copyright (c) 2016, The OpenBLAS Project -All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are -met: -1. Redistributions of source code must retain the above copyright -notice, this list of conditions and the following disclaimer. -2. Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in -the documentation and/or other materials provided with the -distribution. -3. Neither the name of the OpenBLAS project nor the names of -its contributors may be used to endorse or promote products -derived from this software without specific prior written permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE -USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*****************************************************************************/ - -#include "common.h" -#include - - - -BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) -{ - BLASLONG i=0; - BLASLONG ix=0; - FLOAT maxf=0.0; - BLASLONG max=0; - - if (n <= 0 || inc_x <= 0) return(max); - - maxf=x[0]; - ix += inc_x; - i++; - - while(i < n) - { - if( x[ix] > maxf ) - { - max = i; - maxf = x[ix]; - } - ix += inc_x; - i++; - } - return(max+1); -} - - diff --git a/kernel/mips/imin.c b/kernel/mips/imin.c deleted file mode 100644 index bf130613bf..0000000000 --- a/kernel/mips/imin.c +++ /dev/null @@ -1,59 +0,0 @@ -/*************************************************************************** -Copyright (c) 2016, The OpenBLAS Project -All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are -met: -1. Redistributions of source code must retain the above copyright -notice, this list of conditions and the following disclaimer. -2. Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in -the documentation and/or other materials provided with the -distribution. -3. Neither the name of the OpenBLAS project nor the names of -its contributors may be used to endorse or promote products -derived from this software without specific prior written permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE -USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*****************************************************************************/ - -#include "common.h" -#include - - - -BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) -{ - BLASLONG i=0; - BLASLONG ix=0; - FLOAT minf=0.0; - BLASLONG min=0; - - if (n <= 0 || inc_x <= 0) return(min); - - minf=x[0]; - ix += inc_x; - i++; - - while(i < n) - { - if( x[ix] < minf ) - { - min = i; - minf = x[ix]; - } - ix += inc_x; - i++; - } - return(min+1); -} - - diff --git a/kernel/mips/izamax.c b/kernel/mips/izamax.c deleted file mode 100644 index 708ee921d3..0000000000 --- a/kernel/mips/izamax.c +++ /dev/null @@ -1,72 +0,0 @@ -/*************************************************************************** -Copyright (c) 2016, The OpenBLAS Project -All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are -met: -1. Redistributions of source code must retain the above copyright -notice, this list of conditions and the following disclaimer. -2. Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in -the documentation and/or other materials provided with the -distribution. -3. Neither the name of the OpenBLAS project nor the names of -its contributors may be used to endorse or promote products -derived from this software without specific prior written permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE -USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*****************************************************************************/ - -#include "common.h" -#include - -#if defined(DOUBLE) - -#define ABS fabs - -#else - -#define ABS fabsf - -#endif - -#define CABS1(x,i) ABS(x[i])+ABS(x[i+1]) - -BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) -{ - BLASLONG i=0; - BLASLONG ix=0; - FLOAT maxf; - BLASLONG max=0; - BLASLONG inc_x2; - - if (n <= 0 || inc_x <= 0) return(max); - - inc_x2 = 2 * inc_x; - - maxf = CABS1(x,0); - ix += inc_x2; - i++; - - while(i < n) - { - if( CABS1(x,ix) > maxf ) - { - max = i; - maxf = CABS1(x,ix); - } - ix += inc_x2; - i++; - } - return(max+1); -} - - diff --git a/kernel/mips/izamin.c b/kernel/mips/izamin.c deleted file mode 100644 index 523605ef49..0000000000 --- a/kernel/mips/izamin.c +++ /dev/null @@ -1,72 +0,0 @@ -/*************************************************************************** -Copyright (c) 2016, The OpenBLAS Project -All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are -met: -1. Redistributions of source code must retain the above copyright -notice, this list of conditions and the following disclaimer. -2. Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in -the documentation and/or other materials provided with the -distribution. -3. Neither the name of the OpenBLAS project nor the names of -its contributors may be used to endorse or promote products -derived from this software without specific prior written permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE -USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*****************************************************************************/ - -#include "common.h" -#include - -#if defined(DOUBLE) - -#define ABS fabs - -#else - -#define ABS fabsf - -#endif - -#define CABS1(x,i) ABS(x[i])+ABS(x[i+1]) - -BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) -{ - BLASLONG i=0; - BLASLONG ix=0; - FLOAT minf; - BLASLONG min=0; - BLASLONG inc_x2; - - if (n <= 0 || inc_x <= 0) return(min); - - inc_x2 = 2 * inc_x; - - minf = CABS1(x,0); - ix += inc_x2; - i++; - - while(i < n) - { - if( CABS1(x,ix) < minf ) - { - min = i; - minf = CABS1(x,ix); - } - ix += inc_x2; - i++; - } - return(min+1); -} - - diff --git a/kernel/mips/max.c b/kernel/mips/max.c deleted file mode 100644 index 2ad956bc01..0000000000 --- a/kernel/mips/max.c +++ /dev/null @@ -1,65 +0,0 @@ -/*************************************************************************** -Copyright (c) 2013, The OpenBLAS Project -All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are -met: -1. Redistributions of source code must retain the above copyright -notice, this list of conditions and the following disclaimer. -2. Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in -the documentation and/or other materials provided with the -distribution. -3. Neither the name of the OpenBLAS project nor the names of -its contributors may be used to endorse or promote products -derived from this software without specific prior written permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE -USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*****************************************************************************/ - -/************************************************************************************** -* 2013/09/14 Saar -* BLASTEST float : NoTest -* BLASTEST double : NoTest -* CTEST : NoTest -* TEST : NoTest -* -**************************************************************************************/ - -#include "common.h" -#include - - -FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) -{ - BLASLONG i=0; - BLASLONG ix=0; - FLOAT maxf=0.0; - - if (n <= 0 || inc_x <= 0) return(maxf); - - maxf=x[0]; - ix += inc_x; - i++; - - while(i < n) - { - if( x[ix] > maxf ) - { - maxf = x[ix]; - } - ix += inc_x; - i++; - } - return(maxf); -} - - diff --git a/kernel/mips/min.c b/kernel/mips/min.c deleted file mode 100644 index 2812fe3978..0000000000 --- a/kernel/mips/min.c +++ /dev/null @@ -1,65 +0,0 @@ -/*************************************************************************** -Copyright (c) 2013, The OpenBLAS Project -All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are -met: -1. Redistributions of source code must retain the above copyright -notice, this list of conditions and the following disclaimer. -2. Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in -the documentation and/or other materials provided with the -distribution. -3. Neither the name of the OpenBLAS project nor the names of -its contributors may be used to endorse or promote products -derived from this software without specific prior written permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE -USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*****************************************************************************/ - -/************************************************************************************** -* 2013/09/14 Saar -* BLASTEST float : NoTest -* BLASTEST double : NoTest -* CTEST : NoTest -* TEST : NoTest -* -**************************************************************************************/ - -#include "common.h" -#include - - -FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) -{ - BLASLONG i=0; - BLASLONG ix=0; - FLOAT minf=0.0; - - if (n <= 0 || inc_x <= 0) return(minf); - - minf=x[0]; - ix += inc_x; - i++; - - while(i < n) - { - if( x[ix] < minf ) - { - minf = x[ix]; - } - ix += inc_x; - i++; - } - return(minf); -} - - diff --git a/kernel/mips/nrm2.c b/kernel/mips/nrm2.c deleted file mode 100644 index 0b6323f294..0000000000 --- a/kernel/mips/nrm2.c +++ /dev/null @@ -1,88 +0,0 @@ -/*************************************************************************** -Copyright (c) 2013, The OpenBLAS Project -All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are -met: -1. Redistributions of source code must retain the above copyright -notice, this list of conditions and the following disclaimer. -2. Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in -the documentation and/or other materials provided with the -distribution. -3. Neither the name of the OpenBLAS project nor the names of -its contributors may be used to endorse or promote products -derived from this software without specific prior written permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE -USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*****************************************************************************/ - -/************************************************************************************** -* 2013/09/13 Saar -* BLASTEST float : OK -* BLASTEST double : OK -* CTEST : OK -* TEST : OK -* -**************************************************************************************/ - -#include "common.h" -#include - -#if defined(DOUBLE) - -#define ABS fabs - -#else - -#define ABS fabsf - -#endif - - - -FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) -{ - BLASLONG i=0; - FLOAT scale = 0.0; - FLOAT ssq = 1.0; - FLOAT absxi = 0.0; - - - if (n <= 0 || inc_x == 0) return(0.0); - if ( n == 1 ) return( ABS(x[0]) ); - - n *= inc_x; - while(abs(i) < abs(n)) - { - - if ( x[i] != 0.0 ) - { - absxi = ABS( x[i] ); - if ( scale < absxi ) - { - ssq = 1 + ssq * ( scale / absxi ) * ( scale / absxi ); - scale = absxi ; - } - else - { - ssq += ( absxi/scale ) * ( absxi/scale ); - } - - } - i += inc_x; - } - scale = scale * sqrt( ssq ); - return(scale); - -} - - diff --git a/kernel/mips/omatcopy_cn.c b/kernel/mips/omatcopy_cn.c deleted file mode 100644 index 11357ec933..0000000000 --- a/kernel/mips/omatcopy_cn.c +++ /dev/null @@ -1,82 +0,0 @@ -/*************************************************************************** -Copyright (c) 2016, The OpenBLAS Project -All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are -met: -1. Redistributions of source code must retain the above copyright -notice, this list of conditions and the following disclaimer. -2. Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in -the documentation and/or other materials provided with the -distribution. -3. Neither the name of the OpenBLAS project nor the names of -its contributors may be used to endorse or promote products -derived from this software without specific prior written permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE -USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*****************************************************************************/ - -#include "common.h" - -int CNAME(BLASLONG rows, BLASLONG cols, FLOAT alpha, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG ldb) -{ - BLASLONG i,j; - FLOAT *aptr,*bptr; - - if ( rows <= 0 ) return(0); - if ( cols <= 0 ) return(0); - - aptr = a; - bptr = b; - - if ( alpha == 0.0 ) - { - for ( i=0; i - - -FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) -{ - BLASLONG i=0; - FLOAT sumf = 0.0; - if (n <= 0 || inc_x <= 0) return(sumf); - - n *= inc_x; - while(i < n) - { - sumf += x[i]; - i += inc_x; - } - return(sumf); -} - - diff --git a/kernel/mips/swap.c b/kernel/mips/swap.c deleted file mode 100644 index 23f7a35802..0000000000 --- a/kernel/mips/swap.c +++ /dev/null @@ -1,55 +0,0 @@ -/*************************************************************************** -Copyright (c) 2016, The OpenBLAS Project -All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are -met: -1. Redistributions of source code must retain the above copyright -notice, this list of conditions and the following disclaimer. -2. Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in -the documentation and/or other materials provided with the -distribution. -3. Neither the name of the OpenBLAS project nor the names of -its contributors may be used to endorse or promote products -derived from this software without specific prior written permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE -USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*****************************************************************************/ - -#include "common.h" -#include - -int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT dummy3, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y, FLOAT *dummy, BLASLONG dummy2) -{ - BLASLONG i=0; - BLASLONG ix=0,iy=0; - FLOAT temp; - - if ( n < 0 ) return(0); - - while(i < n) - { - - temp = x[ix] ; - x[ix] = y[iy] ; - y[iy] = temp ; - - ix += inc_x ; - iy += inc_y ; - i++ ; - - } - return(0); - -} - - diff --git a/kernel/mips/symv_L.c b/kernel/mips/symv_L.c deleted file mode 100644 index 6a83d73f9d..0000000000 --- a/kernel/mips/symv_L.c +++ /dev/null @@ -1,70 +0,0 @@ -/*************************************************************************** -Copyright (c) 2016, The OpenBLAS Project -All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are -met: -1. Redistributions of source code must retain the above copyright -notice, this list of conditions and the following disclaimer. -2. Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in -the documentation and/or other materials provided with the -distribution. -3. Neither the name of the OpenBLAS project nor the names of -its contributors may be used to endorse or promote products -derived from this software without specific prior written permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE -USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*****************************************************************************/ - - -#include "common.h" - -int CNAME(BLASLONG m, BLASLONG offset, FLOAT alpha, FLOAT *a, BLASLONG lda, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y, FLOAT *buffer) -{ - BLASLONG i; - BLASLONG ix,iy; - BLASLONG jx,jy; - BLASLONG j; - FLOAT temp1; - FLOAT temp2; - -#if 0 - if ( m != offset ) - printf("Symv_L: m=%d offset=%d\n",m,offset); -#endif - - jx = 0; - jy = 0; - - for (j=0; j - -#if defined(DOUBLE) - -#define ABS fabs - -#else - -#define ABS fabsf - -#endif - -#define CABS1(x,i) ABS(x[i])+ABS(x[i+1]) - -FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) -{ - BLASLONG i=0; - BLASLONG ix=0; - FLOAT maxf; - BLASLONG inc_x2; - - if (n <= 0 || inc_x <= 0) return(0.0); - - inc_x2 = 2 * inc_x; - - maxf = CABS1(x,0); - ix += inc_x2; - i++; - - while(i < n) - { - if( CABS1(x,ix) > maxf ) - { - maxf = CABS1(x,ix); - } - ix += inc_x2; - i++; - } - return(maxf); -} - - diff --git a/kernel/mips/zamin.c b/kernel/mips/zamin.c deleted file mode 100644 index 97c07da818..0000000000 --- a/kernel/mips/zamin.c +++ /dev/null @@ -1,70 +0,0 @@ -/*************************************************************************** -Copyright (c) 2016, The OpenBLAS Project -All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are -met: -1. Redistributions of source code must retain the above copyright -notice, this list of conditions and the following disclaimer. -2. Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in -the documentation and/or other materials provided with the -distribution. -3. Neither the name of the OpenBLAS project nor the names of -its contributors may be used to endorse or promote products -derived from this software without specific prior written permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE -USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*****************************************************************************/ - -#include "common.h" -#include - -#if defined(DOUBLE) - -#define ABS fabs - -#else - -#define ABS fabsf - -#endif - -#define CABS1(x,i) ABS(x[i])+ABS(x[i+1]) - -FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) -{ - BLASLONG i=0; - BLASLONG ix=0; - FLOAT minf; - BLASLONG inc_x2; - - if (n <= 0 || inc_x <= 0) return(0.0); - - inc_x2 = 2 * inc_x; - - minf = CABS1(x,0); - ix += inc_x2; - i++; - - while(i < n) - { - if( CABS1(x,ix) < minf ) - { - minf = CABS1(x,ix); - } - ix += inc_x2; - i++; - } - return(minf); -} - - diff --git a/kernel/mips/zasum.c b/kernel/mips/zasum.c deleted file mode 100644 index 77a2ed6855..0000000000 --- a/kernel/mips/zasum.c +++ /dev/null @@ -1,62 +0,0 @@ -/*************************************************************************** -Copyright (c) 2016, The OpenBLAS Project -All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are -met: -1. Redistributions of source code must retain the above copyright -notice, this list of conditions and the following disclaimer. -2. Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in -the documentation and/or other materials provided with the -distribution. -3. Neither the name of the OpenBLAS project nor the names of -its contributors may be used to endorse or promote products -derived from this software without specific prior written permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE -USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*****************************************************************************/ - -#include "common.h" -#include - -#if defined(DOUBLE) - -#define ABS fabs - -#else - -#define ABS fabsf - -#endif - -#define CABS1(x,i) ABS(x[i])+ABS(x[i+1]) - -FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) -{ - BLASLONG i=0; - FLOAT sumf = 0.0; - BLASLONG inc_x2; - - if (n <= 0 || inc_x <= 0) return(sumf); - - inc_x2 = 2 * inc_x; - - n *= inc_x2; - while(i < n) - { - sumf += CABS1(x,i); - i += inc_x2; - } - return(sumf); -} - - diff --git a/kernel/mips/zaxpby.c b/kernel/mips/zaxpby.c deleted file mode 100644 index 97452e942e..0000000000 --- a/kernel/mips/zaxpby.c +++ /dev/null @@ -1,113 +0,0 @@ -/*************************************************************************** -Copyright (c) 2016, The OpenBLAS Project -All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are -met: -1. Redistributions of source code must retain the above copyright -notice, this list of conditions and the following disclaimer. -2. Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in -the documentation and/or other materials provided with the -distribution. -3. Neither the name of the OpenBLAS project nor the names of -its contributors may be used to endorse or promote products -derived from this software without specific prior written permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE -USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*****************************************************************************/ - -#include "common.h" - -int CNAME(BLASLONG n, FLOAT alpha_r, FLOAT alpha_i, FLOAT *x, BLASLONG inc_x, FLOAT beta_r, FLOAT beta_i,FLOAT *y, BLASLONG inc_y) -{ - BLASLONG i=0; - BLASLONG ix,iy; - FLOAT temp; - BLASLONG inc_x2, inc_y2; - - if ( n <= 0 ) return(0); - - ix = 0; - iy = 0; - - inc_x2 = 2 * inc_x; - inc_y2 = 2 * inc_y; - - if ( beta_r == 0.0 && beta_i == 0.0) - { - if ( alpha_r == 0.0 && alpha_i == 0.0 ) - { - - while(i < n) - { - y[iy] = 0.0 ; - y[iy+1] = 0.0 ; - iy += inc_y2 ; - i++ ; - } - - } - else - { - - while(i < n) - { - y[iy] = ( alpha_r * x[ix] - alpha_i * x[ix+1] ) ; - y[iy+1] = ( alpha_r * x[ix+1] + alpha_i * x[ix] ) ; - ix += inc_x2 ; - iy += inc_y2 ; - i++ ; - } - - - } - - } - else - { - if ( alpha_r == 0.0 && alpha_i == 0.0 ) - { - - while(i < n) - { - temp = ( beta_r * y[iy] - beta_i * y[iy+1] ) ; - y[iy+1] = ( beta_r * y[iy+1] + beta_i * y[iy] ) ; - y[iy] = temp; - iy += inc_y2 ; - i++ ; - } - - } - else - { - - while(i < n) - { - temp = ( alpha_r * x[ix] - alpha_i * x[ix+1] ) + ( beta_r * y[iy] - beta_i * y[iy+1] ) ; - y[iy+1] = ( alpha_r * x[ix+1] + alpha_i * x[ix] ) + ( beta_r * y[iy+1] + beta_i * y[iy] ) ; - y[iy] = temp; - ix += inc_x2 ; - iy += inc_y2 ; - i++ ; - } - - - } - - - - } - return(0); - -} - - diff --git a/kernel/mips/zaxpy.c b/kernel/mips/zaxpy.c deleted file mode 100644 index f0fbab4a26..0000000000 --- a/kernel/mips/zaxpy.c +++ /dev/null @@ -1,64 +0,0 @@ -/*************************************************************************** -Copyright (c) 2016, The OpenBLAS Project -All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are -met: -1. Redistributions of source code must retain the above copyright -notice, this list of conditions and the following disclaimer. -2. Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in -the documentation and/or other materials provided with the -distribution. -3. Neither the name of the OpenBLAS project nor the names of -its contributors may be used to endorse or promote products -derived from this software without specific prior written permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE -USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*****************************************************************************/ - -#include "common.h" - -int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da_r, FLOAT da_i, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y, FLOAT *dummy, BLASLONG dummy2) -{ - BLASLONG i=0; - BLASLONG ix,iy; - BLASLONG inc_x2; - BLASLONG inc_y2; - - if ( n < 0 ) return(0); - if ( da_r == 0.0 && da_i == 0.0 ) return(0); - - ix = 0; - iy = 0; - - inc_x2 = 2 * inc_x; - inc_y2 = 2 * inc_y; - - while(i < n) - { -#if !defined(CONJ) - y[iy] += ( da_r * x[ix] - da_i * x[ix+1] ) ; - y[iy+1] += ( da_r * x[ix+1] + da_i * x[ix] ) ; -#else - y[iy] += ( da_r * x[ix] + da_i * x[ix+1] ) ; - y[iy+1] -= ( da_r * x[ix+1] - da_i * x[ix] ) ; -#endif - ix += inc_x2 ; - iy += inc_y2 ; - i++ ; - - } - return(0); - -} - - diff --git a/kernel/mips/zcopy.c b/kernel/mips/zcopy.c deleted file mode 100644 index 6bb6e33b62..0000000000 --- a/kernel/mips/zcopy.c +++ /dev/null @@ -1,56 +0,0 @@ -/*************************************************************************** -Copyright (c) 2016, The OpenBLAS Project -All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are -met: -1. Redistributions of source code must retain the above copyright -notice, this list of conditions and the following disclaimer. -2. Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in -the documentation and/or other materials provided with the -distribution. -3. Neither the name of the OpenBLAS project nor the names of -its contributors may be used to endorse or promote products -derived from this software without specific prior written permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE -USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*****************************************************************************/ - -#include "common.h" - -int CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y) -{ - BLASLONG i=0; - BLASLONG ix=0,iy=0; - BLASLONG inc_x2; - BLASLONG inc_y2; - - if ( n < 0 ) return(0); - - inc_x2 = 2 * inc_x; - inc_y2 = 2 * inc_y; - - while(i < n) - { - - y[iy] = x[ix] ; - y[iy+1] = x[ix+1] ; - ix += inc_x2; - iy += inc_y2; - i++ ; - - } - return(0); - -} - - diff --git a/kernel/mips/zdot.c b/kernel/mips/zdot.c deleted file mode 100644 index df99bae852..0000000000 --- a/kernel/mips/zdot.c +++ /dev/null @@ -1,70 +0,0 @@ -/*************************************************************************** -Copyright (c) 2016, The OpenBLAS Project -All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are -met: -1. Redistributions of source code must retain the above copyright -notice, this list of conditions and the following disclaimer. -2. Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in -the documentation and/or other materials provided with the -distribution. -3. Neither the name of the OpenBLAS project nor the names of -its contributors may be used to endorse or promote products -derived from this software without specific prior written permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE -USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*****************************************************************************/ - -#include "common.h" - -OPENBLAS_COMPLEX_FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y) -{ - BLASLONG i=0; - BLASLONG ix=0,iy=0; - FLOAT dot[2]; - OPENBLAS_COMPLEX_FLOAT result; - BLASLONG inc_x2; - BLASLONG inc_y2; - - dot[0]=0.0; - dot[1]=0.0; - - CREAL(result) = 0.0 ; - CIMAG(result) = 0.0 ; - - if ( n < 1 ) return(result); - - inc_x2 = 2 * inc_x ; - inc_y2 = 2 * inc_y ; - - while(i < n) - { -#if !defined(CONJ) - dot[0] += ( x[ix] * y[iy] - x[ix+1] * y[iy+1] ) ; - dot[1] += ( x[ix+1] * y[iy] + x[ix] * y[iy+1] ) ; -#else - dot[0] += ( x[ix] * y[iy] + x[ix+1] * y[iy+1] ) ; - dot[1] -= ( x[ix+1] * y[iy] - x[ix] * y[iy+1] ) ; -#endif - ix += inc_x2 ; - iy += inc_y2 ; - i++ ; - - } - CREAL(result) = dot[0]; - CIMAG(result) = dot[1]; - return(result); - -} - - diff --git a/kernel/mips/zgemv_n.c b/kernel/mips/zgemv_n.c deleted file mode 100644 index 9bf1f6b429..0000000000 --- a/kernel/mips/zgemv_n.c +++ /dev/null @@ -1,147 +0,0 @@ -/*************************************************************************** -Copyright (c) 2016, The OpenBLAS Project -All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are -met: -1. Redistributions of source code must retain the above copyright -notice, this list of conditions and the following disclaimer. -2. Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in -the documentation and/or other materials provided with the -distribution. -3. Neither the name of the OpenBLAS project nor the names of -its contributors may be used to endorse or promote products -derived from this software without specific prior written permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE -USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*****************************************************************************/ - -#include "common.h" - -int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT alpha_r, FLOAT alpha_i, FLOAT *a, BLASLONG lda, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y, FLOAT *buffer) -{ - BLASLONG i; - BLASLONG ix,iy; - BLASLONG j; - FLOAT *a_ptr; - FLOAT temp_r,temp_i; - BLASLONG inc_x2,inc_y2; - BLASLONG lda2; - BLASLONG i2; - - lda2 = 2*lda; - - ix = 0; - a_ptr = a; - - if ( inc_x == 1 && inc_y == 1 ) - { - - for (j=0; j - -#if defined(DOUBLE) - -#define ABS fabs - -#else - -#define ABS fabsf - -#endif - - - -FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) -{ - BLASLONG i=0; - FLOAT scale = 0.0; - FLOAT ssq = 1.0; - BLASLONG inc_x2; - FLOAT temp; - - if (n <= 0 || inc_x == 0) return(0.0); - - inc_x2 = 2 * inc_x; - - n *= inc_x2; - while(abs(i) < abs(n)) - { - - if ( x[i] != 0.0 ) - { - temp = ABS( x[i] ); - if ( scale < temp ) - { - ssq = 1 + ssq * ( scale / temp ) * ( scale / temp ); - scale = temp ; - } - else - { - ssq += ( temp / scale ) * ( temp / scale ); - } - - } - - if ( x[i+1] != 0.0 ) - { - temp = ABS( x[i+1] ); - if ( scale < temp ) - { - ssq = 1 + ssq * ( scale / temp ) * ( scale / temp ); - scale = temp ; - } - else - { - ssq += ( temp / scale ) * ( temp / scale ); - } - - } - - - i += inc_x2; - } - scale = scale * sqrt( ssq ); - return(scale); - -} - - diff --git a/kernel/mips/zomatcopy_cn.c b/kernel/mips/zomatcopy_cn.c deleted file mode 100644 index bf6d3c70da..0000000000 --- a/kernel/mips/zomatcopy_cn.c +++ /dev/null @@ -1,62 +0,0 @@ -/*************************************************************************** -Copyright (c) 2016, The OpenBLAS Project -All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are -met: -1. Redistributions of source code must retain the above copyright -notice, this list of conditions and the following disclaimer. -2. Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in -the documentation and/or other materials provided with the -distribution. -3. Neither the name of the OpenBLAS project nor the names of -its contributors may be used to endorse or promote products -derived from this software without specific prior written permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE -USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*****************************************************************************/ - -#include "common.h" - -int CNAME(BLASLONG rows, BLASLONG cols, FLOAT alpha_r, FLOAT alpha_i, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG ldb) -{ - BLASLONG i,j,ia; - FLOAT *aptr,*bptr; - - if ( rows <= 0 ) return(0); - if ( cols <= 0 ) return(0); - - aptr = a; - bptr = b; - - lda *= 2; - ldb *= 2; - - for ( i=0; i - -#define CSUM1(x,i) x[i]+x[i+1] - -FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) -{ - BLASLONG i=0; - FLOAT sumf = 0.0; - BLASLONG inc_x2; - - if (n <= 0 || inc_x <= 0) return(sumf); - - inc_x2 = 2 * inc_x; - - n *= inc_x2; - while(i < n) - { - sumf += CSUM1(x,i); - i += inc_x2; - } - return(sumf); -} - - diff --git a/kernel/mips/zswap.c b/kernel/mips/zswap.c deleted file mode 100644 index abc3c9cadf..0000000000 --- a/kernel/mips/zswap.c +++ /dev/null @@ -1,63 +0,0 @@ -/*************************************************************************** -Copyright (c) 2016, The OpenBLAS Project -All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are -met: -1. Redistributions of source code must retain the above copyright -notice, this list of conditions and the following disclaimer. -2. Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in -the documentation and/or other materials provided with the -distribution. -3. Neither the name of the OpenBLAS project nor the names of -its contributors may be used to endorse or promote products -derived from this software without specific prior written permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE -USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*****************************************************************************/ - -#include "common.h" -#include - -int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT dummy3, FLOAT dummy4, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y, FLOAT *dummy, BLASLONG dummy2) -{ - BLASLONG i=0; - BLASLONG ix=0,iy=0; - FLOAT temp[2]; - BLASLONG inc_x2; - BLASLONG inc_y2; - - if ( n < 0 ) return(0); - - inc_x2 = 2 * inc_x; - inc_y2 = 2 * inc_y; - - while(i < n) - { - - temp[0] = x[ix] ; - temp[1] = x[ix+1] ; - x[ix] = y[iy] ; - x[ix+1] = y[iy+1] ; - y[iy] = temp[0] ; - y[iy+1] = temp[1] ; - - ix += inc_x2 ; - iy += inc_y2 ; - i++ ; - - } - return(0); - -} - - diff --git a/kernel/mips64/KERNEL b/kernel/mips64/KERNEL index d720aaff5f..b24fcdb4c9 100644 --- a/kernel/mips64/KERNEL +++ b/kernel/mips64/KERNEL @@ -1,16 +1,16 @@ -CAXPYKERNEL = ../mips/zaxpy.c -ZAXPYKERNEL = ../mips/zaxpy.c -SROTKERNEL = ../mips/rot.c -DROTKERNEL = ../mips/rot.c -CROTKERNEL = ../mips/zrot.c -ZROTKERNEL = ../mips/zrot.c -CSWAPKERNEL = ../mips/zswap.c -ZSWAPKERNEL = ../mips/zswap.c - -CSCALKERNEL = ../mips/zscal.c -ZSCALKERNEL = ../mips/zscal.c - - +CAXPYKERNEL = ../arm/zaxpy.c +ZAXPYKERNEL = ../arm/zaxpy.c +SROTKERNEL = ../arm/rot.c +DROTKERNEL = ../arm/rot.c +CROTKERNEL = ../arm/zrot.c +ZROTKERNEL = ../arm/zrot.c +CSWAPKERNEL = ../arm/zswap.c +ZSWAPKERNEL = ../arm/zswap.c + +CSCALKERNEL = ../arm/zscal.c +ZSCALKERNEL = ../arm/zscal.c + + ifndef SNRM2KERNEL SNRM2KERNEL = snrm2.S endif diff --git a/kernel/mips64/KERNEL.LOONGSON3R3 b/kernel/mips64/KERNEL.LOONGSON3R3 index 904828d573..e574872f2c 100644 --- a/kernel/mips64/KERNEL.LOONGSON3R3 +++ b/kernel/mips64/KERNEL.LOONGSON3R3 @@ -63,4 +63,4 @@ ZTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c ZTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c ZTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c -DSDOTKERNEL = ../mips/dot.c +DSDOTKERNEL = ../arm/dot.c diff --git a/kernel/mips64/KERNEL.LOONGSON3R4 b/kernel/mips64/KERNEL.LOONGSON3R4 index 1149d97f12..18ce5d203b 100644 --- a/kernel/mips64/KERNEL.LOONGSON3R4 +++ b/kernel/mips64/KERNEL.LOONGSON3R4 @@ -21,7 +21,7 @@ DDOTKERNEL = ../mips/ddot_msa.c CDOTKERNEL = ../mips/cdot_msa.c ZDOTKERNEL = ../mips/zdot_msa.c endif -DSDOTKERNEL = ../mips/dot.c +DSDOTKERNEL = ../arm/dot.c ifndef NO_MSA SROTKERNEL = ../mips/srot_msa.c diff --git a/kernel/mips64/KERNEL.MIPS64_GENERIC b/kernel/mips64/KERNEL.MIPS64_GENERIC index 33bcbeedd5..8700060100 100644 --- a/kernel/mips64/KERNEL.MIPS64_GENERIC +++ b/kernel/mips64/KERNEL.MIPS64_GENERIC @@ -53,92 +53,92 @@ ZTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c ZTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c #Pure C for other kernels -SAMAXKERNEL = ../mips/amax.c -DAMAXKERNEL = ../mips/amax.c -CAMAXKERNEL = ../mips/zamax.c -ZAMAXKERNEL = ../mips/zamax.c - -SAMINKERNEL = ../mips/amin.c -DAMINKERNEL = ../mips/amin.c -CAMINKERNEL = ../mips/zamin.c -ZAMINKERNEL = ../mips/zamin.c - -SMAXKERNEL = ../mips/max.c -DMAXKERNEL = ../mips/max.c - -SMINKERNEL = ../mips/min.c -DMINKERNEL = ../mips/min.c - -ISAMAXKERNEL = ../mips/iamax.c -IDAMAXKERNEL = ../mips/iamax.c -ICAMAXKERNEL = ../mips/izamax.c -IZAMAXKERNEL = ../mips/izamax.c - -ISAMINKERNEL = ../mips/iamin.c -IDAMINKERNEL = ../mips/iamin.c -ICAMINKERNEL = ../mips/izamin.c -IZAMINKERNEL = ../mips/izamin.c - -ISMAXKERNEL = ../mips/imax.c -IDMAXKERNEL = ../mips/imax.c - -ISMINKERNEL = ../mips/imin.c -IDMINKERNEL = ../mips/imin.c - -SASUMKERNEL = ../mips/asum.c -DASUMKERNEL = ../mips/asum.c -CASUMKERNEL = ../mips/zasum.c -ZASUMKERNEL = ../mips/zasum.c - -SSUMKERNEL = ../mips/sum.c -DSUMKERNEL = ../mips/sum.c -CSUMKERNEL = ../mips/zsum.c -ZSUMKERNEL = ../mips/zsum.c - -SAXPYKERNEL = ../mips/axpy.c -DAXPYKERNEL = ../mips/axpy.c -CAXPYKERNEL = ../mips/zaxpy.c -ZAXPYKERNEL = ../mips/zaxpy.c - -SCOPYKERNEL = ../mips/copy.c -DCOPYKERNEL = ../mips/copy.c -CCOPYKERNEL = ../mips/zcopy.c -ZCOPYKERNEL = ../mips/zcopy.c - -SDOTKERNEL = ../mips/dot.c -DDOTKERNEL = ../mips/dot.c -CDOTKERNEL = ../mips/zdot.c -ZDOTKERNEL = ../mips/zdot.c - -SNRM2KERNEL = ../mips/nrm2.c -DNRM2KERNEL = ../mips/nrm2.c -CNRM2KERNEL = ../mips/znrm2.c -ZNRM2KERNEL = ../mips/znrm2.c - -SROTKERNEL = ../mips/rot.c -DROTKERNEL = ../mips/rot.c -CROTKERNEL = ../mips/zrot.c -ZROTKERNEL = ../mips/zrot.c - -SSCALKERNEL = ../mips/scal.c -DSCALKERNEL = ../mips/scal.c -CSCALKERNEL = ../mips/zscal.c -ZSCALKERNEL = ../mips/zscal.c - -SSWAPKERNEL = ../mips/swap.c -DSWAPKERNEL = ../mips/swap.c -CSWAPKERNEL = ../mips/zswap.c -ZSWAPKERNEL = ../mips/zswap.c - -SGEMVNKERNEL = ../mips/gemv_n.c -DGEMVNKERNEL = ../mips/gemv_n.c -CGEMVNKERNEL = ../mips/zgemv_n.c -ZGEMVNKERNEL = ../mips/zgemv_n.c - -SGEMVTKERNEL = ../mips/gemv_t.c -DGEMVTKERNEL = ../mips/gemv_t.c -CGEMVTKERNEL = ../mips/zgemv_t.c -ZGEMVTKERNEL = ../mips/zgemv_t.c +SAMAXKERNEL = ../arm/amax.c +DAMAXKERNEL = ../arm/amax.c +CAMAXKERNEL = ../arm/zamax.c +ZAMAXKERNEL = ../arm/zamax.c + +SAMINKERNEL = ../arm/amin.c +DAMINKERNEL = ../arm/amin.c +CAMINKERNEL = ../arm/zamin.c +ZAMINKERNEL = ../arm/zamin.c + +SMAXKERNEL = ../arm/max.c +DMAXKERNEL = ../arm/max.c + +SMINKERNEL = ../arm/min.c +DMINKERNEL = ../arm/min.c + +ISAMAXKERNEL = ../arm/iamax.c +IDAMAXKERNEL = ../arm/iamax.c +ICAMAXKERNEL = ../arm/izamax.c +IZAMAXKERNEL = ../arm/izamax.c + +ISAMINKERNEL = ../arm/iamin.c +IDAMINKERNEL = ../arm/iamin.c +ICAMINKERNEL = ../arm/izamin.c +IZAMINKERNEL = ../arm/izamin.c + +ISMAXKERNEL = ../arm/imax.c +IDMAXKERNEL = ../arm/imax.c + +ISMINKERNEL = ../arm/imin.c +IDMINKERNEL = ../arm/imin.c + +SASUMKERNEL = ../arm/asum.c +DASUMKERNEL = ../arm/asum.c +CASUMKERNEL = ../arm/zasum.c +ZASUMKERNEL = ../arm/zasum.c + +SSUMKERNEL = ../arm/sum.c +DSUMKERNEL = ../arm/sum.c +CSUMKERNEL = ../arm/zsum.c +ZSUMKERNEL = ../arm/zsum.c + +SAXPYKERNEL = ../arm/axpy.c +DAXPYKERNEL = ../arm/axpy.c +CAXPYKERNEL = ../arm/zaxpy.c +ZAXPYKERNEL = ../arm/zaxpy.c + +SCOPYKERNEL = ../arm/copy.c +DCOPYKERNEL = ../arm/copy.c +CCOPYKERNEL = ../arm/zcopy.c +ZCOPYKERNEL = ../arm/zcopy.c + +SDOTKERNEL = ../arm/dot.c +DDOTKERNEL = ../arm/dot.c +CDOTKERNEL = ../arm/zdot.c +ZDOTKERNEL = ../arm/zdot.c + +SNRM2KERNEL = ../arm/nrm2.c +DNRM2KERNEL = ../arm/nrm2.c +CNRM2KERNEL = ../arm/znrm2.c +ZNRM2KERNEL = ../arm/znrm2.c + +SROTKERNEL = ../arm/rot.c +DROTKERNEL = ../arm/rot.c +CROTKERNEL = ../arm/zrot.c +ZROTKERNEL = ../arm/zrot.c + +SSCALKERNEL = ../arm/scal.c +DSCALKERNEL = ../arm/scal.c +CSCALKERNEL = ../arm/zscal.c +ZSCALKERNEL = ../arm/zscal.c + +SSWAPKERNEL = ../arm/swap.c +DSWAPKERNEL = ../arm/swap.c +CSWAPKERNEL = ../arm/zswap.c +ZSWAPKERNEL = ../arm/zswap.c + +SGEMVNKERNEL = ../arm/gemv_n.c +DGEMVNKERNEL = ../arm/gemv_n.c +CGEMVNKERNEL = ../arm/zgemv_n.c +ZGEMVNKERNEL = ../arm/zgemv_n.c + +SGEMVTKERNEL = ../arm/gemv_t.c +DGEMVTKERNEL = ../arm/gemv_t.c +CGEMVTKERNEL = ../arm/zgemv_t.c +ZGEMVTKERNEL = ../arm/zgemv_t.c SSYMV_U_KERNEL = ../generic/symv_k.c SSYMV_L_KERNEL = ../generic/symv_k.c diff --git a/kernel/mips64/KERNEL.generic b/kernel/mips64/KERNEL.generic index 1f03c65942..f246811c34 100644 --- a/kernel/mips64/KERNEL.generic +++ b/kernel/mips64/KERNEL.generic @@ -53,92 +53,92 @@ ZTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c ZTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c #Pure C for other kernels -SAMAXKERNEL = ../mips/amax.c -DAMAXKERNEL = ../mips/amax.c -CAMAXKERNEL = ../mips/zamax.c -ZAMAXKERNEL = ../mips/zamax.c - -SAMINKERNEL = ../mips/amin.c -DAMINKERNEL = ../mips/amin.c -CAMINKERNEL = ../mips/zamin.c -ZAMINKERNEL = ../mips/zamin.c - -SMAXKERNEL = ../mips/max.c -DMAXKERNEL = ../mips/max.c - -SMINKERNEL = ../mips/min.c -DMINKERNEL = ../mips/min.c - -ISAMAXKERNEL = ../mips/iamax.c -IDAMAXKERNEL = ../mips/iamax.c -ICAMAXKERNEL = ../mips/izamax.c -IZAMAXKERNEL = ../mips/izamax.c - -ISAMINKERNEL = ../mips/iamin.c -IDAMINKERNEL = ../mips/iamin.c -ICAMINKERNEL = ../mips/izamin.c -IZAMINKERNEL = ../mips/izamin.c - -ISMAXKERNEL = ../mips/imax.c -IDMAXKERNEL = ../mips/imax.c - -ISMINKERNEL = ../mips/imin.c -IDMINKERNEL = ../mips/imin.c - -SASUMKERNEL = ../mips/asum.c -DASUMKERNEL = ../mips/asum.c -CASUMKERNEL = ../mips/zasum.c -ZASUMKERNEL = ../mips/zasum.c - -SSUMKERNEL = ../mips/sum.c -DSUMKERNEL = ../mips/sum.c -CSUMKERNEL = ../mips/zsum.c -ZSUMKERNEL = ../mips/zsum.c - -SAXPYKERNEL = ../mips/axpy.c -DAXPYKERNEL = ../mips/axpy.c -CAXPYKERNEL = ../mips/zaxpy.c -ZAXPYKERNEL = ../mips/zaxpy.c - -SCOPYKERNEL = ../mips/copy.c -DCOPYKERNEL = ../mips/copy.c -CCOPYKERNEL = ../mips/zcopy.c -ZCOPYKERNEL = ../mips/zcopy.c - -SDOTKERNEL = ../mips/dot.c -DDOTKERNEL = ../mips/dot.c -CDOTKERNEL = ../mips/zdot.c -ZDOTKERNEL = ../mips/zdot.c - -SNRM2KERNEL = ../mips/nrm2.c -DNRM2KERNEL = ../mips/nrm2.c -CNRM2KERNEL = ../mips/znrm2.c -ZNRM2KERNEL = ../mips/znrm2.c - -SROTKERNEL = ../mips/rot.c -DROTKERNEL = ../mips/rot.c -CROTKERNEL = ../mips/zrot.c -ZROTKERNEL = ../mips/zrot.c - -SSCALKERNEL = ../mips/scal.c -DSCALKERNEL = ../mips/scal.c -CSCALKERNEL = ../mips/zscal.c -ZSCALKERNEL = ../mips/zscal.c - -SSWAPKERNEL = ../mips/swap.c -DSWAPKERNEL = ../mips/swap.c -CSWAPKERNEL = ../mips/zswap.c -ZSWAPKERNEL = ../mips/zswap.c - -SGEMVNKERNEL = ../mips/gemv_n.c -DGEMVNKERNEL = ../mips/gemv_n.c -CGEMVNKERNEL = ../mips/zgemv_n.c -ZGEMVNKERNEL = ../mips/zgemv_n.c - -SGEMVTKERNEL = ../mips/gemv_t.c -DGEMVTKERNEL = ../mips/gemv_t.c -CGEMVTKERNEL = ../mips/zgemv_t.c -ZGEMVTKERNEL = ../mips/zgemv_t.c +SAMAXKERNEL = ../arm/amax.c +DAMAXKERNEL = ../arm/amax.c +CAMAXKERNEL = ../arm/zamax.c +ZAMAXKERNEL = ../arm/zamax.c + +SAMINKERNEL = ../arm/amin.c +DAMINKERNEL = ../arm/amin.c +CAMINKERNEL = ../arm/zamin.c +ZAMINKERNEL = ../arm/zamin.c + +SMAXKERNEL = ../arm/max.c +DMAXKERNEL = ../arm/max.c + +SMINKERNEL = ../arm/min.c +DMINKERNEL = ../arm/min.c + +ISAMAXKERNEL = ../arm/iamax.c +IDAMAXKERNEL = ../arm/iamax.c +ICAMAXKERNEL = ../arm/izamax.c +IZAMAXKERNEL = ../arm/izamax.c + +ISAMINKERNEL = ../arm/iamin.c +IDAMINKERNEL = ../arm/iamin.c +ICAMINKERNEL = ../arm/izamin.c +IZAMINKERNEL = ../arm/izamin.c + +ISMAXKERNEL = ../arm/imax.c +IDMAXKERNEL = ../arm/imax.c + +ISMINKERNEL = ../arm/imin.c +IDMINKERNEL = ../arm/imin.c + +SASUMKERNEL = ../arm/asum.c +DASUMKERNEL = ../arm/asum.c +CASUMKERNEL = ../arm/zasum.c +ZASUMKERNEL = ../arm/zasum.c + +SSUMKERNEL = ../arm/sum.c +DSUMKERNEL = ../arm/sum.c +CSUMKERNEL = ../arm/zsum.c +ZSUMKERNEL = ../arm/zsum.c + +SAXPYKERNEL = ../arm/axpy.c +DAXPYKERNEL = ../arm/axpy.c +CAXPYKERNEL = ../arm/zaxpy.c +ZAXPYKERNEL = ../arm/zaxpy.c + +SCOPYKERNEL = ../arm/copy.c +DCOPYKERNEL = ../arm/copy.c +CCOPYKERNEL = ../arm/zcopy.c +ZCOPYKERNEL = ../arm/zcopy.c + +SDOTKERNEL = ../arm/dot.c +DDOTKERNEL = ../arm/dot.c +CDOTKERNEL = ../arm/zdot.c +ZDOTKERNEL = ../arm/zdot.c + +SNRM2KERNEL = ../arm/nrm2.c +DNRM2KERNEL = ../arm/nrm2.c +CNRM2KERNEL = ../arm/znrm2.c +ZNRM2KERNEL = ../arm/znrm2.c + +SROTKERNEL = ../arm/rot.c +DROTKERNEL = ../arm/rot.c +CROTKERNEL = ../arm/zrot.c +ZROTKERNEL = ../arm/zrot.c + +SSCALKERNEL = ../arm/scal.c +DSCALKERNEL = ../arm/scal.c +CSCALKERNEL = ../arm/zscal.c +ZSCALKERNEL = ../arm/zscal.c + +SSWAPKERNEL = ../arm/swap.c +DSWAPKERNEL = ../arm/swap.c +CSWAPKERNEL = ../arm/zswap.c +ZSWAPKERNEL = ../arm/zswap.c + +SGEMVNKERNEL = ../arm/gemv_n.c +DGEMVNKERNEL = ../arm/gemv_n.c +CGEMVNKERNEL = ../arm/zgemv_n.c +ZGEMVNKERNEL = ../arm/zgemv_n.c + +SGEMVTKERNEL = ../arm/gemv_t.c +DGEMVTKERNEL = ../arm/gemv_t.c +CGEMVTKERNEL = ../arm/zgemv_t.c +ZGEMVTKERNEL = ../arm/zgemv_t.c SSYMV_U_KERNEL = ../generic/symv_k.c SSYMV_L_KERNEL = ../generic/symv_k.c diff --git a/kernel/riscv64/dgemm_kernel_8x8_zvl256b.c b/kernel/riscv64/dgemm_kernel_8x8_zvl256b.c index 760bfc8932..613a756b84 100644 --- a/kernel/riscv64/dgemm_kernel_8x8_zvl256b.c +++ b/kernel/riscv64/dgemm_kernel_8x8_zvl256b.c @@ -40,819 +40,1722 @@ AUTOGENERATED KERNEL #include "common.h" +#include -int CNAME(BLASLONG M, BLASLONG N, BLASLONG K, FLOAT alpha, FLOAT* A, FLOAT* B, FLOAT* C, BLASLONG ldc) - -{ - BLASLONG gvl = 0; - BLASLONG m_top = 0; - BLASLONG n_top = 0; - - - // -- MAIN PASS - - for (BLASLONG j=0; j= 8) { + vfloat64m8_t B00 = __riscv_vle64_v_f64m8(B, N * 4); + B += (N * 4); + B0 = __riscv_vget_v_f64m8_f64m2(B00, 0); + B2 = __riscv_vget_v_f64m8_f64m2(B00, 1); + B4 = __riscv_vget_v_f64m8_f64m2(B00, 2); + B6 = __riscv_vget_v_f64m8_f64m2(B00, 3); + +#ifdef GEMM_NEW_PACKING + A00 = A0[0 + (1 * 0)]; + A01 = A0[0 + (1 * 1)]; + A02 = A0[0 + (1 * 2)]; + A03 = A0[0 + (1 * 3)]; + A04 = A0[0 + (1 * 4)]; + A05 = A0[0 + (1 * 5)]; + A06 = A0[0 + (1 * 6)]; + A07 = A0[0 + (1 * 7)]; + A0 += (1 * 8); +#else + A00 = A2[0 + (1 * 0)]; + A01 = A2[0 + (1 * 1)]; + A02 = A2[0 + (1 * 2)]; + A03 = A2[0 + (1 * 3)]; + A04 = A2[0 + (1 * 4)]; + A05 = A2[0 + (1 * 5)]; + A06 = A2[0 + (1 * 6)]; + A07 = A2[0 + (1 * 7)]; + A2 += (1 * 8); +#endif + + resultC = __riscv_vfmul_vf_f64m2(B0, A00, 8); + result0 = __riscv_vfmul_vf_f64m2(B2, A01, 8); + result2 = __riscv_vfmul_vf_f64m2(B4, A02, 8); + result4 = __riscv_vfmul_vf_f64m2(B6, A03, 8); + + B00 = __riscv_vle64_v_f64m8(B, N * 4); + B += (N * 4); + B0 = __riscv_vget_v_f64m8_f64m2(B00, 0); + B2 = __riscv_vget_v_f64m8_f64m2(B00, 1); + B4 = __riscv_vget_v_f64m8_f64m2(B00, 2); + B6 = __riscv_vget_v_f64m8_f64m2(B00, 3); + + result6 = __riscv_vfmul_vf_f64m2(B0, A04, 8); + result8 = __riscv_vfmul_vf_f64m2(B2, A05, 8); + resultA = __riscv_vfmul_vf_f64m2(B4, A06, 8); + resultE = __riscv_vfmul_vf_f64m2(B6, A07, 8); + + BLASLONG k = (K / 8); + K &= 7; + while (--k) { + B00 = __riscv_vle64_v_f64m8(B, N * 4); + B += (N * 4); + B0 = __riscv_vget_v_f64m8_f64m2(B00, 0); + B2 = __riscv_vget_v_f64m8_f64m2(B00, 1); + B4 = __riscv_vget_v_f64m8_f64m2(B00, 2); + B6 = __riscv_vget_v_f64m8_f64m2(B00, 3); + +#ifdef GEMM_NEW_PACKING + A00 = A0[0 + (1 * 0)]; + A01 = A0[0 + (1 * 1)]; + A02 = A0[0 + (1 * 2)]; + A03 = A0[0 + (1 * 3)]; + A04 = A0[0 + (1 * 4)]; + A05 = A0[0 + (1 * 5)]; + A06 = A0[0 + (1 * 6)]; + A07 = A0[0 + (1 * 7)]; + A0 += (1 * 8); +#else + A00 = A2[0 + (1 * 0)]; + A01 = A2[0 + (1 * 1)]; + A02 = A2[0 + (1 * 2)]; + A03 = A2[0 + (1 * 3)]; + A04 = A2[0 + (1 * 4)]; + A05 = A2[0 + (1 * 5)]; + A06 = A2[0 + (1 * 6)]; + A07 = A2[0 + (1 * 7)]; + A2 += (1 * 8); +#endif + + resultC = __riscv_vfmacc_vf_f64m2(resultC, A00, B0, 8); + result0 = __riscv_vfmacc_vf_f64m2(result0, A01, B2, 8); + result2 = __riscv_vfmacc_vf_f64m2(result2, A02, B4, 8); + result4 = __riscv_vfmacc_vf_f64m2(result4, A03, B6, 8); + + B00 = __riscv_vle64_v_f64m8(B, N * 4); + B += (N * 4); + B0 = __riscv_vget_v_f64m8_f64m2(B00, 0); + B2 = __riscv_vget_v_f64m8_f64m2(B00, 1); + B4 = __riscv_vget_v_f64m8_f64m2(B00, 2); + B6 = __riscv_vget_v_f64m8_f64m2(B00, 3); + + result6 = __riscv_vfmacc_vf_f64m2(result6, A04, B0, 8); + result8 = __riscv_vfmacc_vf_f64m2(result8, A05, B2, 8); + resultA = __riscv_vfmacc_vf_f64m2(resultA, A06, B4, 8); + resultE = __riscv_vfmacc_vf_f64m2(resultE, A07, B6, 8); + } + + resultC = __riscv_vfadd_vv_f64m2(resultC, result6, 8); + result0 = __riscv_vfadd_vv_f64m2(result0, result8, 8); + result2 = __riscv_vfadd_vv_f64m2(result2, resultA, 8); + result4 = __riscv_vfadd_vv_f64m2(result4, resultE, 8); + resultC = __riscv_vfadd_vv_f64m2(resultC, result0, 8); + result2 = __riscv_vfadd_vv_f64m2(result2, result4, 8); + resultC = __riscv_vfadd_vv_f64m2(resultC, result2, 8); + } else { + resultC = __riscv_vreinterpret_v_u64m2_f64m2(__riscv_vmv_v_x_u64m2(0, 8)); } - - - BLASLONG ci=n_top*ldc+m_top; - - vfloat64m1_t c0 = __riscv_vle64_v_f64m1( &C[ci], gvl); ci += gvl; - vfloat64m1_t c1 = __riscv_vle64_v_f64m1( &C[ci], gvl); ci += ldc-gvl*1; - vfloat64m1_t c2 = __riscv_vle64_v_f64m1( &C[ci], gvl); ci += gvl; - vfloat64m1_t c3 = __riscv_vle64_v_f64m1( &C[ci], gvl); - c0 = __riscv_vfmacc_vf_f64m1( c0, alpha, result0, gvl ); - c1 = __riscv_vfmacc_vf_f64m1( c1, alpha, result1, gvl ); - c2 = __riscv_vfmacc_vf_f64m1( c2, alpha, result2, gvl ); - c3 = __riscv_vfmacc_vf_f64m1( c3, alpha, result3, gvl ); - - ci=n_top*ldc+m_top; - - __riscv_vse64_v_f64m1( &C[ci], c0, gvl); ci += gvl; - __riscv_vse64_v_f64m1( &C[ci], c1, gvl); ci += ldc-gvl*1; - __riscv_vse64_v_f64m1( &C[ci], c2, gvl); ci += gvl; - __riscv_vse64_v_f64m1( &C[ci], c3, gvl); - m_top += 8; + } else if (M == 2) { + if (K >= 4) { + vfloat64m8_t B00 = __riscv_vle64_v_f64m8(B, N * 4); + B += (N * 4); + B0 = __riscv_vget_v_f64m8_f64m2(B00, 0); + B2 = __riscv_vget_v_f64m8_f64m2(B00, 1); + B4 = __riscv_vget_v_f64m8_f64m2(B00, 2); + B6 = __riscv_vget_v_f64m8_f64m2(B00, 3); + +#ifdef GEMM_NEW_PACKING + A00 = A0[0 + (2 * 0)]; + A01 = A0[1 + (2 * 0)]; + A02 = A0[0 + (2 * 1)]; + A03 = A0[1 + (2 * 1)]; + A04 = A0[0 + (2 * 2)]; + A05 = A0[1 + (2 * 2)]; + A06 = A0[0 + (2 * 3)]; + A07 = A0[1 + (2 * 3)]; + A0 += (2 * 4); +#else + A00 = A1[0 + (2 * 0)]; + A01 = A1[1 + (2 * 0)]; + A02 = A1[0 + (2 * 1)]; + A03 = A1[1 + (2 * 1)]; + A04 = A1[0 + (2 * 2)]; + A05 = A1[1 + (2 * 2)]; + A06 = A1[0 + (2 * 3)]; + A07 = A1[1 + (2 * 3)]; + A1 += (2 * 4); +#endif + + result8 = __riscv_vfmul_vf_f64m2(B0, A00, 8); + resultA = __riscv_vfmul_vf_f64m2(B0, A01, 8); + result0 = __riscv_vfmul_vf_f64m2(B2, A02, 8); + result2 = __riscv_vfmul_vf_f64m2(B2, A03, 8); + result4 = __riscv_vfmul_vf_f64m2(B4, A04, 8); + result6 = __riscv_vfmul_vf_f64m2(B4, A05, 8); + resultC = __riscv_vfmul_vf_f64m2(B6, A06, 8); + resultE = __riscv_vfmul_vf_f64m2(B6, A07, 8); + + BLASLONG k = (K / 4); + K &= 3; + while (--k) { + B00 = __riscv_vle64_v_f64m8(B, N * 4); + B += (N * 4); + B0 = __riscv_vget_v_f64m8_f64m2(B00, 0); + B2 = __riscv_vget_v_f64m8_f64m2(B00, 1); + B4 = __riscv_vget_v_f64m8_f64m2(B00, 2); + B6 = __riscv_vget_v_f64m8_f64m2(B00, 3); + +#ifdef GEMM_NEW_PACKING + A00 = A0[0 + (2 * 0)]; + A01 = A0[1 + (2 * 0)]; + A02 = A0[0 + (2 * 1)]; + A03 = A0[1 + (2 * 1)]; + A04 = A0[0 + (2 * 2)]; + A05 = A0[1 + (2 * 2)]; + A06 = A0[0 + (2 * 3)]; + A07 = A0[1 + (2 * 3)]; + A0 += (2 * 4); +#else + A00 = A1[0 + (2 * 0)]; + A01 = A1[1 + (2 * 0)]; + A02 = A1[0 + (2 * 1)]; + A03 = A1[1 + (2 * 1)]; + A04 = A1[0 + (2 * 2)]; + A05 = A1[1 + (2 * 2)]; + A06 = A1[0 + (2 * 3)]; + A07 = A1[1 + (2 * 3)]; + A1 += (2 * 4); +#endif + + result8 = __riscv_vfmacc_vf_f64m2(result8, A00, B0, 8); + resultA = __riscv_vfmacc_vf_f64m2(resultA, A01, B0, 8); + result0 = __riscv_vfmacc_vf_f64m2(result0, A02, B2, 8); + result2 = __riscv_vfmacc_vf_f64m2(result2, A03, B2, 8); + result4 = __riscv_vfmacc_vf_f64m2(result4, A04, B4, 8); + result6 = __riscv_vfmacc_vf_f64m2(result6, A05, B4, 8); + resultC = __riscv_vfmacc_vf_f64m2(resultC, A06, B6, 8); + resultE = __riscv_vfmacc_vf_f64m2(resultE, A07, B6, 8); + } + + result8 = __riscv_vfadd_vv_f64m2(result8, result0, 8); + resultA = __riscv_vfadd_vv_f64m2(resultA, result2, 8); + result4 = __riscv_vfadd_vv_f64m2(result4, resultC, 8); + result6 = __riscv_vfadd_vv_f64m2(result6, resultE, 8); + result8 = __riscv_vfadd_vv_f64m2(result8, result4, 8); + resultA = __riscv_vfadd_vv_f64m2(resultA, result6, 8); + } else { + result8 = __riscv_vreinterpret_v_u64m2_f64m2(__riscv_vmv_v_x_u64m2(0, 8)); + resultA = __riscv_vreinterpret_v_u64m2_f64m2(__riscv_vmv_v_x_u64m2(0, 8)); + } + } else if (M <= 4) { + if (K >= 2) { + vfloat64m4_t B00 = __riscv_vle64_v_f64m4(B, N * 2); + B += (N * 2); + B0 = __riscv_vget_v_f64m4_f64m2(B00, 0); + B2 = __riscv_vget_v_f64m4_f64m2(B00, 1); + + if (M & 4) { + A00 = A0[0]; + A01 = A0[1]; + A02 = A0[2]; + A03 = A0[3]; + A04 = A0[4]; + A05 = A0[5]; + A06 = A0[6]; + A07 = A0[7]; +#ifndef GEMM_NEW_PACKING + A0 += 8; +#endif + } +#ifdef GEMM_NEW_PACKING + if (M & 2) { + A00 = A0[0 + (M * 0)]; + A01 = A0[1 + (M * 0)]; + A02 = A0[0 + (M * 1)]; + A03 = A0[1 + (M * 1)]; + } + if (M & 1) { + A04 = A0[2 + (M * 0)]; + A05 = A0[2 + (M * 1)]; + } + A0 += (M * 2); +#else + if (M & 2) { + A00 = A1[0 + (2 * 0)]; + A01 = A1[1 + (2 * 0)]; + A02 = A1[0 + (2 * 1)]; + A03 = A1[1 + (2 * 1)]; + A1 += (2 * 2); + } + if (M & 1) { + A04 = A2[0 + (1 * 0)]; + A05 = A2[0 + (1 * 1)]; + A2 += (1 * 2); + } +#endif + + if (M & 4) { + result0 = __riscv_vfmul_vf_f64m2(B0, A00, 8); + result2 = __riscv_vfmul_vf_f64m2(B0, A01, 8); + result4 = __riscv_vfmul_vf_f64m2(B0, A02, 8); + result6 = __riscv_vfmul_vf_f64m2(B0, A03, 8); + result8 = __riscv_vfmul_vf_f64m2(B2, A04, 8); + resultA = __riscv_vfmul_vf_f64m2(B2, A05, 8); + resultC = __riscv_vfmul_vf_f64m2(B2, A06, 8); + resultE = __riscv_vfmul_vf_f64m2(B2, A07, 8); + } else { + result8 = __riscv_vfmul_vf_f64m2(B0, A00, 8); + resultA = __riscv_vfmul_vf_f64m2(B0, A01, 8); + result0 = __riscv_vfmul_vf_f64m2(B2, A02, 8); + result2 = __riscv_vfmul_vf_f64m2(B2, A03, 8); + + resultC = __riscv_vfmul_vf_f64m2(B0, A04, 8); + result4 = __riscv_vfmul_vf_f64m2(B2, A05, 8); + } + + BLASLONG k = (K / 2); + K &= 1; + while (--k) { + B00 = __riscv_vle64_v_f64m4(B, N * 2); + B += (N * 2); + B0 = __riscv_vget_v_f64m4_f64m2(B00, 0); + B2 = __riscv_vget_v_f64m4_f64m2(B00, 1); + + if (M & 4) { + A00 = A0[0]; + A01 = A0[1]; + A02 = A0[2]; + A03 = A0[3]; + A04 = A0[4]; + A05 = A0[5]; + A06 = A0[6]; + A07 = A0[7]; +#ifndef GEMM_NEW_PACKING + A0 += 8; +#endif + } +#ifdef GEMM_NEW_PACKING + if (M & 2) { + A00 = A0[0 + (M * 0)]; + A01 = A0[1 + (M * 0)]; + A02 = A0[0 + (M * 1)]; + A03 = A0[1 + (M * 1)]; + } + if (M & 1) { + A04 = A0[2 + (M * 0)]; + A05 = A0[2 + (M * 1)]; + } + A0 += (M * 2); +#else + if (M & 2) { + A00 = A1[0 + (2 * 0)]; + A01 = A1[1 + (2 * 0)]; + A02 = A1[0 + (2 * 1)]; + A03 = A1[1 + (2 * 1)]; + A1 += (2 * 2); + } + if (M & 1) { + A04 = A2[0 + (1 * 0)]; + A05 = A2[0 + (1 * 1)]; + A2 += (1 * 2); + } +#endif + + if (M & 4) { + result0 = __riscv_vfmacc_vf_f64m2(result0, A00, B0, 8); + result2 = __riscv_vfmacc_vf_f64m2(result2, A01, B0, 8); + result4 = __riscv_vfmacc_vf_f64m2(result4, A02, B0, 8); + result6 = __riscv_vfmacc_vf_f64m2(result6, A03, B0, 8); + result8 = __riscv_vfmacc_vf_f64m2(result8, A04, B2, 8); + resultA = __riscv_vfmacc_vf_f64m2(resultA, A05, B2, 8); + resultC = __riscv_vfmacc_vf_f64m2(resultC, A06, B2, 8); + resultE = __riscv_vfmacc_vf_f64m2(resultE, A07, B2, 8); + } else { + result8 = __riscv_vfmacc_vf_f64m2(result8, A00, B0, 8); + resultA = __riscv_vfmacc_vf_f64m2(resultA, A01, B0, 8); + result0 = __riscv_vfmacc_vf_f64m2(result0, A02, B2, 8); + result2 = __riscv_vfmacc_vf_f64m2(result2, A03, B2, 8); + resultC = __riscv_vfmacc_vf_f64m2(resultC, A04, B0, 8); + result4 = __riscv_vfmacc_vf_f64m2(result4, A05, B2, 8); + } + } + + if (M & 4) { + result0 = __riscv_vfadd_vv_f64m2(result0, result8, 8); + result2 = __riscv_vfadd_vv_f64m2(result2, resultA, 8); + result4 = __riscv_vfadd_vv_f64m2(result4, resultC, 8); + result6 = __riscv_vfadd_vv_f64m2(result6, resultE, 8); + } else { + result8 = __riscv_vfadd_vv_f64m2(result8, result0, 8); + resultA = __riscv_vfadd_vv_f64m2(resultA, result2, 8); + + resultC = __riscv_vfadd_vv_f64m2(resultC, result4, 8); + } + } else { + if (M & 4) { + result0 = __riscv_vreinterpret_v_u64m2_f64m2(__riscv_vmv_v_x_u64m2(0, 8)); + result2 = __riscv_vreinterpret_v_u64m2_f64m2(__riscv_vmv_v_x_u64m2(0, 8)); + result4 = __riscv_vreinterpret_v_u64m2_f64m2(__riscv_vmv_v_x_u64m2(0, 8)); + result6 = __riscv_vreinterpret_v_u64m2_f64m2(__riscv_vmv_v_x_u64m2(0, 8)); + } else { + result8 = __riscv_vreinterpret_v_u64m2_f64m2(__riscv_vmv_v_x_u64m2(0, 8)); + resultA = __riscv_vreinterpret_v_u64m2_f64m2(__riscv_vmv_v_x_u64m2(0, 8)); + + resultC = __riscv_vreinterpret_v_u64m2_f64m2(__riscv_vmv_v_x_u64m2(0, 8)); + } + } + } else +#endif + { + B0 = __riscv_vle64_v_f64m2(B, N); + + if (M & 4) { + A00 = A0[0]; + A01 = A0[1]; + A02 = A0[2]; + A03 = A0[3]; +#ifndef GEMM_NEW_PACKING + A0 += 4; +#endif + } + B += N; +#ifdef GEMM_NEW_PACKING + if (M & 2) { + A04 = A0[0 + (M & 0x4)]; + A05 = A0[1 + (M & 0x4)]; + } + if (M & 1) { + A06 = A0[0 + (M & 0x6)]; + } + A0 += M; +#else + if (M & 2) { + A04 = A1[0]; + A05 = A1[1]; + A1 += 2; + } + if (M & 1) { + A06 = A2[0]; + A2 += 1; + } +#endif + if (M & 4) { + result0 = __riscv_vfmul_vf_f64m2(B0, A00, 8); + result2 = __riscv_vfmul_vf_f64m2(B0, A01, 8); + result4 = __riscv_vfmul_vf_f64m2(B0, A02, 8); + result6 = __riscv_vfmul_vf_f64m2(B0, A03, 8); + } + if (M & 2) { + result8 = __riscv_vfmul_vf_f64m2(B0, A04, 8); + resultA = __riscv_vfmul_vf_f64m2(B0, A05, 8); + } + if (M & 1) { + resultC = __riscv_vfmul_vf_f64m2(B0, A06, 8); + } + K--; } - - if( M & 4 ) { - gvl = __riscv_vsetvl_e64m1(4); - - BLASLONG ai=m_top*K; - BLASLONG bi=n_top*K; - double B0 = B[bi+0]; - double B1 = B[bi+1]; - bi += 2; - - vfloat64m1_t A0 = __riscv_vle64_v_f64m1( &A[ai+0*gvl], gvl ); - ai += 4; - - vfloat64m1_t result0 = __riscv_vfmul_vf_f64m1( A0, B0, gvl); - vfloat64m1_t result1 = __riscv_vfmul_vf_f64m1( A0, B1, gvl); - - for(BLASLONG k=1; k 4 + if (N & 4) { + resultE = __riscv_vle64_v_f64m1(B, 4); + + if (M & 4) { + result0 = __riscv_vfmul_vf_f64m1(resultE, A0[0], 4); + result2 = __riscv_vfmul_vf_f64m1(resultE, A0[1], 4); + result4 = __riscv_vfmul_vf_f64m1(resultE, A0[2], 4); + result6 = __riscv_vfmul_vf_f64m1(resultE, A0[3], 4); + } +#ifdef GEMM_NEW_PACKING + if (M & 2) { + result8 = __riscv_vfmul_vf_f64m1(resultE, A0[0 + (M & 0x4)], 4); + resultA = __riscv_vfmul_vf_f64m1(resultE, A0[1 + (M & 0x4)], 4); + } + if (M & 1) { + resultC = __riscv_vfmul_vf_f64m1(resultE, A0[0 + (M & 0x6)], 4); + } +#else + if (M & 2) { + result8 = __riscv_vfmul_vf_f64m1(resultE, A1[0], 4); + resultA = __riscv_vfmul_vf_f64m1(resultE, A1[1], 4); + } + if (M & 1) { + resultC = __riscv_vfmul_vf_f64m1(resultE, A2[0], 4); + } +#endif } + if (N & 3) { +#ifdef GEMM_NEW_PACKING + if (N & 1) { + B0 = B[0 + (N & 6)]; + } + if (N & 2) { + B1 = B[0 + (N & 4)]; + B2 = B[1 + (N & 4)]; + } +#else + if (N & 1) { + B00 = B + ((N & 6) * K); + B0 = B00[0]; + B00 += 1; + } + if (N & 2) { + B01 = B + ((N & 4) * K); + B1 = B01[0]; + B2 = B01[1]; + B01 += 2; + } +#endif + if (M & 4) { + result04 = __riscv_vle64_v_f64m1(A0, 4); + if (N & 1) { + result05 = __riscv_vfmul_vf_f64m1(result04, B0, 4); + } + if (N & 2) { + result06 = __riscv_vfmul_vf_f64m1(result04, B1, 4); + result07 = __riscv_vfmul_vf_f64m1(result04, B2, 4); + } + } + if (M & 2) { +#ifdef GEMM_NEW_PACKING + a0 = A0[0 + (M & 0x4)]; + a1 = A0[1 + (M & 0x4)]; +#else + a0 = A1[0]; + a1 = A1[1]; +#endif + if (N & 1) { + r0 = B0 * a0; + r1 = B0 * a1; + } + if (N & 2) { + r8 = B1 * a0; + r9 = B1 * a1; + rC = B2 * a0; + rD = B2 * a1; + } + } + if (M & 1) { +#ifdef GEMM_NEW_PACKING + a2 = A0[0 + (M & 0x6)]; +#else + a2 = A2[0]; +#endif + if (N & 1) { + r2 = B0 * a2; + } + if (N & 2) { + rA = B1 * a2; + rE = B2 * a2; + } + } + } +#ifdef GEMM_NEW_PACKING + A0 += M; + B += N; +#else + if (M & 4) { + A0 += 4; + } + if (M & 2) { + A1 += 2; + } + if (M & 1) { + A2 += 1; + } + if (N & 4) { + B += 4; + } +#endif + + while (--K) { + if (N & 4) { + resultE = __riscv_vle64_v_f64m1(B, 4); + + if (M & 4) { + result0 = __riscv_vfmacc_vf_f64m1(result0, A0[0], resultE, 4); + result2 = __riscv_vfmacc_vf_f64m1(result2, A0[1], resultE, 4); + result4 = __riscv_vfmacc_vf_f64m1(result4, A0[2], resultE, 4); + result6 = __riscv_vfmacc_vf_f64m1(result6, A0[3], resultE, 4); + } +#ifdef GEMM_NEW_PACKING + if (M & 2) { + result8 = __riscv_vfmacc_vf_f64m1(result8, A0[0 + (M & 0x4)], resultE, 4); + resultA = __riscv_vfmacc_vf_f64m1(resultA, A0[1 + (M & 0x4)], resultE, 4); + } + if (M & 1) { + resultC = __riscv_vfmacc_vf_f64m1(resultC, A0[0 + (M & 0x6)], resultE, 4); + } +#else + if (M & 2) { + result8 = __riscv_vfmacc_vf_f64m1(result8, A1[0], resultE, 4); + resultA = __riscv_vfmacc_vf_f64m1(resultA, A1[1], resultE, 4); + } + if (M & 1) { + resultC = __riscv_vfmacc_vf_f64m1(resultC, A2[0], resultE, 4); + } +#endif + } - if( M & 1 ) { - double result0 = 0; - double result1 = 0; - BLASLONG ai=m_top*K; - BLASLONG bi=n_top*K; - - for(BLASLONG k=0; k 4 + if (N & 2) { + B4 = B00[0 + (N & 4)]; + B5 = B00[1 + (N & 4)]; + } + if (N & 1) { + B6 = B00[0 + (N & 6)]; + } + B00 += N; +#else + if (N & 2) { + B4 = B01[0]; + B5 = B01[1]; + B01 += 2; + } + if (N & 1) { + B6 = B02[0]; + B02 += 1; + } +#endif - if( M & 4 ) { - gvl = __riscv_vsetvl_e64m1(4); - - BLASLONG ai=m_top*K; - BLASLONG bi=n_top*K; - double B0 = B[bi+0]; - bi += 1; - - vfloat64m1_t A0 = __riscv_vle64_v_f64m1( &A[ai+0*gvl], gvl ); - ai += 4; + A0 = __riscv_vle64_v_f64m2(*A, 4 * 2); + *A += 8; - vfloat64m1_t result0 = __riscv_vfmul_vf_f64m1( A0, B0, gvl); + if (N & 4) { + result0 = __riscv_vfmul_vf_f64m2(A0, B0, 8); + result2 = __riscv_vfmul_vf_f64m2(A0, B1, 8); + result4 = __riscv_vfmul_vf_f64m2(A0, B2, 8); + result6 = __riscv_vfmul_vf_f64m2(A0, B3, 8); + } + if (N & 2) { + result8 = __riscv_vfmul_vf_f64m2(A0, B4, 8); + resultA = __riscv_vfmul_vf_f64m2(A0, B5, 8); + } + if (N & 1) { + resultC = __riscv_vfmul_vf_f64m2(A0, B6, 8); + } + } - for(BLASLONG k=1; k -int CNAME(BLASLONG M, BLASLONG N, BLASLONG K, FLOAT alpha, FLOAT* A, FLOAT* B, FLOAT* C, BLASLONG ldc) +#define GEMM_RIGHT_EDGE // One pass for right edge - swap A & B - transpose at end +#define GEMM_BOTTOM_EDGE // One pass for bottom edge - combo on vector and scalar ops +#define GEMM_RIGHT_CHUNK // Break K into chunks (causes epsilon differences) +#define GEMM_BOTTOM_CHUNK // Break K into chunks (causes epsilon differences) -{ - BLASLONG gvl = 0; - BLASLONG m_top = 0; - BLASLONG n_top = 0; +//#define GEMM_NEW_PACKING // Right edge packed data friendly +#define FORCEINLINE inline __attribute__((always_inline)) - // -- MAIN PASS +#ifdef GEMM_NEW_PACKING +static FORCEINLINE FLOAT* M_TAIL_ONE(BLASLONG K, const BLASLONG M, const BLASLONG N, const bool S, FLOAT alpha, FLOAT* A0, FLOAT*, FLOAT*, FLOAT*, FLOAT* B, FLOAT* C, BLASLONG ldc) +#else +static FORCEINLINE FLOAT* M_TAIL_ONE(BLASLONG K, const BLASLONG M, const BLASLONG N, const bool S, FLOAT alpha, FLOAT* A0, FLOAT* A1, FLOAT* A2, FLOAT* A3, FLOAT* B, FLOAT* C, BLASLONG ldc) +#endif +{ + const bool S2 = (S && (M == 8)); + if (N & 8) { + vfloat32m1_t result0, result1, result2, result3, result4, result5, result6, result7; + vfloat32m1_t result8, result9, resultA, resultB, resultC, resultD, resultE; + vfloat32m1_t B0, A4; + +#ifdef GEMM_RIGHT_CHUNK + vfloat32m1_t resultF; + vfloat32m1_t B1, B2, B3, B4, B5, B6, B7; + + if (M <= 2) { + if (K >= 8) { + vfloat32m8_t B00 = __riscv_vle32_v_f32m8(B, N * 8); + B0 = __riscv_vget_v_f32m8_f32m1(B00, 0); + B1 = __riscv_vget_v_f32m8_f32m1(B00, 1); + B2 = __riscv_vget_v_f32m8_f32m1(B00, 2); + B3 = __riscv_vget_v_f32m8_f32m1(B00, 3); + B4 = __riscv_vget_v_f32m8_f32m1(B00, 4); + B5 = __riscv_vget_v_f32m8_f32m1(B00, 5); + B6 = __riscv_vget_v_f32m8_f32m1(B00, 6); + B7 = __riscv_vget_v_f32m8_f32m1(B00, 7); + B += (N * 8); + +#ifdef GEMM_NEW_PACKING + if (M == 1) { + resultE = __riscv_vfmul_vf_f32m1(B0, A0[0 + (1 * 0)], N); + result0 = __riscv_vfmul_vf_f32m1(B1, A0[0 + (1 * 1)], N); + result1 = __riscv_vfmul_vf_f32m1(B2, A0[0 + (1 * 2)], N); + result2 = __riscv_vfmul_vf_f32m1(B3, A0[0 + (1 * 3)], N); + result3 = __riscv_vfmul_vf_f32m1(B4, A0[0 + (1 * 4)], N); + result4 = __riscv_vfmul_vf_f32m1(B5, A0[0 + (1 * 5)], N); + result5 = __riscv_vfmul_vf_f32m1(B6, A0[0 + (1 * 6)], N); + result6 = __riscv_vfmul_vf_f32m1(B7, A0[0 + (1 * 7)], N); + } else { + resultC = __riscv_vfmul_vf_f32m1(B0, A0[0 + (2 * 0)], N); + resultD = __riscv_vfmul_vf_f32m1(B0, A0[1 + (2 * 0)], N); + result0 = __riscv_vfmul_vf_f32m1(B1, A0[0 + (2 * 1)], N); + result1 = __riscv_vfmul_vf_f32m1(B1, A0[1 + (2 * 1)], N); + result2 = __riscv_vfmul_vf_f32m1(B2, A0[0 + (2 * 2)], N); + result3 = __riscv_vfmul_vf_f32m1(B2, A0[1 + (2 * 2)], N); + result4 = __riscv_vfmul_vf_f32m1(B3, A0[0 + (2 * 3)], N); + result5 = __riscv_vfmul_vf_f32m1(B3, A0[1 + (2 * 3)], N); + result6 = __riscv_vfmul_vf_f32m1(B4, A0[0 + (2 * 4)], N); + result7 = __riscv_vfmul_vf_f32m1(B4, A0[1 + (2 * 4)], N); + result8 = __riscv_vfmul_vf_f32m1(B5, A0[0 + (2 * 5)], N); + result9 = __riscv_vfmul_vf_f32m1(B5, A0[1 + (2 * 5)], N); + resultA = __riscv_vfmul_vf_f32m1(B6, A0[0 + (2 * 6)], N); + resultB = __riscv_vfmul_vf_f32m1(B6, A0[1 + (2 * 6)], N); + resultE = __riscv_vfmul_vf_f32m1(B7, A0[0 + (2 * 7)], N); + resultF = __riscv_vfmul_vf_f32m1(B7, A0[1 + (2 * 7)], N); + } + A0 += (M * 8); +#else + if (M == 1) { + resultE = __riscv_vfmul_vf_f32m1(B0, A3[0 + (1 * 0)], N); + result0 = __riscv_vfmul_vf_f32m1(B1, A3[0 + (1 * 1)], N); + result1 = __riscv_vfmul_vf_f32m1(B2, A3[0 + (1 * 2)], N); + result2 = __riscv_vfmul_vf_f32m1(B3, A3[0 + (1 * 3)], N); + result3 = __riscv_vfmul_vf_f32m1(B4, A3[0 + (1 * 4)], N); + result4 = __riscv_vfmul_vf_f32m1(B5, A3[0 + (1 * 5)], N); + result5 = __riscv_vfmul_vf_f32m1(B6, A3[0 + (1 * 6)], N); + result6 = __riscv_vfmul_vf_f32m1(B7, A3[0 + (1 * 7)], N); + A3 += (1 * 8); + } else { + resultC = __riscv_vfmul_vf_f32m1(B0, A2[0 + (2 * 0)], N); + resultD = __riscv_vfmul_vf_f32m1(B0, A2[1 + (2 * 0)], N); + result0 = __riscv_vfmul_vf_f32m1(B1, A2[0 + (2 * 1)], N); + result1 = __riscv_vfmul_vf_f32m1(B1, A2[1 + (2 * 1)], N); + result2 = __riscv_vfmul_vf_f32m1(B2, A2[0 + (2 * 2)], N); + result3 = __riscv_vfmul_vf_f32m1(B2, A2[1 + (2 * 2)], N); + result4 = __riscv_vfmul_vf_f32m1(B3, A2[0 + (2 * 3)], N); + result5 = __riscv_vfmul_vf_f32m1(B3, A2[1 + (2 * 3)], N); + result6 = __riscv_vfmul_vf_f32m1(B4, A2[0 + (2 * 4)], N); + result7 = __riscv_vfmul_vf_f32m1(B4, A2[1 + (2 * 4)], N); + result8 = __riscv_vfmul_vf_f32m1(B5, A2[0 + (2 * 5)], N); + result9 = __riscv_vfmul_vf_f32m1(B5, A2[1 + (2 * 5)], N); + resultA = __riscv_vfmul_vf_f32m1(B6, A2[0 + (2 * 6)], N); + resultB = __riscv_vfmul_vf_f32m1(B6, A2[1 + (2 * 6)], N); + resultE = __riscv_vfmul_vf_f32m1(B7, A2[0 + (2 * 7)], N); + resultF = __riscv_vfmul_vf_f32m1(B7, A2[1 + (2 * 7)], N); + A2 += (2 * 8); + } +#endif + + BLASLONG k = (K / 8); + K &= 7; + while (--k) { + B00 = __riscv_vle32_v_f32m8(B, N * 8); + B0 = __riscv_vget_v_f32m8_f32m1(B00, 0); + B1 = __riscv_vget_v_f32m8_f32m1(B00, 1); + B2 = __riscv_vget_v_f32m8_f32m1(B00, 2); + B3 = __riscv_vget_v_f32m8_f32m1(B00, 3); + B4 = __riscv_vget_v_f32m8_f32m1(B00, 4); + B5 = __riscv_vget_v_f32m8_f32m1(B00, 5); + B6 = __riscv_vget_v_f32m8_f32m1(B00, 6); + B7 = __riscv_vget_v_f32m8_f32m1(B00, 7); + B += (N * 8); + +#ifdef GEMM_NEW_PACKING + if (M == 1) { + resultE = __riscv_vfmacc_vf_f32m1(resultE, A0[0 + (1 * 0)], B0, N); + result0 = __riscv_vfmacc_vf_f32m1(result0, A0[0 + (1 * 1)], B1, N); + result1 = __riscv_vfmacc_vf_f32m1(result1, A0[0 + (1 * 2)], B2, N); + result2 = __riscv_vfmacc_vf_f32m1(result2, A0[0 + (1 * 3)], B3, N); + result3 = __riscv_vfmacc_vf_f32m1(result3, A0[0 + (1 * 4)], B4, N); + result4 = __riscv_vfmacc_vf_f32m1(result4, A0[0 + (1 * 5)], B5, N); + result5 = __riscv_vfmacc_vf_f32m1(result5, A0[0 + (1 * 6)], B6, N); + result6 = __riscv_vfmacc_vf_f32m1(result6, A0[0 + (1 * 7)], B7, N); + } else { + resultC = __riscv_vfmacc_vf_f32m1(resultC, A0[0 + (2 * 0)], B0, N); + resultD = __riscv_vfmacc_vf_f32m1(resultD, A0[1 + (2 * 0)], B0, N); + result0 = __riscv_vfmacc_vf_f32m1(result0, A0[0 + (2 * 1)], B1, N); + result1 = __riscv_vfmacc_vf_f32m1(result1, A0[1 + (2 * 1)], B1, N); + result2 = __riscv_vfmacc_vf_f32m1(result2, A0[0 + (2 * 2)], B2, N); + result3 = __riscv_vfmacc_vf_f32m1(result3, A0[1 + (2 * 2)], B2, N); + result4 = __riscv_vfmacc_vf_f32m1(result4, A0[0 + (2 * 3)], B3, N); + result5 = __riscv_vfmacc_vf_f32m1(result5, A0[1 + (2 * 3)], B3, N); + result6 = __riscv_vfmacc_vf_f32m1(result6, A0[0 + (2 * 4)], B4, N); + result7 = __riscv_vfmacc_vf_f32m1(result7, A0[1 + (2 * 4)], B4, N); + result8 = __riscv_vfmacc_vf_f32m1(result8, A0[0 + (2 * 5)], B5, N); + result9 = __riscv_vfmacc_vf_f32m1(result9, A0[1 + (2 * 5)], B5, N); + resultA = __riscv_vfmacc_vf_f32m1(resultA, A0[0 + (2 * 6)], B6, N); + resultB = __riscv_vfmacc_vf_f32m1(resultB, A0[1 + (2 * 6)], B6, N); + resultE = __riscv_vfmacc_vf_f32m1(resultE, A0[0 + (2 * 7)], B7, N); + resultF = __riscv_vfmacc_vf_f32m1(resultF, A0[1 + (2 * 7)], B7, N); + } + A0 += (M * 8); +#else + if (M == 1) { + resultE = __riscv_vfmacc_vf_f32m1(resultE, A3[0 + (1 * 0)], B0, N); + result0 = __riscv_vfmacc_vf_f32m1(result0, A3[0 + (1 * 1)], B1, N); + result1 = __riscv_vfmacc_vf_f32m1(result1, A3[0 + (1 * 2)], B2, N); + result2 = __riscv_vfmacc_vf_f32m1(result2, A3[0 + (1 * 3)], B3, N); + result3 = __riscv_vfmacc_vf_f32m1(result3, A3[0 + (1 * 4)], B4, N); + result4 = __riscv_vfmacc_vf_f32m1(result4, A3[0 + (1 * 5)], B5, N); + result5 = __riscv_vfmacc_vf_f32m1(result5, A3[0 + (1 * 6)], B6, N); + result6 = __riscv_vfmacc_vf_f32m1(result6, A3[0 + (1 * 7)], B7, N); + A3 += (1 * 8); + } else { + resultC = __riscv_vfmacc_vf_f32m1(resultC, A2[0 + (2 * 0)], B0, N); + resultD = __riscv_vfmacc_vf_f32m1(resultD, A2[1 + (2 * 0)], B0, N); + result0 = __riscv_vfmacc_vf_f32m1(result0, A2[0 + (2 * 1)], B1, N); + result1 = __riscv_vfmacc_vf_f32m1(result1, A2[1 + (2 * 1)], B1, N); + result2 = __riscv_vfmacc_vf_f32m1(result2, A2[0 + (2 * 2)], B2, N); + result3 = __riscv_vfmacc_vf_f32m1(result3, A2[1 + (2 * 2)], B2, N); + result4 = __riscv_vfmacc_vf_f32m1(result4, A2[0 + (2 * 3)], B3, N); + result5 = __riscv_vfmacc_vf_f32m1(result5, A2[1 + (2 * 3)], B3, N); + result6 = __riscv_vfmacc_vf_f32m1(result6, A2[0 + (2 * 4)], B4, N); + result7 = __riscv_vfmacc_vf_f32m1(result7, A2[1 + (2 * 4)], B4, N); + result8 = __riscv_vfmacc_vf_f32m1(result8, A2[0 + (2 * 5)], B5, N); + result9 = __riscv_vfmacc_vf_f32m1(result9, A2[1 + (2 * 5)], B5, N); + resultA = __riscv_vfmacc_vf_f32m1(resultA, A2[0 + (2 * 6)], B6, N); + resultB = __riscv_vfmacc_vf_f32m1(resultB, A2[1 + (2 * 6)], B6, N); + resultE = __riscv_vfmacc_vf_f32m1(resultE, A2[0 + (2 * 7)], B7, N); + resultF = __riscv_vfmacc_vf_f32m1(resultF, A2[1 + (2 * 7)], B7, N); + A2 += (2 * 8); + } +#endif + } + + if (M == 1) { + resultE = __riscv_vfadd_vv_f32m1(resultE, result0, N); + result1 = __riscv_vfadd_vv_f32m1(result1, result2, N); + result3 = __riscv_vfadd_vv_f32m1(result3, result4, N); + result5 = __riscv_vfadd_vv_f32m1(result5, result6, N); + resultE = __riscv_vfadd_vv_f32m1(resultE, result1, N); + result3 = __riscv_vfadd_vv_f32m1(result3, result5, N); + resultE = __riscv_vfadd_vv_f32m1(resultE, result3, N); + } else { + resultC = __riscv_vfadd_vv_f32m1(resultC, result0, N); + resultD = __riscv_vfadd_vv_f32m1(resultD, result1, N); + result2 = __riscv_vfadd_vv_f32m1(result2, result4, N); + result3 = __riscv_vfadd_vv_f32m1(result3, result5, N); + result6 = __riscv_vfadd_vv_f32m1(result6, result8, N); + result7 = __riscv_vfadd_vv_f32m1(result7, result9, N); + resultA = __riscv_vfadd_vv_f32m1(resultA, resultE, N); + resultB = __riscv_vfadd_vv_f32m1(resultB, resultF, N); + resultC = __riscv_vfadd_vv_f32m1(resultC, result2, N); + resultD = __riscv_vfadd_vv_f32m1(resultD, result3, N); + result6 = __riscv_vfadd_vv_f32m1(result6, resultA, N); + result7 = __riscv_vfadd_vv_f32m1(result7, resultB, N); + resultC = __riscv_vfadd_vv_f32m1(resultC, result6, N); + resultD = __riscv_vfadd_vv_f32m1(resultD, result7, N); + } + } else { + if (M == 1) { + resultE = __riscv_vreinterpret_v_u32m1_f32m1(__riscv_vmv_v_x_u32m1(0, N)); + } else { + resultC = __riscv_vreinterpret_v_u32m1_f32m1(__riscv_vmv_v_x_u32m1(0, N)); + resultD = __riscv_vreinterpret_v_u32m1_f32m1(__riscv_vmv_v_x_u32m1(0, N)); + } + } + } else if (M == 3) { + if (K >= 4) { + vfloat32m4_t B00 = __riscv_vle32_v_f32m4(B, N * 4); + B0 = __riscv_vget_v_f32m4_f32m1(B00, 0); + B1 = __riscv_vget_v_f32m4_f32m1(B00, 1); + B2 = __riscv_vget_v_f32m4_f32m1(B00, 2); + B3 = __riscv_vget_v_f32m4_f32m1(B00, 3); + B += (N * 4); + +#ifdef GEMM_NEW_PACKING + resultC = __riscv_vfmul_vf_f32m1(B0, A0[0 + (3 * 0)], N); + resultD = __riscv_vfmul_vf_f32m1(B0, A0[1 + (3 * 0)], N); + result0 = __riscv_vfmul_vf_f32m1(B1, A0[0 + (3 * 1)], N); + result1 = __riscv_vfmul_vf_f32m1(B1, A0[1 + (3 * 1)], N); + result4 = __riscv_vfmul_vf_f32m1(B2, A0[0 + (3 * 2)], N); + result5 = __riscv_vfmul_vf_f32m1(B2, A0[1 + (3 * 2)], N); + result8 = __riscv_vfmul_vf_f32m1(B3, A0[0 + (3 * 3)], N); + result9 = __riscv_vfmul_vf_f32m1(B3, A0[1 + (3 * 3)], N); + + resultE = __riscv_vfmul_vf_f32m1(B0, A0[2 + (3 * 0)], N); + result2 = __riscv_vfmul_vf_f32m1(B1, A0[2 + (3 * 1)], N); + result6 = __riscv_vfmul_vf_f32m1(B2, A0[2 + (3 * 2)], N); + resultA = __riscv_vfmul_vf_f32m1(B3, A0[2 + (3 * 3)], N); + A0 += (3 * 4); +#else + resultC = __riscv_vfmul_vf_f32m1(B0, A2[0 + (2 * 0)], N); + resultD = __riscv_vfmul_vf_f32m1(B0, A2[1 + (2 * 0)], N); + result0 = __riscv_vfmul_vf_f32m1(B1, A2[0 + (2 * 1)], N); + result1 = __riscv_vfmul_vf_f32m1(B1, A2[1 + (2 * 1)], N); + result4 = __riscv_vfmul_vf_f32m1(B2, A2[0 + (2 * 2)], N); + result5 = __riscv_vfmul_vf_f32m1(B2, A2[1 + (2 * 2)], N); + result8 = __riscv_vfmul_vf_f32m1(B3, A2[0 + (2 * 3)], N); + result9 = __riscv_vfmul_vf_f32m1(B3, A2[1 + (2 * 3)], N); + A2 += (2 * 4); + + resultE = __riscv_vfmul_vf_f32m1(B0, A3[0 + (1 * 0)], N); + result2 = __riscv_vfmul_vf_f32m1(B1, A3[0 + (1 * 1)], N); + result6 = __riscv_vfmul_vf_f32m1(B2, A3[0 + (1 * 2)], N); + resultA = __riscv_vfmul_vf_f32m1(B3, A3[0 + (1 * 3)], N); + A3 += (1 * 4); +#endif + + BLASLONG k = (K / 4); + K &= 3; + while (--k) { + B00 = __riscv_vle32_v_f32m4(B, N * 4); + B0 = __riscv_vget_v_f32m4_f32m1(B00, 0); + B1 = __riscv_vget_v_f32m4_f32m1(B00, 1); + B2 = __riscv_vget_v_f32m4_f32m1(B00, 2); + B3 = __riscv_vget_v_f32m4_f32m1(B00, 3); + B += (N * 4); + +#ifdef GEMM_NEW_PACKING + resultC = __riscv_vfmacc_vf_f32m1(resultC, A0[0 + (3 * 0)], B0, N); + resultD = __riscv_vfmacc_vf_f32m1(resultD, A0[1 + (3 * 0)], B0, N); + result0 = __riscv_vfmacc_vf_f32m1(result0, A0[0 + (3 * 1)], B1, N); + result1 = __riscv_vfmacc_vf_f32m1(result1, A0[1 + (3 * 1)], B1, N); + result4 = __riscv_vfmacc_vf_f32m1(result4, A0[0 + (3 * 2)], B2, N); + result5 = __riscv_vfmacc_vf_f32m1(result5, A0[1 + (3 * 2)], B2, N); + result8 = __riscv_vfmacc_vf_f32m1(result8, A0[0 + (3 * 3)], B3, N); + result9 = __riscv_vfmacc_vf_f32m1(result9, A0[1 + (3 * 3)], B3, N); + + resultE = __riscv_vfmacc_vf_f32m1(resultE, A0[2 + (3 * 0)], B0, N); + result2 = __riscv_vfmacc_vf_f32m1(result2, A0[2 + (3 * 1)], B1, N); + result6 = __riscv_vfmacc_vf_f32m1(result6, A0[2 + (3 * 2)], B2, N); + resultA = __riscv_vfmacc_vf_f32m1(resultA, A0[2 + (3 * 3)], B3, N); + A0 += (3 * 4); +#else + resultC = __riscv_vfmacc_vf_f32m1(resultC, A2[0 + (2 * 0)], B0, N); + resultD = __riscv_vfmacc_vf_f32m1(resultD, A2[1 + (2 * 0)], B0, N); + result0 = __riscv_vfmacc_vf_f32m1(result0, A2[0 + (2 * 1)], B1, N); + result1 = __riscv_vfmacc_vf_f32m1(result1, A2[1 + (2 * 1)], B1, N); + result4 = __riscv_vfmacc_vf_f32m1(result4, A2[0 + (2 * 2)], B2, N); + result5 = __riscv_vfmacc_vf_f32m1(result5, A2[1 + (2 * 2)], B2, N); + result8 = __riscv_vfmacc_vf_f32m1(result8, A2[0 + (2 * 3)], B3, N); + result9 = __riscv_vfmacc_vf_f32m1(result9, A2[1 + (2 * 3)], B3, N); + A2 += (2 * 4); + + resultE = __riscv_vfmacc_vf_f32m1(resultE, A3[0 + (1 * 0)], B0, N); + result2 = __riscv_vfmacc_vf_f32m1(result2, A3[0 + (1 * 1)], B1, N); + result6 = __riscv_vfmacc_vf_f32m1(result6, A3[0 + (1 * 2)], B2, N); + resultA = __riscv_vfmacc_vf_f32m1(resultA, A3[0 + (1 * 3)], B3, N); + A3 += (1 * 4); +#endif + } + + resultC = __riscv_vfadd_vv_f32m1(resultC, result0, N); + resultD = __riscv_vfadd_vv_f32m1(resultD, result1, N); + result4 = __riscv_vfadd_vv_f32m1(result4, result8, N); + result5 = __riscv_vfadd_vv_f32m1(result5, result9, N); + resultC = __riscv_vfadd_vv_f32m1(resultC, result4, N); + resultD = __riscv_vfadd_vv_f32m1(resultD, result5, N); + + resultE = __riscv_vfadd_vv_f32m1(resultE, result2, N); + result6 = __riscv_vfadd_vv_f32m1(result6, resultA, N); + resultE = __riscv_vfadd_vv_f32m1(resultE, result6, N); + } else { + resultC = __riscv_vreinterpret_v_u32m1_f32m1(__riscv_vmv_v_x_u32m1(0, N)); + resultD = __riscv_vreinterpret_v_u32m1_f32m1(__riscv_vmv_v_x_u32m1(0, N)); + + resultE = __riscv_vreinterpret_v_u32m1_f32m1(__riscv_vmv_v_x_u32m1(0, N)); + } + } else if (M <= 8) { + if (K >= 2) { + vfloat32m2_t B00, A00; + vfloat32m1_t A5; + + if (!S2) { + B00 = __riscv_vle32_v_f32m2(B, N * 2); + B0 = __riscv_vget_v_f32m2_f32m1(B00, 0); + B1 = __riscv_vget_v_f32m2_f32m1(B00, 1); + } + + if (M == 8) { + if (S2) { + A00 = __riscv_vle32_v_f32m2(A0, N * 2); + A4 = __riscv_vget_v_f32m2_f32m1(A00, 0); + A5 = __riscv_vget_v_f32m2_f32m1(A00, 1); + + result0 = __riscv_vfmul_vf_f32m1(A4, B[0], N); + result1 = __riscv_vfmul_vf_f32m1(A4, B[1], N); + result2 = __riscv_vfmul_vf_f32m1(A4, B[2], N); + result3 = __riscv_vfmul_vf_f32m1(A4, B[3], N); + result4 = __riscv_vfmul_vf_f32m1(A4, B[4], N); + result5 = __riscv_vfmul_vf_f32m1(A4, B[5], N); + result6 = __riscv_vfmul_vf_f32m1(A4, B[6], N); + result7 = __riscv_vfmul_vf_f32m1(A4, B[7], N); + result8 = __riscv_vfmul_vf_f32m1(A5, B[8], N); + result9 = __riscv_vfmul_vf_f32m1(A5, B[9], N); + resultA = __riscv_vfmul_vf_f32m1(A5, B[10], N); + resultB = __riscv_vfmul_vf_f32m1(A5, B[11], N); + resultC = __riscv_vfmul_vf_f32m1(A5, B[12], N); + resultD = __riscv_vfmul_vf_f32m1(A5, B[13], N); + resultE = __riscv_vfmul_vf_f32m1(A5, B[14], N); + resultF = __riscv_vfmul_vf_f32m1(A5, B[15], N); + } else { + result0 = __riscv_vfmul_vf_f32m1(B0, A0[0], N); + result1 = __riscv_vfmul_vf_f32m1(B0, A0[1], N); + result2 = __riscv_vfmul_vf_f32m1(B0, A0[2], N); + result3 = __riscv_vfmul_vf_f32m1(B0, A0[3], N); + result4 = __riscv_vfmul_vf_f32m1(B0, A0[4], N); + result5 = __riscv_vfmul_vf_f32m1(B0, A0[5], N); + result6 = __riscv_vfmul_vf_f32m1(B0, A0[6], N); + result7 = __riscv_vfmul_vf_f32m1(B0, A0[7], N); + result8 = __riscv_vfmul_vf_f32m1(B1, A0[8], N); + result9 = __riscv_vfmul_vf_f32m1(B1, A0[9], N); + resultA = __riscv_vfmul_vf_f32m1(B1, A0[10], N); + resultB = __riscv_vfmul_vf_f32m1(B1, A0[11], N); + resultC = __riscv_vfmul_vf_f32m1(B1, A0[12], N); + resultD = __riscv_vfmul_vf_f32m1(B1, A0[13], N); + resultE = __riscv_vfmul_vf_f32m1(B1, A0[14], N); + resultF = __riscv_vfmul_vf_f32m1(B1, A0[15], N); + } + } +#ifdef GEMM_NEW_PACKING + if (M & 4) { + result8 = __riscv_vfmul_vf_f32m1(B0, A0[0 + (M * 0)], N); + result9 = __riscv_vfmul_vf_f32m1(B0, A0[1 + (M * 0)], N); + resultA = __riscv_vfmul_vf_f32m1(B0, A0[2 + (M * 0)], N); + resultB = __riscv_vfmul_vf_f32m1(B0, A0[3 + (M * 0)], N); + result0 = __riscv_vfmul_vf_f32m1(B1, A0[0 + (M * 1)], N); + result1 = __riscv_vfmul_vf_f32m1(B1, A0[1 + (M * 1)], N); + result2 = __riscv_vfmul_vf_f32m1(B1, A0[2 + (M * 1)], N); + result3 = __riscv_vfmul_vf_f32m1(B1, A0[3 + (M * 1)], N); + } + if (M & 2) { + resultC = __riscv_vfmul_vf_f32m1(B0, A0[0 + (M & 0x4) + (M * 0)], N); + resultD = __riscv_vfmul_vf_f32m1(B0, A0[1 + (M & 0x4) + (M * 0)], N); + result4 = __riscv_vfmul_vf_f32m1(B1, A0[0 + (M & 0x4) + (M * 1)], N); + result5 = __riscv_vfmul_vf_f32m1(B1, A0[1 + (M & 0x4) + (M * 1)], N); + } + if (M & 1) { + resultE = __riscv_vfmul_vf_f32m1(B0, A0[0 + (M & 0x6) + (M * 0)], N); + result6 = __riscv_vfmul_vf_f32m1(B1, A0[0 + (M & 0x6) + (M * 1)], N); + } + A0 += (M * 2); +#else + if (M & 4) { + result8 = __riscv_vfmul_vf_f32m1(B0, A1[0 + (4 * 0)], N); + result9 = __riscv_vfmul_vf_f32m1(B0, A1[1 + (4 * 0)], N); + resultA = __riscv_vfmul_vf_f32m1(B0, A1[2 + (4 * 0)], N); + resultB = __riscv_vfmul_vf_f32m1(B0, A1[3 + (4 * 0)], N); + result0 = __riscv_vfmul_vf_f32m1(B1, A1[0 + (4 * 1)], N); + result1 = __riscv_vfmul_vf_f32m1(B1, A1[1 + (4 * 1)], N); + result2 = __riscv_vfmul_vf_f32m1(B1, A1[2 + (4 * 1)], N); + result3 = __riscv_vfmul_vf_f32m1(B1, A1[3 + (4 * 1)], N); + A1 += (4 * 2); + } + if (M & 2) { + resultC = __riscv_vfmul_vf_f32m1(B0, A2[0 + (2 * 0)], N); + resultD = __riscv_vfmul_vf_f32m1(B0, A2[1 + (2 * 0)], N); + result4 = __riscv_vfmul_vf_f32m1(B1, A2[0 + (2 * 1)], N); + result5 = __riscv_vfmul_vf_f32m1(B1, A2[1 + (2 * 1)], N); + A2 += (2 * 2); + } + if (M & 1) { + resultE = __riscv_vfmul_vf_f32m1(B0, A3[0 + (1 * 0)], N); + result6 = __riscv_vfmul_vf_f32m1(B1, A3[0 + (1 * 1)], N); + A3 += (1 * 2); + } + if (M == 8) { + A0 += (N * 2); + } +#endif + B += (N * 2); + + BLASLONG k = (K / 2); + K &= 1; + while (--k) { + if (!S2) { + B00 = __riscv_vle32_v_f32m2(B, N * 2); + B0 = __riscv_vget_v_f32m2_f32m1(B00, 0); + B1 = __riscv_vget_v_f32m2_f32m1(B00, 1); + } + + if (M == 8) { + if (S2) { + A00 = __riscv_vle32_v_f32m2(A0, N * 2); + A4 = __riscv_vget_v_f32m2_f32m1(A00, 0); + A5 = __riscv_vget_v_f32m2_f32m1(A00, 1); + + result0 = __riscv_vfmacc_vf_f32m1(result0, B[0], A4, N); + result1 = __riscv_vfmacc_vf_f32m1(result1, B[1], A4, N); + result2 = __riscv_vfmacc_vf_f32m1(result2, B[2], A4, N); + result3 = __riscv_vfmacc_vf_f32m1(result3, B[3], A4, N); + result4 = __riscv_vfmacc_vf_f32m1(result4, B[4], A4, N); + result5 = __riscv_vfmacc_vf_f32m1(result5, B[5], A4, N); + result6 = __riscv_vfmacc_vf_f32m1(result6, B[6], A4, N); + result7 = __riscv_vfmacc_vf_f32m1(result7, B[7], A4, N); + result8 = __riscv_vfmacc_vf_f32m1(result8, B[8], A5, N); + result9 = __riscv_vfmacc_vf_f32m1(result9, B[9], A5, N); + resultA = __riscv_vfmacc_vf_f32m1(resultA, B[10], A5, N); + resultB = __riscv_vfmacc_vf_f32m1(resultB, B[11], A5, N); + resultC = __riscv_vfmacc_vf_f32m1(resultC, B[12], A5, N); + resultD = __riscv_vfmacc_vf_f32m1(resultD, B[13], A5, N); + resultE = __riscv_vfmacc_vf_f32m1(resultE, B[14], A5, N); + resultF = __riscv_vfmacc_vf_f32m1(resultF, B[15], A5, N); + } else { + result0 = __riscv_vfmacc_vf_f32m1(result0, A0[0], B0, N); + result1 = __riscv_vfmacc_vf_f32m1(result1, A0[1], B0, N); + result2 = __riscv_vfmacc_vf_f32m1(result2, A0[2], B0, N); + result3 = __riscv_vfmacc_vf_f32m1(result3, A0[3], B0, N); + result4 = __riscv_vfmacc_vf_f32m1(result4, A0[4], B0, N); + result5 = __riscv_vfmacc_vf_f32m1(result5, A0[5], B0, N); + result6 = __riscv_vfmacc_vf_f32m1(result6, A0[6], B0, N); + result7 = __riscv_vfmacc_vf_f32m1(result7, A0[7], B0, N); + result8 = __riscv_vfmacc_vf_f32m1(result8, A0[8], B1, N); + result9 = __riscv_vfmacc_vf_f32m1(result9, A0[9], B1, N); + resultA = __riscv_vfmacc_vf_f32m1(resultA, A0[10], B1, N); + resultB = __riscv_vfmacc_vf_f32m1(resultB, A0[11], B1, N); + resultC = __riscv_vfmacc_vf_f32m1(resultC, A0[12], B1, N); + resultD = __riscv_vfmacc_vf_f32m1(resultD, A0[13], B1, N); + resultE = __riscv_vfmacc_vf_f32m1(resultE, A0[14], B1, N); + resultF = __riscv_vfmacc_vf_f32m1(resultF, A0[15], B1, N); + } + } +#ifdef GEMM_NEW_PACKING + if (M & 4) { + result8 = __riscv_vfmacc_vf_f32m1(result8, A0[0 + (M * 0)], B0, N); + result9 = __riscv_vfmacc_vf_f32m1(result9, A0[1 + (M * 0)], B0, N); + resultA = __riscv_vfmacc_vf_f32m1(resultA, A0[2 + (M * 0)], B0, N); + resultB = __riscv_vfmacc_vf_f32m1(resultB, A0[3 + (M * 0)], B0, N); + result0 = __riscv_vfmacc_vf_f32m1(result0, A0[0 + (M * 1)], B1, N); + result1 = __riscv_vfmacc_vf_f32m1(result1, A0[1 + (M * 1)], B1, N); + result2 = __riscv_vfmacc_vf_f32m1(result2, A0[2 + (M * 1)], B1, N); + result3 = __riscv_vfmacc_vf_f32m1(result3, A0[3 + (M * 1)], B1, N); + } + if (M & 2) { + resultC = __riscv_vfmacc_vf_f32m1(resultC, A0[0 + (M & 0x4) + (M * 0)], B0, N); + resultD = __riscv_vfmacc_vf_f32m1(resultD, A0[1 + (M & 0x4) + (M * 0)], B0, N); + result4 = __riscv_vfmacc_vf_f32m1(result4, A0[0 + (M & 0x4) + (M * 1)], B1, N); + result5 = __riscv_vfmacc_vf_f32m1(result5, A0[1 + (M & 0x4) + (M * 1)], B1, N); + } + if (M & 1) { + resultE = __riscv_vfmacc_vf_f32m1(resultE, A0[0 + (M & 0x6) + (M * 0)], B0, N); + result6 = __riscv_vfmacc_vf_f32m1(result6, A0[0 + (M & 0x6) + (M * 1)], B1, N); + } + A0 += (M * 2); +#else + if (M & 4) { + result8 = __riscv_vfmacc_vf_f32m1(result8, A1[0 + (4 * 0)], B0, N); + result9 = __riscv_vfmacc_vf_f32m1(result9, A1[1 + (4 * 0)], B0, N); + resultA = __riscv_vfmacc_vf_f32m1(resultA, A1[2 + (4 * 0)], B0, N); + resultB = __riscv_vfmacc_vf_f32m1(resultB, A1[3 + (4 * 0)], B0, N); + result0 = __riscv_vfmacc_vf_f32m1(result0, A1[0 + (4 * 1)], B1, N); + result1 = __riscv_vfmacc_vf_f32m1(result1, A1[1 + (4 * 1)], B1, N); + result2 = __riscv_vfmacc_vf_f32m1(result2, A1[2 + (4 * 1)], B1, N); + result3 = __riscv_vfmacc_vf_f32m1(result3, A1[3 + (4 * 1)], B1, N); + A1 += (4 * 2); + } + if (M & 2) { + resultC = __riscv_vfmacc_vf_f32m1(resultC, A2[0 + (2 * 0)], B0, N); + resultD = __riscv_vfmacc_vf_f32m1(resultD, A2[1 + (2 * 0)], B0, N); + result4 = __riscv_vfmacc_vf_f32m1(result4, A2[0 + (2 * 1)], B1, N); + result5 = __riscv_vfmacc_vf_f32m1(result5, A2[1 + (2 * 1)], B1, N); + A2 += (2 * 2); + } + if (M & 1) { + resultE = __riscv_vfmacc_vf_f32m1(resultE, A3[0 + (1 * 0)], B0, N); + result6 = __riscv_vfmacc_vf_f32m1(result6, A3[0 + (1 * 1)], B1, N); + A3 += (1 * 2); + } + if (M == 8) { + A0 += (N * 2); + } +#endif + B += (N * 2); + } + + if (M == 8) { + result0 = __riscv_vfadd_vv_f32m1(result0, result8, N); + result1 = __riscv_vfadd_vv_f32m1(result1, result9, N); + result2 = __riscv_vfadd_vv_f32m1(result2, resultA, N); + result3 = __riscv_vfadd_vv_f32m1(result3, resultB, N); + result4 = __riscv_vfadd_vv_f32m1(result4, resultC, N); + result5 = __riscv_vfadd_vv_f32m1(result5, resultD, N); + result6 = __riscv_vfadd_vv_f32m1(result6, resultE, N); + result7 = __riscv_vfadd_vv_f32m1(result7, resultF, N); + } + if (M & 4) { + result8 = __riscv_vfadd_vv_f32m1(result8, result0, N); + result9 = __riscv_vfadd_vv_f32m1(result9, result1, N); + resultA = __riscv_vfadd_vv_f32m1(resultA, result2, N); + resultB = __riscv_vfadd_vv_f32m1(resultB, result3, N); + } + if (M & 2) { + resultC = __riscv_vfadd_vv_f32m1(resultC, result4, N); + resultD = __riscv_vfadd_vv_f32m1(resultD, result5, N); + } + if (M & 1) { + resultE = __riscv_vfadd_vv_f32m1(resultE, result6, N); + } + } else { + if (M == 8) { + result0 = __riscv_vreinterpret_v_u32m1_f32m1(__riscv_vmv_v_x_u32m1(0, N)); + result1 = __riscv_vreinterpret_v_u32m1_f32m1(__riscv_vmv_v_x_u32m1(0, N)); + result2 = __riscv_vreinterpret_v_u32m1_f32m1(__riscv_vmv_v_x_u32m1(0, N)); + result3 = __riscv_vreinterpret_v_u32m1_f32m1(__riscv_vmv_v_x_u32m1(0, N)); + result4 = __riscv_vreinterpret_v_u32m1_f32m1(__riscv_vmv_v_x_u32m1(0, N)); + result5 = __riscv_vreinterpret_v_u32m1_f32m1(__riscv_vmv_v_x_u32m1(0, N)); + result6 = __riscv_vreinterpret_v_u32m1_f32m1(__riscv_vmv_v_x_u32m1(0, N)); + result7 = __riscv_vreinterpret_v_u32m1_f32m1(__riscv_vmv_v_x_u32m1(0, N)); + } + if (M & 4) { + result8 = __riscv_vreinterpret_v_u32m1_f32m1(__riscv_vmv_v_x_u32m1(0, N)); + result9 = __riscv_vreinterpret_v_u32m1_f32m1(__riscv_vmv_v_x_u32m1(0, N)); + resultA = __riscv_vreinterpret_v_u32m1_f32m1(__riscv_vmv_v_x_u32m1(0, N)); + resultB = __riscv_vreinterpret_v_u32m1_f32m1(__riscv_vmv_v_x_u32m1(0, N)); + } + if (M & 2) { + resultC = __riscv_vreinterpret_v_u32m1_f32m1(__riscv_vmv_v_x_u32m1(0, N)); + resultD = __riscv_vreinterpret_v_u32m1_f32m1(__riscv_vmv_v_x_u32m1(0, N)); + } + if (M & 1) { + resultE = __riscv_vreinterpret_v_u32m1_f32m1(__riscv_vmv_v_x_u32m1(0, N)); + } + } + } else +#endif + { + if (!S2) { + B0 = __riscv_vle32_v_f32m1(B, N); + } - for (BLASLONG j=0; j 4 + if (N & 4) { + if (!S2) { + resultF = __riscv_vle32_v_f32mf2(B, 4); } - - - BLASLONG ci=n_top*ldc+m_top; - - vfloat32m1_t c0 = __riscv_vle32_v_f32m1( &C[ci], gvl); ci += ldc-gvl*0; - vfloat32m1_t c1 = __riscv_vle32_v_f32m1( &C[ci], gvl); ci += ldc-gvl*0; - vfloat32m1_t c2 = __riscv_vle32_v_f32m1( &C[ci], gvl); ci += ldc-gvl*0; - vfloat32m1_t c3 = __riscv_vle32_v_f32m1( &C[ci], gvl); - c0 = __riscv_vfmacc_vf_f32m1( c0, alpha, result0, gvl ); - c1 = __riscv_vfmacc_vf_f32m1( c1, alpha, result1, gvl ); - c2 = __riscv_vfmacc_vf_f32m1( c2, alpha, result2, gvl ); - c3 = __riscv_vfmacc_vf_f32m1( c3, alpha, result3, gvl ); - - ci=n_top*ldc+m_top; - - __riscv_vse32_v_f32m1( &C[ci], c0, gvl); ci += ldc-gvl*0; - __riscv_vse32_v_f32m1( &C[ci], c1, gvl); ci += ldc-gvl*0; - __riscv_vse32_v_f32m1( &C[ci], c2, gvl); ci += ldc-gvl*0; - __riscv_vse32_v_f32m1( &C[ci], c3, gvl); - m_top += 8; + if (M & 8) { + if (S2) { + result08 = __riscv_vfmul_vf_f32m1(result03, B[0], 8); + result09 = __riscv_vfmul_vf_f32m1(result03, B[1], 8); + result0A = __riscv_vfmul_vf_f32m1(result03, B[2], 8); + result0B = __riscv_vfmul_vf_f32m1(result03, B[3], 8); + } else { + result0 = __riscv_vfmul_vf_f32mf2(resultF, A0[0], 4); + result1 = __riscv_vfmul_vf_f32mf2(resultF, A0[1], 4); + result2 = __riscv_vfmul_vf_f32mf2(resultF, A0[2], 4); + result3 = __riscv_vfmul_vf_f32mf2(resultF, A0[3], 4); + result4 = __riscv_vfmul_vf_f32mf2(resultF, A0[4], 4); + result5 = __riscv_vfmul_vf_f32mf2(resultF, A0[5], 4); + result6 = __riscv_vfmul_vf_f32mf2(resultF, A0[6], 4); + result7 = __riscv_vfmul_vf_f32mf2(resultF, A0[7], 4); + } + } +#ifdef GEMM_NEW_PACKING + if (M & 4) { + result8 = __riscv_vfmul_vf_f32mf2(resultF, A0[0 + (M & 0x8)], 4); + result9 = __riscv_vfmul_vf_f32mf2(resultF, A0[1 + (M & 0x8)], 4); + resultA = __riscv_vfmul_vf_f32mf2(resultF, A0[2 + (M & 0x8)], 4); + resultB = __riscv_vfmul_vf_f32mf2(resultF, A0[3 + (M & 0x8)], 4); + } + if (M & 2) { + resultC = __riscv_vfmul_vf_f32mf2(resultF, A0[0 + (M & 0xC)], 4); + resultD = __riscv_vfmul_vf_f32mf2(resultF, A0[1 + (M & 0xC)], 4); + } + if (M & 1) { + resultE = __riscv_vfmul_vf_f32mf2(resultF, A0[0 + (M & 0xE)], 4); + } +#else + if (M & 4) { + result8 = __riscv_vfmul_vf_f32mf2(resultF, A1[0], 4); + result9 = __riscv_vfmul_vf_f32mf2(resultF, A1[1], 4); + resultA = __riscv_vfmul_vf_f32mf2(resultF, A1[2], 4); + resultB = __riscv_vfmul_vf_f32mf2(resultF, A1[3], 4); + } + if (M & 2) { + resultC = __riscv_vfmul_vf_f32mf2(resultF, A2[0], 4); + resultD = __riscv_vfmul_vf_f32mf2(resultF, A2[1], 4); + } + if (M & 1) { + resultE = __riscv_vfmul_vf_f32mf2(resultF, A3[0], 4); + } +#endif } - - if( M & 4 ) { - gvl = __riscv_vsetvl_e32m1(4); - - BLASLONG ai=m_top*K; - BLASLONG bi=n_top*K; - float B0 = B[bi+0]; - float B1 = B[bi+1]; - float B2 = B[bi+2]; - float B3 = B[bi+3]; - bi += 4; - - vfloat32m1_t A0 = __riscv_vle32_v_f32m1( &A[ai+0*gvl], gvl ); - ai += 4; - - vfloat32m1_t result0 = __riscv_vfmul_vf_f32m1( A0, B0, gvl); - vfloat32m1_t result1 = __riscv_vfmul_vf_f32m1( A0, B1, gvl); - vfloat32m1_t result2 = __riscv_vfmul_vf_f32m1( A0, B2, gvl); - vfloat32m1_t result3 = __riscv_vfmul_vf_f32m1( A0, B3, gvl); - - for(BLASLONG k=1; k 4 + if (N & 2) { + B4 = B00[0 + (N & 4)]; + B5 = B00[1 + (N & 4)]; + } + if (N & 1) { + B6 = B00[0 + (N & 6)]; + } + B00 += N; +#else + if (N & 2) { + B4 = B01[0]; + B5 = B01[1]; + B01 += 2; + } + if (N & 1) { + B6 = B02[0]; + B02 += 1; + } +#endif + A0 = __riscv_vle32_v_f32m2(*A, 8 * 2); + *A += 16; - BLASLONG ci=n_top*ldc+m_top; - - vfloat32m1_t c0 = __riscv_vle32_v_f32m1( &C[ci], gvl); ci += gvl; - vfloat32m1_t c1 = __riscv_vle32_v_f32m1( &C[ci], gvl); - c0 = __riscv_vfmacc_vf_f32m1( c0, alpha, result0, gvl ); - c1 = __riscv_vfmacc_vf_f32m1( c1, alpha, result1, gvl ); - - ci=n_top*ldc+m_top; - - __riscv_vse32_v_f32m1( &C[ci], c0, gvl); ci += gvl; - __riscv_vse32_v_f32m1( &C[ci], c1, gvl); - m_top += 16; + if (N & 4) { + result0 = __riscv_vfmul_vf_f32m2(A0, B0, 16); + result2 = __riscv_vfmul_vf_f32m2(A0, B1, 16); + result4 = __riscv_vfmul_vf_f32m2(A0, B2, 16); + result6 = __riscv_vfmul_vf_f32m2(A0, B3, 16); + } + if (N & 2) { + result8 = __riscv_vfmul_vf_f32m2(A0, B4, 16); + resultA = __riscv_vfmul_vf_f32m2(A0, B5, 16); + } + if (N & 1) { + resultC = __riscv_vfmul_vf_f32m2(A0, B6, 16); + } } - - if( M & 8 ) { - gvl = __riscv_vsetvl_e32m1(8); - - BLASLONG ai=m_top*K; - BLASLONG bi=n_top*K; - float B0 = B[bi+0]; - bi += 1; - - vfloat32m1_t A0 = __riscv_vle32_v_f32m1( &A[ai+0*gvl], gvl ); - ai += 8; - - vfloat32m1_t result0 = __riscv_vfmul_vf_f32m1( A0, B0, gvl); - - for(BLASLONG k=1; k= 0; i--) { + + aa = *(a + i); + + for (j = 0; j < n; j ++) { + bb = *(c + i + j * ldc); + bb *= aa; + *b = bb; + *(c + i + j * ldc) = bb; + b ++; + + for (k = 0; k < i; k ++){ + *(c + k + j * ldc) -= bb * *(a + k); + } + + } + a -= m; + b -= 2 * n; + } + +} + +#else + +static inline void solve(BLASLONG m, BLASLONG n, FLOAT *a, FLOAT *b, FLOAT *c, BLASLONG ldc) { + + FLOAT aa1, aa2; + FLOAT bb1, bb2; + FLOAT cc1, cc2; + + int i, j, k; + + ldc *= 2; + a += (m - 1) * m * 2; + b += (m - 1) * n * 2; + + for (i = m - 1; i >= 0; i--) { + + aa1 = *(a + i * 2 + 0); + aa2 = *(a + i * 2 + 1); + + for (j = 0; j < n; j ++) { + bb1 = *(c + i * 2 + 0 + j * ldc); + bb2 = *(c + i * 2 + 1 + j * ldc); + +#ifndef CONJ + cc1 = aa1 * bb1 - aa2 * bb2; + cc2 = aa1 * bb2 + aa2 * bb1; +#else + cc1 = aa1 * bb1 + aa2 * bb2; + cc2 = aa1 * bb2 - aa2 * bb1; +#endif + + + *(b + 0) = cc1; + *(b + 1) = cc2; + *(c + i * 2 + 0 + j * ldc) = cc1; + *(c + i * 2 + 1 + j * ldc) = cc2; + b += 2; + + for (k = 0; k < i; k ++){ +#ifndef CONJ + *(c + k * 2 + 0 + j * ldc) -= cc1 * *(a + k * 2 + 0) - cc2 * *(a + k * 2 + 1); + *(c + k * 2 + 1 + j * ldc) -= cc1 * *(a + k * 2 + 1) + cc2 * *(a + k * 2 + 0); +#else + *(c + k * 2 + 0 + j * ldc) -= cc1 * *(a + k * 2 + 0) + cc2 * *(a + k * 2 + 1); + *(c + k * 2 + 1 + j * ldc) -= - cc1 * *(a + k * 2 + 1) + cc2 * *(a + k * 2 + 0); +#endif + } + + } + a -= m * 2; + b -= 4 * n; + } + +} + +#endif + + +int CNAME(BLASLONG m, BLASLONG n, BLASLONG k, FLOAT dummy1, +#ifdef COMPLEX + FLOAT dummy2, +#endif + FLOAT *a, FLOAT *b, FLOAT *c, BLASLONG ldc, BLASLONG offset){ + +#ifndef DOUBLE + openblas_wasm128_strsm_ln_calls += 1; +#else + openblas_wasm128_dtrsm_ln_calls += 1; +#endif + + BLASLONG i, j; + FLOAT *aa, *cc; + BLASLONG kk; + +#if 0 + fprintf(stderr, "TRSM KERNEL LN : m = %3ld n = %3ld k = %3ld offset = %3ld\n", + m, n, k, offset); +#endif + + j = (n >> GEMM_UNROLL_N_SHIFT); + + while (j > 0) { + + kk = m + offset; + + if (m & (GEMM_UNROLL_M - 1)) { + for (i = 1; i < GEMM_UNROLL_M; i *= 2){ + if (m & i) { + aa = a + ((m & ~(i - 1)) - i) * k * COMPSIZE; + cc = c + ((m & ~(i - 1)) - i) * COMPSIZE; + + if (k - kk > 0) { + GEMM_KERNEL(i, GEMM_UNROLL_N, k - kk, dm1, +#ifdef COMPLEX + ZERO, +#endif + aa + i * kk * COMPSIZE, + b + GEMM_UNROLL_N * kk * COMPSIZE, + cc, + ldc); + } + + solve(i, GEMM_UNROLL_N, + aa + (kk - i) * i * COMPSIZE, + b + (kk - i) * GEMM_UNROLL_N * COMPSIZE, + cc, ldc); + + kk -= i; + } + } + } + + i = (m >> GEMM_UNROLL_M_SHIFT); + if (i > 0) { + aa = a + ((m & ~(GEMM_UNROLL_M - 1)) - GEMM_UNROLL_M) * k * COMPSIZE; + cc = c + ((m & ~(GEMM_UNROLL_M - 1)) - GEMM_UNROLL_M) * COMPSIZE; + + do { + if (k - kk > 0) { + GEMM_KERNEL(GEMM_UNROLL_M, GEMM_UNROLL_N, k - kk, dm1, +#ifdef COMPLEX + ZERO, +#endif + aa + GEMM_UNROLL_M * kk * COMPSIZE, + b + GEMM_UNROLL_N * kk * COMPSIZE, + cc, + ldc); + } + + solve(GEMM_UNROLL_M, GEMM_UNROLL_N, + aa + (kk - GEMM_UNROLL_M) * GEMM_UNROLL_M * COMPSIZE, + b + (kk - GEMM_UNROLL_M) * GEMM_UNROLL_N * COMPSIZE, + cc, ldc); + + aa -= GEMM_UNROLL_M * k * COMPSIZE; + cc -= GEMM_UNROLL_M * COMPSIZE; + kk -= GEMM_UNROLL_M; + i --; + } while (i > 0); + } + + b += GEMM_UNROLL_N * k * COMPSIZE; + c += GEMM_UNROLL_N * ldc * COMPSIZE; + j --; + } + + if (n & (GEMM_UNROLL_N - 1)) { + + j = (GEMM_UNROLL_N >> 1); + while (j > 0) { + if (n & j) { + + kk = m + offset; + + if (m & (GEMM_UNROLL_M - 1)) { + for (i = 1; i < GEMM_UNROLL_M; i *= 2){ + if (m & i) { + aa = a + ((m & ~(i - 1)) - i) * k * COMPSIZE; + cc = c + ((m & ~(i - 1)) - i) * COMPSIZE; + + if (k - kk > 0) { + GEMM_KERNEL(i, j, k - kk, dm1, +#ifdef COMPLEX + ZERO, +#endif + aa + i * kk * COMPSIZE, + b + j * kk * COMPSIZE, + cc, ldc); + } + + solve(i, j, + aa + (kk - i) * i * COMPSIZE, + b + (kk - i) * j * COMPSIZE, + cc, ldc); + + kk -= i; + } + } + } + + i = (m >> GEMM_UNROLL_M_SHIFT); + if (i > 0) { + aa = a + ((m & ~(GEMM_UNROLL_M - 1)) - GEMM_UNROLL_M) * k * COMPSIZE; + cc = c + ((m & ~(GEMM_UNROLL_M - 1)) - GEMM_UNROLL_M) * COMPSIZE; + + do { + if (k - kk > 0) { + GEMM_KERNEL(GEMM_UNROLL_M, j, k - kk, dm1, +#ifdef COMPLEX + ZERO, +#endif + aa + GEMM_UNROLL_M * kk * COMPSIZE, + b + j * kk * COMPSIZE, + cc, + ldc); + } + + solve(GEMM_UNROLL_M, j, + aa + (kk - GEMM_UNROLL_M) * GEMM_UNROLL_M * COMPSIZE, + b + (kk - GEMM_UNROLL_M) * j * COMPSIZE, + cc, ldc); + + aa -= GEMM_UNROLL_M * k * COMPSIZE; + cc -= GEMM_UNROLL_M * COMPSIZE; + kk -= GEMM_UNROLL_M; + i --; + } while (i > 0); + } + + b += j * k * COMPSIZE; + c += j * ldc * COMPSIZE; + } + j >>= 1; + } + } + + return 0; +} diff --git a/kernel/wasm/trsm_kernel_LT_wasm128.c b/kernel/wasm/trsm_kernel_LT_wasm128.c new file mode 100644 index 0000000000..1c4f0ce870 --- /dev/null +++ b/kernel/wasm/trsm_kernel_LT_wasm128.c @@ -0,0 +1,397 @@ +/*********************************************************************/ +/* Copyright 2009, 2010 The University of Texas at Austin. */ +/* All rights reserved. */ +/* */ +/* Redistribution and use in source and binary forms, with or */ +/* without modification, are permitted provided that the following */ +/* conditions are met: */ +/* */ +/* 1. Redistributions of source code must retain the above */ +/* copyright notice, this list of conditions and the following */ +/* disclaimer. */ +/* */ +/* 2. Redistributions in binary form must reproduce the above */ +/* copyright notice, this list of conditions and the following */ +/* disclaimer in the documentation and/or other materials */ +/* provided with the distribution. */ +/* */ +/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */ +/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */ +/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */ +/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */ +/* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */ +/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */ +/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */ +/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */ +/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */ +/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */ +/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */ +/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */ +/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ +/* POSSIBILITY OF SUCH DAMAGE. */ +/* */ +/* The views and conclusions contained in the software and */ +/* documentation are those of the authors and should not be */ +/* interpreted as representing official policies, either expressed */ +/* or implied, of The University of Texas at Austin. */ +/*********************************************************************/ + +#include "common.h" + +#ifndef DOUBLE +static unsigned long long openblas_wasm128_strsm_lt_calls = 0; +unsigned long long openblas_wasm128_get_strsm_lt_calls(void) { + return openblas_wasm128_strsm_lt_calls; +} +void openblas_wasm128_reset_strsm_lt_calls(void) { + openblas_wasm128_strsm_lt_calls = 0; +} +#else +static unsigned long long openblas_wasm128_dtrsm_lt_calls = 0; +unsigned long long openblas_wasm128_get_dtrsm_lt_calls(void) { + return openblas_wasm128_dtrsm_lt_calls; +} +void openblas_wasm128_reset_dtrsm_lt_calls(void) { + openblas_wasm128_dtrsm_lt_calls = 0; +} +#endif + +static FLOAT dm1 = -1.; + +#ifdef CONJ +#define GEMM_KERNEL GEMM_KERNEL_L +#else +#define GEMM_KERNEL GEMM_KERNEL_N +#endif + +#if GEMM_DEFAULT_UNROLL_M == 1 +#define GEMM_UNROLL_M_SHIFT 0 +#endif + +#if GEMM_DEFAULT_UNROLL_M == 2 +#define GEMM_UNROLL_M_SHIFT 1 +#endif + +#if GEMM_DEFAULT_UNROLL_M == 4 +#define GEMM_UNROLL_M_SHIFT 2 +#endif + +#if GEMM_DEFAULT_UNROLL_M == 6 +#define GEMM_UNROLL_M_SHIFT 2 +#endif + +#if GEMM_DEFAULT_UNROLL_M == 8 +#define GEMM_UNROLL_M_SHIFT 3 +#endif + +#if GEMM_DEFAULT_UNROLL_M == 16 +#define GEMM_UNROLL_M_SHIFT 4 +#endif + +#if GEMM_DEFAULT_UNROLL_N == 1 +#define GEMM_UNROLL_N_SHIFT 0 +#endif + +#if GEMM_DEFAULT_UNROLL_N == 2 +#define GEMM_UNROLL_N_SHIFT 1 +#endif + +#if GEMM_DEFAULT_UNROLL_N == 4 +#define GEMM_UNROLL_N_SHIFT 2 +#endif + +#if GEMM_DEFAULT_UNROLL_N == 8 +#define GEMM_UNROLL_N_SHIFT 3 +#endif + +#if GEMM_DEFAULT_UNROLL_N == 16 +#define GEMM_UNROLL_N_SHIFT 4 +#endif + +#ifndef COMPLEX + +static inline void solve(BLASLONG m, BLASLONG n, FLOAT *a, FLOAT *b, FLOAT *c, BLASLONG ldc) { + + FLOAT aa, bb; + + int i, j, k; + +#if defined(__wasm_simd128__) && !defined(DOUBLE) + if (m == 2 && n == 2) { + FLOAT d0 = a[0]; + FLOAT l10 = a[1]; + FLOAT d1 = a[3]; + FLOAT x00 = c[0] * d0; + FLOAT x01 = c[ldc] * d0; + FLOAT y10 = c[1] - x00 * l10; + FLOAT y11 = c[1 + ldc] - x01 * l10; + FLOAT x10 = y10 * d1; + FLOAT x11 = y11 * d1; + b[0] = x00; + b[1] = x01; + b[2] = x10; + b[3] = x11; + c[0] = x00; + c[ldc] = x01; + c[1] = x10; + c[1 + ldc] = x11; + return; + } + if (m == 2 && n == 1) { + FLOAT d0 = a[0]; + FLOAT l10 = a[1]; + FLOAT d1 = a[3]; + FLOAT x00 = c[0] * d0; + FLOAT y10 = c[1] - x00 * l10; + FLOAT x10 = y10 * d1; + b[0] = x00; + b[1] = x10; + c[0] = x00; + c[1] = x10; + return; + } + if (m == 1 && n == 2) { + FLOAT d0 = a[0]; + FLOAT x00 = c[0] * d0; + FLOAT x01 = c[ldc] * d0; + b[0] = x00; + b[1] = x01; + c[0] = x00; + c[ldc] = x01; + return; + } + if (m == 1 && n == 1) { + FLOAT x00 = c[0] * a[0]; + b[0] = x00; + c[0] = x00; + return; + } +#endif + + for (i = 0; i < m; i++) { + + aa = *(a + i); + + for (j = 0; j < n; j ++) { + bb = *(c + i + j * ldc); + bb *= aa; + *b = bb; + *(c + i + j * ldc) = bb; + b ++; + + for (k = i + 1; k < m; k ++){ + *(c + k + j * ldc) -= bb * *(a + k); + } + + } + a += m; + } +} + +#else + +static inline void solve(BLASLONG m, BLASLONG n, FLOAT *a, FLOAT *b, FLOAT *c, BLASLONG ldc) { + + FLOAT aa1, aa2; + FLOAT bb1, bb2; + FLOAT cc1, cc2; + + int i, j, k; + + ldc *= 2; + + for (i = 0; i < m; i++) { + + aa1 = *(a + i * 2 + 0); + aa2 = *(a + i * 2 + 1); + + for (j = 0; j < n; j ++) { + bb1 = *(c + i * 2 + 0 + j * ldc); + bb2 = *(c + i * 2 + 1 + j * ldc); + +#ifndef CONJ + cc1 = aa1 * bb1 - aa2 * bb2; + cc2 = aa1 * bb2 + aa2 * bb1; +#else + cc1 = aa1 * bb1 + aa2 * bb2; + cc2 = aa1 * bb2 - aa2 * bb1; +#endif + + *(b + 0) = cc1; + *(b + 1) = cc2; + *(c + i * 2 + 0 + j * ldc) = cc1; + *(c + i * 2 + 1 + j * ldc) = cc2; + b += 2; + + for (k = i + 1; k < m; k ++){ +#ifndef CONJ + *(c + k * 2 + 0 + j * ldc) -= cc1 * *(a + k * 2 + 0) - cc2 * *(a + k * 2 + 1); + *(c + k * 2 + 1 + j * ldc) -= cc1 * *(a + k * 2 + 1) + cc2 * *(a + k * 2 + 0); +#else + *(c + k * 2 + 0 + j * ldc) -= cc1 * *(a + k * 2 + 0) + cc2 * *(a + k * 2 + 1); + *(c + k * 2 + 1 + j * ldc) -= -cc1 * *(a + k * 2 + 1) + cc2 * *(a + k * 2 + 0); +#endif + } + + } + a += m * 2; + } +} + +#endif + + +int CNAME(BLASLONG m, BLASLONG n, BLASLONG k, FLOAT dummy1, +#ifdef COMPLEX + FLOAT dummy2, +#endif + FLOAT *a, FLOAT *b, FLOAT *c, BLASLONG ldc, BLASLONG offset){ + +#ifndef DOUBLE + openblas_wasm128_strsm_lt_calls += 1; +#else + openblas_wasm128_dtrsm_lt_calls += 1; +#endif + + FLOAT *aa, *cc; + BLASLONG kk; + BLASLONG i, j, jj; + +#if 0 + fprintf(stderr, "TRSM KERNEL LT : m = %3ld n = %3ld k = %3ld offset = %3ld\n", + m, n, k, offset); +#endif + + jj = 0; + + j = (n >> GEMM_UNROLL_N_SHIFT); + + while (j > 0) { + + kk = offset; + aa = a; + cc = c; + + i = (m >> GEMM_UNROLL_M_SHIFT); + + while (i > 0) { + + if (kk > 0) { + GEMM_KERNEL(GEMM_UNROLL_M, GEMM_UNROLL_N, kk, dm1, +#ifdef COMPLEX + ZERO, +#endif + aa, b, cc, ldc); + } + + solve(GEMM_UNROLL_M, GEMM_UNROLL_N, + aa + kk * GEMM_UNROLL_M * COMPSIZE, + b + kk * GEMM_UNROLL_N * COMPSIZE, + cc, ldc); + + aa += GEMM_UNROLL_M * k * COMPSIZE; + cc += GEMM_UNROLL_M * COMPSIZE; + kk += GEMM_UNROLL_M; + i --; + } + + if (m & (GEMM_UNROLL_M - 1)) { + i = (GEMM_UNROLL_M >> 1); + while (i > 0) { + if (m & i) { + if (kk > 0) { + GEMM_KERNEL(i, GEMM_UNROLL_N, kk, dm1, +#ifdef COMPLEX + ZERO, +#endif + aa, b, cc, ldc); + } + solve(i, GEMM_UNROLL_N, + aa + kk * i * COMPSIZE, + b + kk * GEMM_UNROLL_N * COMPSIZE, + cc, ldc); + + aa += i * k * COMPSIZE; + cc += i * COMPSIZE; + kk += i; + } + i >>= 1; + } + } + + b += GEMM_UNROLL_N * k * COMPSIZE; + c += GEMM_UNROLL_N * ldc * COMPSIZE; + j --; + jj += GEMM_UNROLL_M; + } + + if (n & (GEMM_UNROLL_N - 1)) { + + j = (GEMM_UNROLL_N >> 1); + while (j > 0) { + if (n & j) { + + kk = offset; + aa = a; + cc = c; + + i = (m >> GEMM_UNROLL_M_SHIFT); + + while (i > 0) { + if (kk > 0) { + GEMM_KERNEL(GEMM_UNROLL_M, j, kk, dm1, +#ifdef COMPLEX + ZERO, +#endif + aa, + b, + cc, + ldc); + } + + solve(GEMM_UNROLL_M, j, + aa + kk * GEMM_UNROLL_M * COMPSIZE, + b + kk * j * COMPSIZE, cc, ldc); + + aa += GEMM_UNROLL_M * k * COMPSIZE; + cc += GEMM_UNROLL_M * COMPSIZE; + kk += GEMM_UNROLL_M; + i --; + } + + if (m & (GEMM_UNROLL_M - 1)) { + i = (GEMM_UNROLL_M >> 1); + while (i > 0) { + if (m & i) { + if (kk > 0) { + GEMM_KERNEL(i, j, kk, dm1, +#ifdef COMPLEX + ZERO, +#endif + aa, + b, + cc, + ldc); + } + + solve(i, j, + aa + kk * i * COMPSIZE, + b + kk * j * COMPSIZE, cc, ldc); + + aa += i * k * COMPSIZE; + cc += i * COMPSIZE; + kk += i; + } + i >>= 1; + } + } + + b += j * k * COMPSIZE; + c += j * ldc * COMPSIZE; + } + j >>= 1; + } + } + + return 0; +} diff --git a/kernel/wasm/trsm_kernel_RN_wasm128.c b/kernel/wasm/trsm_kernel_RN_wasm128.c new file mode 100644 index 0000000000..be19281acd --- /dev/null +++ b/kernel/wasm/trsm_kernel_RN_wasm128.c @@ -0,0 +1,395 @@ +/*********************************************************************/ +/* Copyright 2009, 2010 The University of Texas at Austin. */ +/* All rights reserved. */ +/* */ +/* Redistribution and use in source and binary forms, with or */ +/* without modification, are permitted provided that the following */ +/* conditions are met: */ +/* */ +/* 1. Redistributions of source code must retain the above */ +/* copyright notice, this list of conditions and the following */ +/* disclaimer. */ +/* */ +/* 2. Redistributions in binary form must reproduce the above */ +/* copyright notice, this list of conditions and the following */ +/* disclaimer in the documentation and/or other materials */ +/* provided with the distribution. */ +/* */ +/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */ +/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */ +/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */ +/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */ +/* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */ +/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */ +/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */ +/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */ +/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */ +/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */ +/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */ +/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */ +/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ +/* POSSIBILITY OF SUCH DAMAGE. */ +/* */ +/* The views and conclusions contained in the software and */ +/* documentation are those of the authors and should not be */ +/* interpreted as representing official policies, either expressed */ +/* or implied, of The University of Texas at Austin. */ +/*********************************************************************/ + +#include "common.h" + +#ifndef DOUBLE +static unsigned long long openblas_wasm128_strsm_rn_calls = 0; +unsigned long long openblas_wasm128_get_strsm_rn_calls(void) { + return openblas_wasm128_strsm_rn_calls; +} +void openblas_wasm128_reset_strsm_rn_calls(void) { + openblas_wasm128_strsm_rn_calls = 0; +} +#else +static unsigned long long openblas_wasm128_dtrsm_rn_calls = 0; +unsigned long long openblas_wasm128_get_dtrsm_rn_calls(void) { + return openblas_wasm128_dtrsm_rn_calls; +} +void openblas_wasm128_reset_dtrsm_rn_calls(void) { + openblas_wasm128_dtrsm_rn_calls = 0; +} +#endif + +static FLOAT dm1 = -1.; + +#ifdef CONJ +#define GEMM_KERNEL GEMM_KERNEL_R +#else +#define GEMM_KERNEL GEMM_KERNEL_N +#endif + +#if GEMM_DEFAULT_UNROLL_M == 1 +#define GEMM_UNROLL_M_SHIFT 0 +#endif + +#if GEMM_DEFAULT_UNROLL_M == 2 +#define GEMM_UNROLL_M_SHIFT 1 +#endif + +#if GEMM_DEFAULT_UNROLL_M == 4 +#define GEMM_UNROLL_M_SHIFT 2 +#endif + +#if GEMM_DEFAULT_UNROLL_M == 6 +#define GEMM_UNROLL_M_SHIFT 2 +#endif + +#if GEMM_DEFAULT_UNROLL_M == 8 +#define GEMM_UNROLL_M_SHIFT 3 +#endif + +#if GEMM_DEFAULT_UNROLL_M == 16 +#define GEMM_UNROLL_M_SHIFT 4 +#endif + +#if GEMM_DEFAULT_UNROLL_N == 1 +#define GEMM_UNROLL_N_SHIFT 0 +#endif + +#if GEMM_DEFAULT_UNROLL_N == 2 +#define GEMM_UNROLL_N_SHIFT 1 +#endif + +#if GEMM_DEFAULT_UNROLL_N == 4 +#define GEMM_UNROLL_N_SHIFT 2 +#endif + +#if GEMM_DEFAULT_UNROLL_N == 8 +#define GEMM_UNROLL_N_SHIFT 3 +#endif + +#if GEMM_DEFAULT_UNROLL_N == 16 +#define GEMM_UNROLL_N_SHIFT 4 +#endif + +#ifndef COMPLEX + +static inline void solve(BLASLONG m, BLASLONG n, FLOAT *a, FLOAT *b, FLOAT *c, BLASLONG ldc) { + + FLOAT aa, bb; + + int i, j, k; + +#if defined(__wasm_simd128__) + if (m == 2 && n == 2) { + FLOAT d0 = b[0]; + FLOAT u01 = b[1]; + FLOAT d1 = b[3]; + FLOAT x00 = c[0] * d0; + FLOAT x10 = c[1] * d0; + FLOAT y01 = c[ldc] - x00 * u01; + FLOAT y11 = c[1 + ldc] - x10 * u01; + FLOAT x01 = y01 * d1; + FLOAT x11 = y11 * d1; + a[0] = x00; + a[1] = x10; + a[2] = x01; + a[3] = x11; + c[0] = x00; + c[1] = x10; + c[ldc] = x01; + c[1 + ldc] = x11; + return; + } + if (m == 2 && n == 1) { + FLOAT d0 = b[0]; + FLOAT x00 = c[0] * d0; + FLOAT x10 = c[1] * d0; + a[0] = x00; + a[1] = x10; + c[0] = x00; + c[1] = x10; + return; + } + if (m == 1 && n == 2) { + FLOAT d0 = b[0]; + FLOAT u01 = b[1]; + FLOAT d1 = b[3]; + FLOAT x00 = c[0] * d0; + FLOAT y01 = c[ldc] - x00 * u01; + FLOAT x01 = y01 * d1; + a[0] = x00; + a[1] = x01; + c[0] = x00; + c[ldc] = x01; + return; + } + if (m == 1 && n == 1) { + FLOAT x00 = c[0] * b[0]; + a[0] = x00; + c[0] = x00; + return; + } +#endif + + for (i = 0; i < n; i++) { + + bb = *(b + i); + + for (j = 0; j < m; j ++) { + aa = *(c + j + i * ldc); + aa *= bb; + *a = aa; + *(c + j + i * ldc) = aa; + a ++; + + for (k = i + 1; k < n; k ++){ + *(c + j + k * ldc) -= aa * *(b + k); + } + + } + b += n; + } +} + +#else + +static inline void solve(BLASLONG m, BLASLONG n, FLOAT *a, FLOAT *b, FLOAT *c, BLASLONG ldc) { + + FLOAT aa1, aa2; + FLOAT bb1, bb2; + FLOAT cc1, cc2; + + int i, j, k; + + ldc *= 2; + + for (i = 0; i < n; i++) { + + bb1 = *(b + i * 2 + 0); + bb2 = *(b + i * 2 + 1); + + for (j = 0; j < m; j ++) { + aa1 = *(c + j * 2 + 0 + i * ldc); + aa2 = *(c + j * 2 + 1 + i * ldc); + +#ifndef CONJ + cc1 = aa1 * bb1 - aa2 * bb2; + cc2 = aa1 * bb2 + aa2 * bb1; +#else + cc1 = aa1 * bb1 + aa2 * bb2; + cc2 = -aa1 * bb2 + aa2 * bb1; +#endif + + *(a + 0) = cc1; + *(a + 1) = cc2; + *(c + j * 2 + 0 + i * ldc) = cc1; + *(c + j * 2 + 1 + i * ldc) = cc2; + a += 2; + + for (k = i + 1; k < n; k ++){ +#ifndef CONJ + *(c + j * 2 + 0 + k * ldc) -= cc1 * *(b + k * 2 + 0) - cc2 * *(b + k * 2 + 1); + *(c + j * 2 + 1 + k * ldc) -= cc1 * *(b + k * 2 + 1) + cc2 * *(b + k * 2 + 0); +#else + *(c + j * 2 + 0 + k * ldc) -= cc1 * *(b + k * 2 + 0) + cc2 * *(b + k * 2 + 1); + *(c + j * 2 + 1 + k * ldc) -= - cc1 * *(b + k * 2 + 1) + cc2 * *(b + k * 2 + 0); +#endif + } + + } + b += n * 2; + } +} + +#endif + + +int CNAME(BLASLONG m, BLASLONG n, BLASLONG k, FLOAT dummy1, +#ifdef COMPLEX + FLOAT dummy2, +#endif + FLOAT *a, FLOAT *b, FLOAT *c, BLASLONG ldc, BLASLONG offset){ + +#ifndef DOUBLE + openblas_wasm128_strsm_rn_calls += 1; +#else + openblas_wasm128_dtrsm_rn_calls += 1; +#endif + + FLOAT *aa, *cc; + BLASLONG kk; + BLASLONG i, j, jj; + +#if 0 + fprintf(stderr, "TRSM RN KERNEL m = %3ld n = %3ld k = %3ld offset = %3ld\n", + m, n, k, offset); +#endif + + jj = 0; + j = (n >> GEMM_UNROLL_N_SHIFT); + kk = -offset; + + while (j > 0) { + + aa = a; + cc = c; + + i = (m >> GEMM_UNROLL_M_SHIFT); + + if (i > 0) { + do { + if (kk > 0) { + GEMM_KERNEL(GEMM_UNROLL_M, GEMM_UNROLL_N, kk, dm1, +#ifdef COMPLEX + ZERO, +#endif + aa, b, cc, ldc); + } + + solve(GEMM_UNROLL_M, GEMM_UNROLL_N, + aa + kk * GEMM_UNROLL_M * COMPSIZE, + b + kk * GEMM_UNROLL_N * COMPSIZE, + cc, ldc); + + aa += GEMM_UNROLL_M * k * COMPSIZE; + cc += GEMM_UNROLL_M * COMPSIZE; + i --; + } while (i > 0); + } + + + if (m & (GEMM_UNROLL_M - 1)) { + i = (GEMM_UNROLL_M >> 1); + while (i > 0) { + if (m & i) { + if (kk > 0) { + GEMM_KERNEL(i, GEMM_UNROLL_N, kk, dm1, +#ifdef COMPLEX + ZERO, +#endif + aa, b, cc, ldc); + } + solve(i, GEMM_UNROLL_N, + aa + kk * i * COMPSIZE, + b + kk * GEMM_UNROLL_N * COMPSIZE, + cc, ldc); + + aa += i * k * COMPSIZE; + cc += i * COMPSIZE; + } + i >>= 1; + } + } + + kk += GEMM_UNROLL_N; + b += GEMM_UNROLL_N * k * COMPSIZE; + c += GEMM_UNROLL_N * ldc * COMPSIZE; + j --; + jj += GEMM_UNROLL_M; + } + + if (n & (GEMM_UNROLL_N - 1)) { + + j = (GEMM_UNROLL_N >> 1); + while (j > 0) { + if (n & j) { + + aa = a; + cc = c; + + i = (m >> GEMM_UNROLL_M_SHIFT); + + while (i > 0) { + if (kk > 0) { + GEMM_KERNEL(GEMM_UNROLL_M, j, kk, dm1, +#ifdef COMPLEX + ZERO, +#endif + aa, + b, + cc, + ldc); + } + + solve(GEMM_UNROLL_M, j, + aa + kk * GEMM_UNROLL_M * COMPSIZE, + b + kk * j * COMPSIZE, cc, ldc); + + aa += GEMM_UNROLL_M * k * COMPSIZE; + cc += GEMM_UNROLL_M * COMPSIZE; + i --; + } + + if (m & (GEMM_UNROLL_M - 1)) { + i = (GEMM_UNROLL_M >> 1); + while (i > 0) { + if (m & i) { + if (kk > 0) { + GEMM_KERNEL(i, j, kk, dm1, +#ifdef COMPLEX + ZERO, +#endif + aa, + b, + cc, + ldc); + } + + solve(i, j, + aa + kk * i * COMPSIZE, + b + kk * j * COMPSIZE, cc, ldc); + + aa += i * k * COMPSIZE; + cc += i * COMPSIZE; + } + i >>= 1; + } + } + + b += j * k * COMPSIZE; + c += j * ldc * COMPSIZE; + kk += j; + } + j >>= 1; + } + } + + return 0; +} diff --git a/kernel/wasm/trsm_kernel_RT_wasm128.c b/kernel/wasm/trsm_kernel_RT_wasm128.c new file mode 100644 index 0000000000..ebfa1437ca --- /dev/null +++ b/kernel/wasm/trsm_kernel_RT_wasm128.c @@ -0,0 +1,420 @@ +/*********************************************************************/ +/* Copyright 2009, 2010 The University of Texas at Austin. */ +/* All rights reserved. */ +/* */ +/* Redistribution and use in source and binary forms, with or */ +/* without modification, are permitted provided that the following */ +/* conditions are met: */ +/* */ +/* 1. Redistributions of source code must retain the above */ +/* copyright notice, this list of conditions and the following */ +/* disclaimer. */ +/* */ +/* 2. Redistributions in binary form must reproduce the above */ +/* copyright notice, this list of conditions and the following */ +/* disclaimer in the documentation and/or other materials */ +/* provided with the distribution. */ +/* */ +/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */ +/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */ +/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */ +/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */ +/* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */ +/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */ +/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */ +/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */ +/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */ +/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */ +/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */ +/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */ +/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ +/* POSSIBILITY OF SUCH DAMAGE. */ +/* */ +/* The views and conclusions contained in the software and */ +/* documentation are those of the authors and should not be */ +/* interpreted as representing official policies, either expressed */ +/* or implied, of The University of Texas at Austin. */ +/*********************************************************************/ + +#include "common.h" + +static FLOAT dm1 = -1.; + +#ifndef DOUBLE +static unsigned long long openblas_wasm128_strsm_rt_calls = 0; +unsigned long long openblas_wasm128_get_strsm_rt_calls(void) { + return openblas_wasm128_strsm_rt_calls; +} +void openblas_wasm128_reset_strsm_rt_calls(void) { + openblas_wasm128_strsm_rt_calls = 0; +} +#else +static unsigned long long openblas_wasm128_dtrsm_rt_calls = 0; +unsigned long long openblas_wasm128_get_dtrsm_rt_calls(void) { + return openblas_wasm128_dtrsm_rt_calls; +} +void openblas_wasm128_reset_dtrsm_rt_calls(void) { + openblas_wasm128_dtrsm_rt_calls = 0; +} +#endif + +#ifdef CONJ +#define GEMM_KERNEL GEMM_KERNEL_R +#else +#define GEMM_KERNEL GEMM_KERNEL_N +#endif + +#if GEMM_DEFAULT_UNROLL_M == 1 +#define GEMM_UNROLL_M_SHIFT 0 +#endif + +#if GEMM_DEFAULT_UNROLL_M == 2 +#define GEMM_UNROLL_M_SHIFT 1 +#endif + +#if GEMM_DEFAULT_UNROLL_M == 4 +#define GEMM_UNROLL_M_SHIFT 2 +#endif + +#if GEMM_DEFAULT_UNROLL_M == 6 +#define GEMM_UNROLL_M_SHIFT 2 +#endif + + +#if GEMM_DEFAULT_UNROLL_M == 8 +#define GEMM_UNROLL_M_SHIFT 3 +#endif + +#if GEMM_DEFAULT_UNROLL_M == 16 +#define GEMM_UNROLL_M_SHIFT 4 +#endif + +#if GEMM_DEFAULT_UNROLL_N == 1 +#define GEMM_UNROLL_N_SHIFT 0 +#endif + +#if GEMM_DEFAULT_UNROLL_N == 2 +#define GEMM_UNROLL_N_SHIFT 1 +#endif + +#if GEMM_DEFAULT_UNROLL_N == 4 +#define GEMM_UNROLL_N_SHIFT 2 +#endif + +#if GEMM_DEFAULT_UNROLL_N == 8 +#define GEMM_UNROLL_N_SHIFT 3 +#endif + +#if GEMM_DEFAULT_UNROLL_N == 16 +#define GEMM_UNROLL_N_SHIFT 4 +#endif + + +#ifndef COMPLEX + +static inline void solve(BLASLONG m, BLASLONG n, FLOAT *a, FLOAT *b, FLOAT *c, BLASLONG ldc) { + + FLOAT aa, bb; + + int i, j, k; + +#if defined(__wasm_simd128__) + if (m == 2 && n == 2) { + FLOAT d0 = b[0]; + FLOAT l10 = b[2]; + FLOAT d1 = b[3]; + FLOAT x01 = c[ldc] * d1; + FLOAT x11 = c[1 + ldc] * d1; + FLOAT y00 = c[0] - x01 * l10; + FLOAT y10 = c[1] - x11 * l10; + FLOAT x00 = y00 * d0; + FLOAT x10 = y10 * d0; + a[0] = x00; + a[1] = x10; + a[2] = x01; + a[3] = x11; + c[0] = x00; + c[1] = x10; + c[ldc] = x01; + c[1 + ldc] = x11; + return; + } + if (m == 2 && n == 1) { + FLOAT d0 = b[0]; + FLOAT x00 = c[0] * d0; + FLOAT x10 = c[1] * d0; + a[0] = x00; + a[1] = x10; + c[0] = x00; + c[1] = x10; + return; + } + if (m == 1 && n == 2) { + FLOAT d0 = b[0]; + FLOAT l10 = b[2]; + FLOAT d1 = b[3]; + FLOAT x01 = c[ldc] * d1; + FLOAT y00 = c[0] - x01 * l10; + FLOAT x00 = y00 * d0; + a[0] = x00; + a[1] = x01; + c[0] = x00; + c[ldc] = x01; + return; + } + if (m == 1 && n == 1) { + FLOAT x00 = c[0] * b[0]; + a[0] = x00; + c[0] = x00; + return; + } +#endif + + a += (n - 1) * m; + b += (n - 1) * n; + + for (i = n - 1; i >= 0; i--) { + + bb = *(b + i); + + for (j = 0; j < m; j ++) { + aa = *(c + j + i * ldc); + aa *= bb; + *a = aa; + *(c + j + i * ldc) = aa; + a ++; + + for (k = 0; k < i; k ++){ + *(c + j + k * ldc) -= aa * *(b + k); + } + + } + b -= n; + a -= 2 * m; + } + +} + +#else + +static inline void solve(BLASLONG m, BLASLONG n, FLOAT *a, FLOAT *b, FLOAT *c, BLASLONG ldc) { + + FLOAT aa1, aa2; + FLOAT bb1, bb2; + FLOAT cc1, cc2; + + int i, j, k; + + ldc *= 2; + + a += (n - 1) * m * 2; + b += (n - 1) * n * 2; + + for (i = n - 1; i >= 0; i--) { + + bb1 = *(b + i * 2 + 0); + bb2 = *(b + i * 2 + 1); + + for (j = 0; j < m; j ++) { + + aa1 = *(c + j * 2 + 0 + i * ldc); + aa2 = *(c + j * 2 + 1 + i * ldc); + +#ifndef CONJ + cc1 = aa1 * bb1 - aa2 * bb2; + cc2 = aa1 * bb2 + aa2 * bb1; +#else + cc1 = aa1 * bb1 + aa2 * bb2; + cc2 = - aa1 * bb2 + aa2 * bb1; +#endif + + *(a + 0) = cc1; + *(a + 1) = cc2; + + *(c + j * 2 + 0 + i * ldc) = cc1; + *(c + j * 2 + 1 + i * ldc) = cc2; + a += 2; + + for (k = 0; k < i; k ++){ +#ifndef CONJ + *(c + j * 2 + 0 + k * ldc) -= cc1 * *(b + k * 2 + 0) - cc2 * *(b + k * 2 + 1); + *(c + j * 2 + 1 + k * ldc) -= cc1 * *(b + k * 2 + 1) + cc2 * *(b + k * 2 + 0); +#else + *(c + j * 2 + 0 + k * ldc) -= cc1 * *(b + k * 2 + 0) + cc2 * *(b + k * 2 + 1); + *(c + j * 2 + 1 + k * ldc) -= -cc1 * *(b + k * 2 + 1) + cc2 * *(b + k * 2 + 0); +#endif + } + + } + b -= n * 2; + a -= 4 * m; + } + +} + +#endif + +int CNAME(BLASLONG m, BLASLONG n, BLASLONG k, FLOAT dummy1, +#ifdef COMPLEX + FLOAT dummy2, +#endif + FLOAT *a, FLOAT *b, FLOAT *c, BLASLONG ldc, BLASLONG offset){ + +#ifndef DOUBLE + openblas_wasm128_strsm_rt_calls += 1; +#else + openblas_wasm128_dtrsm_rt_calls += 1; +#endif + + BLASLONG i, j; + FLOAT *aa, *cc; + BLASLONG kk; + +#if 0 + fprintf(stderr, "TRSM RT KERNEL m = %3ld n = %3ld k = %3ld offset = %3ld\n", + m, n, k, offset); +#endif + + kk = n - offset; + c += n * ldc * COMPSIZE; + b += n * k * COMPSIZE; + + if (n & (GEMM_UNROLL_N - 1)) { + + j = 1; + while (j < GEMM_UNROLL_N) { + if (n & j) { + + aa = a; + b -= j * k * COMPSIZE; + c -= j * ldc* COMPSIZE; + cc = c; + + i = (m >> GEMM_UNROLL_M_SHIFT); + if (i > 0) { + + do { + if (k - kk > 0) { + GEMM_KERNEL(GEMM_UNROLL_M, j, k - kk, dm1, +#ifdef COMPLEX + ZERO, +#endif + aa + GEMM_UNROLL_M * kk * COMPSIZE, + b + j * kk * COMPSIZE, + cc, + ldc); + } + + solve(GEMM_UNROLL_M, j, + aa + (kk - j) * GEMM_UNROLL_M * COMPSIZE, + b + (kk - j) * j * COMPSIZE, + cc, ldc); + + aa += GEMM_UNROLL_M * k * COMPSIZE; + cc += GEMM_UNROLL_M * COMPSIZE; + i --; + } while (i > 0); + } + + if (m & (GEMM_UNROLL_M - 1)) { + i = (GEMM_UNROLL_M >> 1); + do { + if (m & i) { + + if (k - kk > 0) { + GEMM_KERNEL(i, j, k - kk, dm1, +#ifdef COMPLEX + ZERO, +#endif + aa + i * kk * COMPSIZE, + b + j * kk * COMPSIZE, + cc, ldc); + } + + solve(i, j, + aa + (kk - j) * i * COMPSIZE, + b + (kk - j) * j * COMPSIZE, + cc, ldc); + + aa += i * k * COMPSIZE; + cc += i * COMPSIZE; + + } + i >>= 1; + } while (i > 0); + } + kk -= j; + } + j <<= 1; + } + } + + j = (n >> GEMM_UNROLL_N_SHIFT); + + if (j > 0) { + + do { + aa = a; + b -= GEMM_UNROLL_N * k * COMPSIZE; + c -= GEMM_UNROLL_N * ldc * COMPSIZE; + cc = c; + + i = (m >> GEMM_UNROLL_M_SHIFT); + if (i > 0) { + do { + if (k - kk > 0) { + GEMM_KERNEL(GEMM_UNROLL_M, GEMM_UNROLL_N, k - kk, dm1, +#ifdef COMPLEX + ZERO, +#endif + aa + GEMM_UNROLL_M * kk * COMPSIZE, + b + GEMM_UNROLL_N * kk * COMPSIZE, + cc, + ldc); + } + + solve(GEMM_UNROLL_M, GEMM_UNROLL_N, + aa + (kk - GEMM_UNROLL_N) * GEMM_UNROLL_M * COMPSIZE, + b + (kk - GEMM_UNROLL_N) * GEMM_UNROLL_N * COMPSIZE, + cc, ldc); + + aa += GEMM_UNROLL_M * k * COMPSIZE; + cc += GEMM_UNROLL_M * COMPSIZE; + i --; + } while (i > 0); + } + + if (m & (GEMM_UNROLL_M - 1)) { + i = (GEMM_UNROLL_M >> 1); + do { + if (m & i) { + if (k - kk > 0) { + GEMM_KERNEL(i, GEMM_UNROLL_N, k - kk, dm1, +#ifdef COMPLEX + ZERO, +#endif + aa + i * kk * COMPSIZE, + b + GEMM_UNROLL_N * kk * COMPSIZE, + cc, + ldc); + } + + solve(i, GEMM_UNROLL_N, + aa + (kk - GEMM_UNROLL_N) * i * COMPSIZE, + b + (kk - GEMM_UNROLL_N) * GEMM_UNROLL_N * COMPSIZE, + cc, ldc); + + aa += i * k * COMPSIZE; + cc += i * COMPSIZE; + } + i >>= 1; + } while (i > 0); + } + + kk -= GEMM_UNROLL_N; + j --; + } while (j > 0); + } + + return 0; +} diff --git a/kernel/x86_64/KERNEL.SKYLAKEX b/kernel/x86_64/KERNEL.SKYLAKEX index 7e946ef2ea..fffccfa613 100644 --- a/kernel/x86_64/KERNEL.SKYLAKEX +++ b/kernel/x86_64/KERNEL.SKYLAKEX @@ -1,7 +1,12 @@ include $(KERNELDIR)/KERNEL.HASWELL +ifeq ($(C_COMPILER)$(OSNAME), CLANGWINNT) +SGEMMKERNEL = sgemm_kernel_16x4_skylakex.S +STRMMKERNEL = sgemm_kernel_16x4_skylakex.S +else SGEMMKERNEL = sgemm_kernel_16x4_skylakex_3.c STRMMKERNEL = sgemm_kernel_16x4_skylakex_2.c +endif SGEMMINCOPY = ../generic/gemm_ncopy_16.c SGEMMITCOPY = sgemm_tcopy_16_skylakex.c SGEMMONCOPY = sgemm_ncopy_4_skylakex.c @@ -20,8 +25,13 @@ SGEMM_SMALL_K_B0_TN = sgemm_small_kernel_tn_skylakex.c SGEMM_SMALL_K_TT = sgemm_small_kernel_tt_skylakex.c SGEMM_SMALL_K_B0_TT = sgemm_small_kernel_tt_skylakex.c +ifeq ($(C_COMPILER)$(OSNAME), CLANGWINNT) +DGEMMKERNEL = dgemm_kernel_16x2_skylakex.S +DTRMMKERNEL = dgemm_kernel_16x2_skylakex.S +else DGEMMKERNEL = dgemm_kernel_16x2_skylakex.c DTRMMKERNEL = dgemm_kernel_16x2_skylakex.c +endif DGEMMINCOPY = ../generic/gemm_ncopy_16.c DGEMMITCOPY = dgemm_tcopy_16_skylakex.c DGEMMONCOPY = ../generic/gemm_ncopy_2.c @@ -41,8 +51,13 @@ DGEMM_SMALL_K_B0_TT = dgemm_small_kernel_tt_skylakex.c SGEMM_BETA = sgemm_beta_skylakex.c DGEMM_BETA = dgemm_beta_skylakex.c +ifeq ($(C_COMPILER)$(OSNAME), CLANGWINNT) +CGEMMKERNEL = cgemm_kernel_8x2_haswell.c +ZGEMMKERNEL = zgemm_kernel_4x2_haswell.c +else CGEMMKERNEL = cgemm_kernel_8x2_skylakex.c ZGEMMKERNEL = zgemm_kernel_4x2_skylakex.c +endif CASUMKERNEL = casum.c ZASUMKERNEL = zasum.c diff --git a/lapack-netlib/LAPACKE/src/lapacke_cgesvdq_work.c b/lapack-netlib/LAPACKE/src/lapacke_cgesvdq_work.c index e73e524bd6..d1885ef26c 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_cgesvdq_work.c +++ b/lapack-netlib/LAPACKE/src/lapacke_cgesvdq_work.c @@ -50,12 +50,42 @@ lapack_int LAPACKE_cgesvdq_work( int matrix_layout, char joba, char jobp, info = info - 1; } } else if( matrix_layout == LAPACK_ROW_MAJOR ) { - lapack_int nrows_u = ( LAPACKE_lsame( jobu, 'a' ) || - LAPACKE_lsame( jobu, 's' ) ) ? m : 1; - lapack_int ncols_u = LAPACKE_lsame( jobu, 'a' ) ? m : - (LAPACKE_lsame( jobu, 's' ) ? MIN(m,n) : 1); - lapack_int nrows_v = LAPACKE_lsame( jobv, 'a' ) ? n : - ( LAPACKE_lsame( jobv, 's' ) ? MIN(m,n) : 1); + + lapack_int nrows_u; + lapack_int ncols_u; + lapack_int nrows_v; + lapack_int ncols_v; + + if( LAPACKE_lsame( jobu, 'a' ) ) { + nrows_u = m; + ncols_u = m; + } + else if( LAPACKE_lsame( jobu, 's' ) || + LAPACKE_lsame( jobu, 'u' ) || + LAPACKE_lsame( jobu, 'r' ) ) { + nrows_u = m; + ncols_u = n; + } + else if( LAPACKE_lsame( jobu, 'f' ) ) { + nrows_u = n; + ncols_u = n; + } else { + nrows_u = 1; + ncols_u = 1; + } + + /* in the case joba == 'e', v_t is used as a workspace */ + if( LAPACKE_lsame( jobv, 'a' ) || + LAPACKE_lsame( jobv, 'v' ) || + LAPACKE_lsame( jobv, 'r' ) || + LAPACKE_lsame( joba, 'e' ) ) { + nrows_v = n; + ncols_v = n; + } else { + nrows_v = 1; + ncols_v = 1; + } + lapack_int lda_t = MAX(1,m); lapack_int ldu_t = MAX(1,nrows_u); lapack_int ldv_t = MAX(1,nrows_v); @@ -73,69 +103,80 @@ lapack_int LAPACKE_cgesvdq_work( int matrix_layout, char joba, char jobp, LAPACKE_xerbla( "LAPACKE_cgesvdq_work", info ); return info; } - if( ldv < n ) { + if( ldv < ncols_v ) { info = -14; LAPACKE_xerbla( "LAPACKE_cgesvdq_work", info ); return info; } /* Query optimal working array(s) size if requested */ - if( lcwork == -1 ) { + if ( ( liwork == -1 ) || ( lcwork == -1 ) || ( lrwork == -1 ) ) { LAPACK_cgesvdq( &joba, &jobp, &jobr, &jobu, &jobv, &m, &n, a, &lda_t, s, u, &ldu_t, v, &ldv_t, numrank, iwork, &liwork, cwork, &lcwork, rwork, &lrwork, &info ); return (info < 0) ? (info - 1) : info; } /* Allocate memory for temporary array(s) */ - a_t = (lapack_complex_float*)LAPACKE_malloc( sizeof(lapack_complex_float) * lda_t * MAX(1,n) ); - if( a_t == NULL ) { - info = LAPACK_TRANSPOSE_MEMORY_ERROR; - goto exit_level_0; - } - if( LAPACKE_lsame( jobu, 'a' ) || LAPACKE_lsame( jobu, 's' ) ) { - u_t = (lapack_complex_float*) - LAPACKE_malloc( sizeof(lapack_complex_float) * ldu_t * MAX(1,ncols_u) ); + if ( ( m > 0 ) && ( n > 0 ) ){ + a_t = (lapack_complex_float*)LAPACKE_malloc( sizeof(lapack_complex_float) * lda_t * MAX(1,n) ); + if( a_t == NULL ) { + info = LAPACK_TRANSPOSE_MEMORY_ERROR; + goto exit_level_0; + } + + u_t = (lapack_complex_float*)LAPACKE_malloc( sizeof(lapack_complex_float) * ldu_t * MAX(1,ncols_u) ); if( u_t == NULL ) { info = LAPACK_TRANSPOSE_MEMORY_ERROR; goto exit_level_1; } } - if( LAPACKE_lsame( jobv, 'a' ) || LAPACKE_lsame( jobv, 's' ) ) { - v_t = (lapack_complex_float*) - LAPACKE_malloc( sizeof(lapack_complex_float) * ldv_t * MAX(1,n) ); + + v_t = (lapack_complex_float*)LAPACKE_malloc( sizeof(lapack_complex_float) * ldv_t * MAX(1,ncols_v) ); if( v_t == NULL ) { info = LAPACK_TRANSPOSE_MEMORY_ERROR; goto exit_level_2; } - } + /* Transpose input matrices */ - LAPACKE_cge_trans( matrix_layout, m, n, a, lda, a_t, lda_t ); + if ( ( m > 0 ) && ( n > 0 ) ){ + LAPACKE_cge_trans( matrix_layout, m, n, a, lda, a_t, lda_t ); + } + /* Call LAPACK function and adjust info */ - LAPACK_cgesvdq( &joba, &jobp, &jobr, &jobu, &jobv, &m, &n, a, &lda_t, - s, u, &ldu_t, v, &ldv_t, numrank, iwork, &liwork, - cwork, &lcwork, rwork, &lrwork, &info ); + LAPACK_cgesvdq( &joba, &jobp, &jobr, &jobu, &jobv, &m, &n, a, &lda_t, + s, u, &ldu_t, v, &ldv_t, numrank, iwork, &liwork, + cwork, &lcwork, rwork, &lrwork, &info ); if( info < 0 ) { info = info - 1; } + /* Transpose output matrices */ - LAPACKE_cge_trans( LAPACK_COL_MAJOR, m, n, a_t, lda_t, a, lda ); - if( LAPACKE_lsame( jobu, 'a' ) || LAPACKE_lsame( jobu, 's' ) ) { - LAPACKE_cge_trans( LAPACK_COL_MAJOR, nrows_u, ncols_u, u_t, ldu_t, - u, ldu ); - } - if( LAPACKE_lsame( jobv, 'a' ) || LAPACKE_lsame( jobv, 's' ) ) { - LAPACKE_cge_trans( LAPACK_COL_MAJOR, nrows_v, n, v_t, ldv_t, v, - ldv ); + if ( ( m > 0 ) && ( n > 0 ) ){ + LAPACKE_cge_trans( LAPACK_COL_MAJOR, m, n, a_t, lda_t, a, lda ); + + if( LAPACKE_lsame( jobu, 'a' ) || + LAPACKE_lsame( jobu, 's' ) || + LAPACKE_lsame( jobu, 'u' ) || + LAPACKE_lsame( jobu, 'r' ) || + LAPACKE_lsame( jobu, 'f' ) ) { + LAPACKE_cge_trans( LAPACK_COL_MAJOR, nrows_u, ncols_u, u_t, ldu_t, + u, ldu ); + } + + /* we do not transpose v_t back to v in the case (joba == 'e') because, in this case, v_t is used as a workspace */ + if( LAPACKE_lsame( jobv, 'a' ) || + LAPACKE_lsame( jobv, 'v' ) || + LAPACKE_lsame( jobv, 'r' )) { + LAPACKE_cge_trans( LAPACK_COL_MAJOR, nrows_v, ncols_v, v_t, ldv_t, v, + ldv ); + } } + /* Release memory and exit */ - if( LAPACKE_lsame( jobv, 'a' ) || LAPACKE_lsame( jobv, 's' ) ) { - LAPACKE_free( v_t ); - } + if ( ( m > 0 ) && ( n > 0 ) ) { LAPACKE_free( v_t ); v_t = NULL; } exit_level_2: - if( LAPACKE_lsame( jobu, 'a' ) || LAPACKE_lsame( jobu, 's' ) ) { - LAPACKE_free( u_t ); - } + if ( ( m > 0 ) && ( n > 0 ) ) { LAPACKE_free( u_t ); u_t = NULL; } exit_level_1: - LAPACKE_free( a_t ); + if ( ( m > 0 ) && ( n > 0 ) ) { LAPACKE_free( a_t ); a_t = NULL; } exit_level_0: if( info == LAPACK_TRANSPOSE_MEMORY_ERROR ) { LAPACKE_xerbla( "LAPACKE_cgesvdq_work", info ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_dgesvdq_work.c b/lapack-netlib/LAPACKE/src/lapacke_dgesvdq_work.c index 0d65f05298..2e62ee35e9 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dgesvdq_work.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dgesvdq_work.c @@ -50,12 +50,42 @@ lapack_int LAPACKE_dgesvdq_work( int matrix_layout, char joba, char jobp, info = info - 1; } } else if( matrix_layout == LAPACK_ROW_MAJOR ) { - lapack_int nrows_u = ( LAPACKE_lsame( jobu, 'a' ) || - LAPACKE_lsame( jobu, 's' ) ) ? m : 1; - lapack_int ncols_u = LAPACKE_lsame( jobu, 'a' ) ? m : - (LAPACKE_lsame( jobu, 's' ) ? MIN(m,n) : 1); - lapack_int nrows_v = LAPACKE_lsame( jobv, 'a' ) ? n : - ( LAPACKE_lsame( jobv, 's' ) ? MIN(m,n) : 1); + + lapack_int nrows_u; + lapack_int ncols_u; + lapack_int nrows_v; + lapack_int ncols_v; + + if( LAPACKE_lsame( jobu, 'a' ) ) { + nrows_u = m; + ncols_u = m; + } + else if( LAPACKE_lsame( jobu, 's' ) || + LAPACKE_lsame( jobu, 'u' ) || + LAPACKE_lsame( jobu, 'r' ) ) { + nrows_u = m; + ncols_u = n; + } + else if( LAPACKE_lsame( jobu, 'f' ) ) { + nrows_u = n; + ncols_u = n; + } else { + nrows_u = 1; + ncols_u = 1; + } + + /* in the case joba == 'e', v_t is used as a workspace */ + if( LAPACKE_lsame( jobv, 'a' ) || + LAPACKE_lsame( jobv, 'v' ) || + LAPACKE_lsame( jobv, 'r' ) || + LAPACKE_lsame( joba, 'e' ) ) { + nrows_v = n; + ncols_v = n; + } else { + nrows_v = 1; + ncols_v = 1; + } + lapack_int lda_t = MAX(1,m); lapack_int ldu_t = MAX(1,nrows_u); lapack_int ldv_t = MAX(1,nrows_v); @@ -73,42 +103,46 @@ lapack_int LAPACKE_dgesvdq_work( int matrix_layout, char joba, char jobp, LAPACKE_xerbla( "LAPACKE_dgesvdq_work", info ); return info; } - if( ldv < n ) { + if( ldv < ncols_v ) { info = -14; LAPACKE_xerbla( "LAPACKE_dgesvdq_work", info ); return info; } + /* Query optimal working array(s) size if requested */ - if( lwork == -1 ) { + if ( ( liwork == -1 ) || ( lwork == -1 ) || ( lrwork == -1 ) ) { LAPACK_dgesvdq( &joba, &jobp, &jobr, &jobu, &jobv, &m, &n, a, &lda_t, s, u, &ldu_t, v, &ldv_t, numrank, iwork, &liwork, work, &lwork, rwork, &lrwork, &info ); return (info < 0) ? (info - 1) : info; } + /* Allocate memory for temporary array(s) */ - a_t = (double*)LAPACKE_malloc( sizeof(double) * lda_t * MAX(1,n) ); - if( a_t == NULL ) { - info = LAPACK_TRANSPOSE_MEMORY_ERROR; - goto exit_level_0; - } - if( LAPACKE_lsame( jobu, 'a' ) || LAPACKE_lsame( jobu, 's' ) ) { - u_t = (double*) - LAPACKE_malloc( sizeof(double) * ldu_t * MAX(1,ncols_u) ); + if ( ( m > 0 ) && ( n > 0 ) ){ + a_t = (double*)LAPACKE_malloc( sizeof(double) * lda_t * MAX(1,n) ); + if( a_t == NULL ) { + info = LAPACK_TRANSPOSE_MEMORY_ERROR; + goto exit_level_0; + } + + u_t = (double*)LAPACKE_malloc( sizeof(double) * ldu_t * MAX(1,ncols_u) ); if( u_t == NULL ) { info = LAPACK_TRANSPOSE_MEMORY_ERROR; goto exit_level_1; } - } - if( LAPACKE_lsame( jobv, 'a' ) || LAPACKE_lsame( jobv, 's' ) ) { - v_t = (double*) - LAPACKE_malloc( sizeof(double) * ldv_t * MAX(1,n) ); + + v_t = (double*)LAPACKE_malloc( sizeof(double) * ldv_t * MAX(1,ncols_v) ); if( v_t == NULL ) { info = LAPACK_TRANSPOSE_MEMORY_ERROR; goto exit_level_2; } } + /* Transpose input matrices */ - LAPACKE_dge_trans( matrix_layout, m, n, a, lda, a_t, lda_t ); + if ( ( m > 0 ) && ( n > 0 ) ){ + LAPACKE_dge_trans( matrix_layout, m, n, a, lda, a_t, lda_t ); + } + /* Call LAPACK function and adjust info */ LAPACK_dgesvdq( &joba, &jobp, &jobr, &jobu, &jobv, &m, &n, a, &lda_t, s, u, &ldu_t, v, &ldv_t, numrank, iwork, &liwork, @@ -116,26 +150,35 @@ lapack_int LAPACKE_dgesvdq_work( int matrix_layout, char joba, char jobp, if( info < 0 ) { info = info - 1; } + /* Transpose output matrices */ - LAPACKE_dge_trans( LAPACK_COL_MAJOR, m, n, a_t, lda_t, a, lda ); - if( LAPACKE_lsame( jobu, 'a' ) || LAPACKE_lsame( jobu, 's' ) ) { - LAPACKE_dge_trans( LAPACK_COL_MAJOR, nrows_u, ncols_u, u_t, ldu_t, - u, ldu ); - } - if( LAPACKE_lsame( jobv, 'a' ) || LAPACKE_lsame( jobv, 's' ) ) { - LAPACKE_dge_trans( LAPACK_COL_MAJOR, nrows_v, n, v_t, ldv_t, v, - ldv ); + if ( ( m > 0 ) && ( n > 0 ) ){ + LAPACKE_dge_trans( LAPACK_COL_MAJOR, m, n, a_t, lda_t, a, lda ); + + if( LAPACKE_lsame( jobu, 'a' ) || + LAPACKE_lsame( jobu, 's' ) || + LAPACKE_lsame( jobu, 'u' ) || + LAPACKE_lsame( jobu, 'r' ) || + LAPACKE_lsame( jobu, 'f' ) ) { + LAPACKE_dge_trans( LAPACK_COL_MAJOR, nrows_u, ncols_u, u_t, ldu_t, + u, ldu ); + } + + /* we do not transpose v_t back to v in the case (joba == 'e') because, in this case, v_t is used as a workspace */ + if( LAPACKE_lsame( jobv, 'a' ) || + LAPACKE_lsame( jobv, 'v' ) || + LAPACKE_lsame( jobv, 'r' )) { + LAPACKE_dge_trans( LAPACK_COL_MAJOR, nrows_v, ncols_v, v_t, ldv_t, v, + ldv ); + } } + /* Release memory and exit */ - if( LAPACKE_lsame( jobv, 'a' ) || LAPACKE_lsame( jobv, 's' ) ) { - LAPACKE_free( v_t ); - } + if ( ( m > 0 ) && ( n > 0 ) ) { LAPACKE_free( v_t ); v_t = NULL; } exit_level_2: - if( LAPACKE_lsame( jobu, 'a' ) || LAPACKE_lsame( jobu, 's' ) ) { - LAPACKE_free( u_t ); - } + if ( ( m > 0 ) && ( n > 0 ) ) { LAPACKE_free( u_t ); u_t = NULL; } exit_level_1: - LAPACKE_free( a_t ); + if ( ( m > 0 ) && ( n > 0 ) ) { LAPACKE_free( a_t ); a_t = NULL; } exit_level_0: if( info == LAPACK_TRANSPOSE_MEMORY_ERROR ) { LAPACKE_xerbla( "LAPACKE_dgesvdq_work", info ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_sgesvdq_work.c b/lapack-netlib/LAPACKE/src/lapacke_sgesvdq_work.c index b65bd9abfb..047ec62e48 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_sgesvdq_work.c +++ b/lapack-netlib/LAPACKE/src/lapacke_sgesvdq_work.c @@ -50,11 +50,42 @@ lapack_int LAPACKE_sgesvdq_work( int matrix_layout, char joba, char jobp, info = info - 1; } } else if( matrix_layout == LAPACK_ROW_MAJOR ) { - lapack_int nrows_u = ( LAPACKE_lsame( jobu, 'a' ) || - LAPACKE_lsame( jobu, 's' ) ) ? m : 1; - lapack_int ncols_u = LAPACKE_lsame( jobu, 'a' ) ? m : - (LAPACKE_lsame( jobu, 's' ) ? MIN(m,n) : 1); - lapack_int nrows_v = LAPACKE_lsame( jobv, 'a' ) ? n : 1; + + lapack_int nrows_u; + lapack_int ncols_u; + lapack_int nrows_v; + lapack_int ncols_v; + + if( LAPACKE_lsame( jobu, 'a' ) ) { + nrows_u = m; + ncols_u = m; + } + else if( LAPACKE_lsame( jobu, 's' ) || + LAPACKE_lsame( jobu, 'u' ) || + LAPACKE_lsame( jobu, 'r' ) ) { + nrows_u = m; + ncols_u = n; + } + else if( LAPACKE_lsame( jobu, 'f' ) ) { + nrows_u = n; + ncols_u = n; + } else { + nrows_u = 1; + ncols_u = 1; + } + + /* in the case joba == 'e', v_t is used as a workspace */ + if( LAPACKE_lsame( jobv, 'a' ) || + LAPACKE_lsame( jobv, 'v' ) || + LAPACKE_lsame( jobv, 'r' ) || + LAPACKE_lsame( joba, 'e' ) ) { + nrows_v = n; + ncols_v = n; + } else { + nrows_v = 1; + ncols_v = 1; + } + lapack_int lda_t = MAX(1,m); lapack_int ldu_t = MAX(1,nrows_u); lapack_int ldv_t = MAX(1,nrows_v); @@ -72,42 +103,46 @@ lapack_int LAPACKE_sgesvdq_work( int matrix_layout, char joba, char jobp, LAPACKE_xerbla( "LAPACKE_sgesvdq_work", info ); return info; } - if( ldv < n ) { + if( ldv < ncols_v ) { info = -14; LAPACKE_xerbla( "LAPACKE_sgesvdq_work", info ); return info; } + /* Query optimal working array(s) size if requested */ - if( lwork == -1 ) { + if ( ( liwork == -1 ) || ( lwork == -1 ) || ( lrwork == -1 ) ) { LAPACK_sgesvdq( &joba, &jobp, &jobr, &jobu, &jobv, &m, &n, a, &lda_t, s, u, &ldu_t, v, &ldv_t, numrank, iwork, &liwork, work, &lwork, rwork, &lrwork, &info ); return (info < 0) ? (info - 1) : info; } + /* Allocate memory for temporary array(s) */ - a_t = (float*)LAPACKE_malloc( sizeof(float) * lda_t * MAX(1,n) ); - if( a_t == NULL ) { - info = LAPACK_TRANSPOSE_MEMORY_ERROR; - goto exit_level_0; - } - if( LAPACKE_lsame( jobu, 'a' ) || LAPACKE_lsame( jobu, 's' ) ) { - u_t = (float*) - LAPACKE_malloc( sizeof(float) * ldu_t * MAX(1,ncols_u) ); + if ( ( m > 0 ) && ( n > 0 ) ){ + a_t = (float*)LAPACKE_malloc( sizeof(float) * lda_t * MAX(1,n) ); + if( a_t == NULL ) { + info = LAPACK_TRANSPOSE_MEMORY_ERROR; + goto exit_level_0; + } + + u_t = (float*)LAPACKE_malloc( sizeof(float) * ldu_t * MAX(1,ncols_u) ); if( u_t == NULL ) { info = LAPACK_TRANSPOSE_MEMORY_ERROR; goto exit_level_1; } - } - if( LAPACKE_lsame( jobv, 'a' ) || LAPACKE_lsame( jobv, 's' ) ) { - v_t = (float*) - LAPACKE_malloc( sizeof(float) * ldv_t * MAX(1,n) ); + + v_t = (float*)LAPACKE_malloc( sizeof(float) * ldv_t * MAX(1,ncols_v) ); if( v_t == NULL ) { info = LAPACK_TRANSPOSE_MEMORY_ERROR; goto exit_level_2; } } + /* Transpose input matrices */ - LAPACKE_sge_trans( matrix_layout, m, n, a, lda, a_t, lda_t ); + if ( ( m > 0 ) && ( n > 0 ) ){ + LAPACKE_sge_trans( matrix_layout, m, n, a, lda, a_t, lda_t ); + } + /* Call LAPACK function and adjust info */ LAPACK_sgesvdq( &joba, &jobp, &jobr, &jobu, &jobv, &m, &n, a, &lda_t, s, u, &ldu_t, v, &ldv_t, numrank, iwork, &liwork, @@ -115,26 +150,35 @@ lapack_int LAPACKE_sgesvdq_work( int matrix_layout, char joba, char jobp, if( info < 0 ) { info = info - 1; } + /* Transpose output matrices */ - LAPACKE_sge_trans( LAPACK_COL_MAJOR, m, n, a_t, lda_t, a, lda ); - if( LAPACKE_lsame( jobu, 'a' ) || LAPACKE_lsame( jobu, 's' ) ) { - LAPACKE_sge_trans( LAPACK_COL_MAJOR, nrows_u, ncols_u, u_t, ldu_t, - u, ldu ); - } - if( LAPACKE_lsame( jobv, 'a' ) || LAPACKE_lsame( jobv, 's' ) ) { - LAPACKE_sge_trans( LAPACK_COL_MAJOR, nrows_v, n, v_t, ldv_t, v, - ldv ); + if ( ( m > 0 ) && ( n > 0 ) ){ + LAPACKE_sge_trans( LAPACK_COL_MAJOR, m, n, a_t, lda_t, a, lda ); + + if( LAPACKE_lsame( jobu, 'a' ) || + LAPACKE_lsame( jobu, 's' ) || + LAPACKE_lsame( jobu, 'u' ) || + LAPACKE_lsame( jobu, 'r' ) || + LAPACKE_lsame( jobu, 'f' ) ) { + LAPACKE_sge_trans( LAPACK_COL_MAJOR, nrows_u, ncols_u, u_t, ldu_t, + u, ldu ); + } + + /* we do not transpose v_t back to v in the case (joba == 'e') because, in this case, v_t is used as a workspace */ + if( LAPACKE_lsame( jobv, 'a' ) || + LAPACKE_lsame( jobv, 'v' ) || + LAPACKE_lsame( jobv, 'r' )) { + LAPACKE_sge_trans( LAPACK_COL_MAJOR, nrows_v, ncols_v, v_t, ldv_t, v, + ldv ); + } } + /* Release memory and exit */ - if( LAPACKE_lsame( jobv, 'a' ) || LAPACKE_lsame( jobv, 's' ) ) { - LAPACKE_free( v_t ); - } + if ( ( m > 0 ) && ( n > 0 ) ) { LAPACKE_free( v_t ); v_t = NULL; } exit_level_2: - if( LAPACKE_lsame( jobu, 'a' ) || LAPACKE_lsame( jobu, 's' ) ) { - LAPACKE_free( u_t ); - } + if ( ( m > 0 ) && ( n > 0 ) ) { LAPACKE_free( u_t ); u_t = NULL; } exit_level_1: - LAPACKE_free( a_t ); + if ( ( m > 0 ) && ( n > 0 ) ) { LAPACKE_free( a_t ); a_t = NULL; } exit_level_0: if( info == LAPACK_TRANSPOSE_MEMORY_ERROR ) { LAPACKE_xerbla( "LAPACKE_sgesvdq_work", info ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_zgesvdq_work.c b/lapack-netlib/LAPACKE/src/lapacke_zgesvdq_work.c index 1a6437555e..ab8354bac6 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zgesvdq_work.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zgesvdq_work.c @@ -50,12 +50,42 @@ lapack_int LAPACKE_zgesvdq_work( int matrix_layout, char joba, char jobp, info = info - 1; } } else if( matrix_layout == LAPACK_ROW_MAJOR ) { - lapack_int nrows_u = ( LAPACKE_lsame( jobu, 'a' ) || - LAPACKE_lsame( jobu, 's' ) ) ? m : 1; - lapack_int ncols_u = LAPACKE_lsame( jobu, 'a' ) ? m : - (LAPACKE_lsame( jobu, 's' ) ? MIN(m,n) : 1); - lapack_int nrows_v = LAPACKE_lsame( jobv, 'a' ) ? n : - ( LAPACKE_lsame( jobv, 's' ) ? MIN(m,n) : 1); + + lapack_int nrows_u; + lapack_int ncols_u; + lapack_int nrows_v; + lapack_int ncols_v; + + if( LAPACKE_lsame( jobu, 'a' ) ) { + nrows_u = m; + ncols_u = m; + } + else if( LAPACKE_lsame( jobu, 's' ) || + LAPACKE_lsame( jobu, 'u' ) || + LAPACKE_lsame( jobu, 'r' ) ) { + nrows_u = m; + ncols_u = n; + } + else if( LAPACKE_lsame( jobu, 'f' ) ) { + nrows_u = n; + ncols_u = n; + } else { + nrows_u = 1; + ncols_u = 1; + } + + /* in the case joba == 'e', v_t is used as a workspace */ + if( LAPACKE_lsame( jobv, 'a' ) || + LAPACKE_lsame( jobv, 'v' ) || + LAPACKE_lsame( jobv, 'r' ) || + LAPACKE_lsame( joba, 'e' ) ) { + nrows_v = n; + ncols_v = n; + } else { + nrows_v = 1; + ncols_v = 1; + } + lapack_int lda_t = MAX(1,m); lapack_int ldu_t = MAX(1,nrows_u); lapack_int ldv_t = MAX(1,nrows_v); @@ -73,69 +103,80 @@ lapack_int LAPACKE_zgesvdq_work( int matrix_layout, char joba, char jobp, LAPACKE_xerbla( "LAPACKE_zgesvdq_work", info ); return info; } - if( ldv < n ) { + if( ldv < ncols_v ) { info = -14; LAPACKE_xerbla( "LAPACKE_zgesvdq_work", info ); return info; } /* Query optimal working array(s) size if requested */ - if( lcwork == -1 ) { + if ( ( liwork == -1 ) || ( lcwork == -1 ) || ( lrwork == -1 ) ) { LAPACK_zgesvdq( &joba, &jobp, &jobr, &jobu, &jobv, &m, &n, a, &lda_t, s, u, &ldu_t, v, &ldv_t, numrank, iwork, &liwork, cwork, &lcwork, rwork, &lrwork, &info ); return (info < 0) ? (info - 1) : info; } /* Allocate memory for temporary array(s) */ - a_t = (lapack_complex_double*)LAPACKE_malloc( sizeof(lapack_complex_double) * lda_t * MAX(1,n) ); - if( a_t == NULL ) { - info = LAPACK_TRANSPOSE_MEMORY_ERROR; - goto exit_level_0; - } - if( LAPACKE_lsame( jobu, 'a' ) || LAPACKE_lsame( jobu, 's' ) ) { - u_t = (lapack_complex_double*) - LAPACKE_malloc( sizeof(lapack_complex_double) * ldu_t * MAX(1,ncols_u) ); + if ( ( m > 0 ) && ( n > 0 ) ){ + a_t = (lapack_complex_double*)LAPACKE_malloc( sizeof(lapack_complex_double) * lda_t * MAX(1,n) ); + if( a_t == NULL ) { + info = LAPACK_TRANSPOSE_MEMORY_ERROR; + goto exit_level_0; + } + + u_t = (lapack_complex_double*)LAPACKE_malloc( sizeof(lapack_complex_double) * ldu_t * MAX(1,ncols_u) ); if( u_t == NULL ) { info = LAPACK_TRANSPOSE_MEMORY_ERROR; goto exit_level_1; } - } - if( LAPACKE_lsame( jobv, 'a' ) || LAPACKE_lsame( jobv, 's' ) ) { - v_t = (lapack_complex_double*) - LAPACKE_malloc( sizeof(lapack_complex_double) * ldv_t * MAX(1,n) ); + + v_t = (lapack_complex_double*)LAPACKE_malloc( sizeof(lapack_complex_double) * ldv_t * MAX(1,ncols_v) ); if( v_t == NULL ) { info = LAPACK_TRANSPOSE_MEMORY_ERROR; goto exit_level_2; } } + /* Transpose input matrices */ - LAPACKE_zge_trans( matrix_layout, m, n, a, lda, a_t, lda_t ); + if ( ( m > 0 ) && ( n > 0 ) ){ + LAPACKE_zge_trans( matrix_layout, m, n, a, lda, a_t, lda_t ); + } + /* Call LAPACK function and adjust info */ - LAPACK_zgesvdq( &joba, &jobp, &jobr, &jobu, &jobv, &m, &n, a, &lda_t, - s, u, &ldu_t, v, &ldv_t, numrank, iwork, &liwork, - cwork, &lcwork, rwork, &lrwork, &info ); + LAPACK_zgesvdq( &joba, &jobp, &jobr, &jobu, &jobv, &m, &n, a, &lda_t, + s, u, &ldu_t, v, &ldv_t, numrank, iwork, &liwork, + cwork, &lcwork, rwork, &lrwork, &info ); if( info < 0 ) { info = info - 1; } + /* Transpose output matrices */ - LAPACKE_zge_trans( LAPACK_COL_MAJOR, m, n, a_t, lda_t, a, lda ); - if( LAPACKE_lsame( jobu, 'a' ) || LAPACKE_lsame( jobu, 's' ) ) { - LAPACKE_zge_trans( LAPACK_COL_MAJOR, nrows_u, ncols_u, u_t, ldu_t, - u, ldu ); - } - if( LAPACKE_lsame( jobv, 'a' ) || LAPACKE_lsame( jobv, 's' ) ) { - LAPACKE_zge_trans( LAPACK_COL_MAJOR, nrows_v, n, v_t, ldv_t, v, - ldv ); + if ( ( m > 0 ) && ( n > 0 ) ){ + LAPACKE_zge_trans( LAPACK_COL_MAJOR, m, n, a_t, lda_t, a, lda ); + + if( LAPACKE_lsame( jobu, 'a' ) || + LAPACKE_lsame( jobu, 's' ) || + LAPACKE_lsame( jobu, 'u' ) || + LAPACKE_lsame( jobu, 'r' ) || + LAPACKE_lsame( jobu, 'f' ) ) { + LAPACKE_zge_trans( LAPACK_COL_MAJOR, nrows_u, ncols_u, u_t, ldu_t, + u, ldu ); + } + + /* we do not transpose v_t back to v in the case (joba == 'e') because, in this case, v_t is used as a workspace */ + if( LAPACKE_lsame( jobv, 'a' ) || + LAPACKE_lsame( jobv, 'v' ) || + LAPACKE_lsame( jobv, 'r' )) { + LAPACKE_zge_trans( LAPACK_COL_MAJOR, nrows_v, ncols_v, v_t, ldv_t, v, + ldv ); + } } + /* Release memory and exit */ - if( LAPACKE_lsame( jobv, 'a' ) || LAPACKE_lsame( jobv, 's' ) ) { - LAPACKE_free( v_t ); - } + if ( ( m > 0 ) && ( n > 0 ) ) { LAPACKE_free( v_t ); v_t = NULL; } exit_level_2: - if( LAPACKE_lsame( jobu, 'a' ) || LAPACKE_lsame( jobu, 's' ) ) { - LAPACKE_free( u_t ); - } + if ( ( m > 0 ) && ( n > 0 ) ) { LAPACKE_free( u_t ); u_t = NULL; } exit_level_1: - LAPACKE_free( a_t ); + if ( ( m > 0 ) && ( n > 0 ) ) { LAPACKE_free( a_t ); a_t = NULL; } exit_level_0: if( info == LAPACK_TRANSPOSE_MEMORY_ERROR ) { LAPACKE_xerbla( "LAPACKE_zgesvdq_work", info ); diff --git a/lapack-netlib/SRC/DEPRECATED/cgelqs.f b/lapack-netlib/SRC/DEPRECATED/cgelqs.f index 47e17a5830..aba3632a74 100644 --- a/lapack-netlib/SRC/DEPRECATED/cgelqs.f +++ b/lapack-netlib/SRC/DEPRECATED/cgelqs.f @@ -16,7 +16,7 @@ * .. * .. Array Arguments .. * COMPLEX A( LDA, * ), B( LDB, * ), TAU( * ), -* $ WORK( LWORK ) +* $ WORK( * ) * .. * * @@ -128,7 +128,7 @@ SUBROUTINE CGELQS( M, N, NRHS, A, LDA, TAU, B, LDB, WORK, LWORK, * .. * .. Array Arguments .. COMPLEX A( LDA, * ), B( LDB, * ), TAU( * ), - $ WORK( LWORK ) + $ WORK( * ) * .. * * ===================================================================== diff --git a/lapack-netlib/SRC/DEPRECATED/cgeqrs.f b/lapack-netlib/SRC/DEPRECATED/cgeqrs.f index 13ac7f74fd..9d0527283d 100644 --- a/lapack-netlib/SRC/DEPRECATED/cgeqrs.f +++ b/lapack-netlib/SRC/DEPRECATED/cgeqrs.f @@ -16,7 +16,7 @@ * .. * .. Array Arguments .. * COMPLEX A( LDA, * ), B( LDB, * ), TAU( * ), -* $ WORK( LWORK ) +* $ WORK( * ) * .. * * @@ -128,7 +128,7 @@ SUBROUTINE CGEQRS( M, N, NRHS, A, LDA, TAU, B, LDB, WORK, LWORK, * .. * .. Array Arguments .. COMPLEX A( LDA, * ), B( LDB, * ), TAU( * ), - $ WORK( LWORK ) + $ WORK( * ) * .. * * ===================================================================== diff --git a/lapack-netlib/SRC/DEPRECATED/dgelqs.f b/lapack-netlib/SRC/DEPRECATED/dgelqs.f index ecbb5893c3..1bab678901 100644 --- a/lapack-netlib/SRC/DEPRECATED/dgelqs.f +++ b/lapack-netlib/SRC/DEPRECATED/dgelqs.f @@ -16,7 +16,7 @@ * .. * .. Array Arguments .. * DOUBLE PRECISION A( LDA, * ), B( LDB, * ), TAU( * ), -* $ WORK( LWORK ) +* $ WORK( * ) * .. * * @@ -128,7 +128,7 @@ SUBROUTINE DGELQS( M, N, NRHS, A, LDA, TAU, B, LDB, WORK, LWORK, * .. * .. Array Arguments .. DOUBLE PRECISION A( LDA, * ), B( LDB, * ), TAU( * ), - $ WORK( LWORK ) + $ WORK( * ) * .. * * ===================================================================== @@ -174,18 +174,19 @@ SUBROUTINE DGELQS( M, N, NRHS, A, LDA, TAU, B, LDB, WORK, LWORK, * * Solve L*X = B(1:m,:) * - CALL DTRSM( 'Left', 'Lower', 'No transpose', 'Non-unit', M, NRHS, - $ ONE, A, LDA, B, LDB ) + CALL DTRSM( 'Left', 'Lower', 'No transpose', 'Non-unit', M, + $ NRHS, ONE, A, LDA, B, LDB ) * * Set B(m+1:n,:) to zero * IF( M.LT.N ) - $ CALL DLASET( 'Full', N-M, NRHS, ZERO, ZERO, B( M+1, 1 ), LDB ) + $ CALL DLASET( 'Full', N-M, NRHS, ZERO, ZERO, B( M+1, 1 ), + $ LDB ) * * B := Q' * B * - CALL DORMLQ( 'Left', 'Transpose', N, NRHS, M, A, LDA, TAU, B, LDB, - $ WORK, LWORK, INFO ) + CALL DORMLQ( 'Left', 'Transpose', N, NRHS, M, A, LDA, TAU, B, + $ LDB, WORK, LWORK, INFO ) * RETURN * diff --git a/lapack-netlib/SRC/DEPRECATED/dgeqrs.f b/lapack-netlib/SRC/DEPRECATED/dgeqrs.f index bfb7bd8bb8..e3e6c4048e 100644 --- a/lapack-netlib/SRC/DEPRECATED/dgeqrs.f +++ b/lapack-netlib/SRC/DEPRECATED/dgeqrs.f @@ -16,7 +16,7 @@ * .. * .. Array Arguments .. * DOUBLE PRECISION A( LDA, * ), B( LDB, * ), TAU( * ), -* $ WORK( LWORK ) +* $ WORK( * ) * .. * * @@ -128,7 +128,7 @@ SUBROUTINE DGEQRS( M, N, NRHS, A, LDA, TAU, B, LDB, WORK, LWORK, * .. * .. Array Arguments .. DOUBLE PRECISION A( LDA, * ), B( LDB, * ), TAU( * ), - $ WORK( LWORK ) + $ WORK( * ) * .. * * ===================================================================== diff --git a/lapack-netlib/SRC/DEPRECATED/sgelqs.f b/lapack-netlib/SRC/DEPRECATED/sgelqs.f index 83afb4690b..2b1dd44b71 100644 --- a/lapack-netlib/SRC/DEPRECATED/sgelqs.f +++ b/lapack-netlib/SRC/DEPRECATED/sgelqs.f @@ -16,7 +16,7 @@ * .. * .. Array Arguments .. * REAL A( LDA, * ), B( LDB, * ), TAU( * ), -* $ WORK( LWORK ) +* $ WORK( * ) * .. * * @@ -128,7 +128,7 @@ SUBROUTINE SGELQS( M, N, NRHS, A, LDA, TAU, B, LDB, WORK, LWORK, * .. * .. Array Arguments .. REAL A( LDA, * ), B( LDB, * ), TAU( * ), - $ WORK( LWORK ) + $ WORK( * ) * .. * * ===================================================================== @@ -174,18 +174,19 @@ SUBROUTINE SGELQS( M, N, NRHS, A, LDA, TAU, B, LDB, WORK, LWORK, * * Solve L*X = B(1:m,:) * - CALL STRSM( 'Left', 'Lower', 'No transpose', 'Non-unit', M, NRHS, - $ ONE, A, LDA, B, LDB ) + CALL STRSM( 'Left', 'Lower', 'No transpose', 'Non-unit', M, + $ NRHS, ONE, A, LDA, B, LDB ) * * Set B(m+1:n,:) to zero * IF( M.LT.N ) - $ CALL SLASET( 'Full', N-M, NRHS, ZERO, ZERO, B( M+1, 1 ), LDB ) + $ CALL SLASET( 'Full', N-M, NRHS, ZERO, ZERO, B( M+1, 1 ), + $ LDB ) * * B := Q' * B * - CALL SORMLQ( 'Left', 'Transpose', N, NRHS, M, A, LDA, TAU, B, LDB, - $ WORK, LWORK, INFO ) + CALL SORMLQ( 'Left', 'Transpose', N, NRHS, M, A, LDA, TAU, B, + $ LDB, WORK, LWORK, INFO ) * RETURN * diff --git a/lapack-netlib/SRC/DEPRECATED/sgeqrs.f b/lapack-netlib/SRC/DEPRECATED/sgeqrs.f index ed11489104..bdbad5dcb4 100644 --- a/lapack-netlib/SRC/DEPRECATED/sgeqrs.f +++ b/lapack-netlib/SRC/DEPRECATED/sgeqrs.f @@ -16,7 +16,7 @@ * .. * .. Array Arguments .. * REAL A( LDA, * ), B( LDB, * ), TAU( * ), -* $ WORK( LWORK ) +* $ WORK( * ) * .. * * @@ -128,7 +128,7 @@ SUBROUTINE SGEQRS( M, N, NRHS, A, LDA, TAU, B, LDB, WORK, LWORK, * .. * .. Array Arguments .. REAL A( LDA, * ), B( LDB, * ), TAU( * ), - $ WORK( LWORK ) + $ WORK( * ) * .. * * ===================================================================== diff --git a/lapack-netlib/SRC/DEPRECATED/zgelqs.f b/lapack-netlib/SRC/DEPRECATED/zgelqs.f index 5f629f8c7e..772165dfd9 100644 --- a/lapack-netlib/SRC/DEPRECATED/zgelqs.f +++ b/lapack-netlib/SRC/DEPRECATED/zgelqs.f @@ -16,7 +16,7 @@ * .. * .. Array Arguments .. * COMPLEX*16 A( LDA, * ), B( LDB, * ), TAU( * ), -* $ WORK( LWORK ) +* $ WORK( * ) * .. * * @@ -128,7 +128,7 @@ SUBROUTINE ZGELQS( M, N, NRHS, A, LDA, TAU, B, LDB, WORK, LWORK, * .. * .. Array Arguments .. COMPLEX*16 A( LDA, * ), B( LDB, * ), TAU( * ), - $ WORK( LWORK ) + $ WORK( * ) * .. * * ===================================================================== diff --git a/lapack-netlib/SRC/DEPRECATED/zgeqrs.f b/lapack-netlib/SRC/DEPRECATED/zgeqrs.f index 6583e38591..cc33a45fc1 100644 --- a/lapack-netlib/SRC/DEPRECATED/zgeqrs.f +++ b/lapack-netlib/SRC/DEPRECATED/zgeqrs.f @@ -16,7 +16,7 @@ * .. * .. Array Arguments .. * COMPLEX*16 A( LDA, * ), B( LDB, * ), TAU( * ), -* $ WORK( LWORK ) +* $ WORK( * ) * .. * * @@ -128,7 +128,7 @@ SUBROUTINE ZGEQRS( M, N, NRHS, A, LDA, TAU, B, LDB, WORK, LWORK, * .. * .. Array Arguments .. COMPLEX*16 A( LDA, * ), B( LDB, * ), TAU( * ), - $ WORK( LWORK ) + $ WORK( * ) * .. * * ===================================================================== diff --git a/lapack-netlib/SRC/Makefile b/lapack-netlib/SRC/Makefile index 0f547dd0c4..ebf3431a92 100644 --- a/lapack-netlib/SRC/Makefile +++ b/lapack-netlib/SRC/Makefile @@ -155,7 +155,7 @@ SLASRC_O = \ slaqr0.o slaqr1.o slaqr2.o slaqr3.o slaqr4.o slaqr5.o \ slaqtr.o slar1v.o slar2v.o ilaslr.o ilaslc.o \ slarf.o slarfb.o slarfb_gett.o slarfg.o slarfgp.o slarft.o slarfx.o slarfy.o slargv.o \ - slarrv.o slartv.o \ + slarf1f.o slarf1l.o slarrv.o slartv.o \ slarz.o slarzb.o slarzt.o slaswp.o slasy2.o slasyf.o slasyf_rook.o \ slasyf_rk.o \ slatbs.o slatdf.o slatps.o slatrd.o slatrs.o slatrz.o \ @@ -271,6 +271,7 @@ CLASRC_O = \ claqsp.o claqsy.o clar1v.o clar2v.o ilaclr.o ilaclc.o \ claqz0.o claqz1.o claqz2.o claqz3.o \ clarf.o clarfb.o clarfb_gett.o clarfg.o clarft.o clarfgp.o \ + clarf1f.o clarf1l.o \ clarfx.o clarfy.o clargv.o clarnv.o clarrv.o clartg.o clartv.o \ clarz.o clarzb.o clarzt.o clascl.o claset.o clasr.o classq.o \ claswp.o clasyf.o clasyf_rook.o clasyf_rk.o clasyf_aa.o \ @@ -364,7 +365,7 @@ DLASRC_O = \ dlaqr0.o dlaqr1.o dlaqr2.o dlaqr3.o dlaqr4.o dlaqr5.o \ dlaqtr.o dlar1v.o dlar2v.o iladlr.o iladlc.o \ dlarf.o dlarfb.o dlarfb_gett.o dlarfg.o dlarfgp.o dlarft.o dlarfx.o dlarfy.o \ - dlargv.o dlarrv.o dlartv.o \ + dlarf1f.o dlarf1l.o dlargv.o dlarrv.o dlartv.o \ dlarz.o dlarzb.o dlarzt.o dlaswp.o dlasy2.o \ dlasyf.o dlasyf_rook.o dlasyf_rk.o \ dlatbs.o dlatdf.o dlatps.o dlatrd.o dlatrs.o dlatrz.o dlauu2.o \ @@ -478,7 +479,7 @@ ZLASRC_O = \ zlaqsp.o zlaqsy.o zlar1v.o zlar2v.o ilazlr.o ilazlc.o \ zlaqz0.o zlaqz1.o zlaqz2.o zlaqz3.o \ zlarcm.o zlarf.o zlarfb.o zlarfb_gett.o \ - zlarfg.o zlarft.o zlarfgp.o \ + zlarfg.o zlarft.o zlarfgp.o zlarf1f.o zlarf1l.o \ zlarfx.o zlarfy.o zlargv.o zlarnv.o zlarrv.o zlartg.o zlartv.o \ zlarz.o zlarzb.o zlarzt.o zlascl.o zlaset.o zlasr.o \ zlassq.o zlaswp.o zlasyf.o zlasyf_rook.o zlasyf_rk.o zlasyf_aa.o \ diff --git a/lapack-netlib/SRC/cgebd2.f b/lapack-netlib/SRC/cgebd2.f index db949f90cf..b9be813007 100644 --- a/lapack-netlib/SRC/cgebd2.f +++ b/lapack-netlib/SRC/cgebd2.f @@ -5,7 +5,6 @@ * Online html documentation available at * http://www.netlib.org/lapack/explore-html/ * -*> \htmlonly *> Download CGEBD2 + dependencies *> *> [TGZ] @@ -13,7 +12,6 @@ *> [ZIP] *> *> [TXT] -*> \endhtmlonly * * Definition: * =========== @@ -132,7 +130,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup complexGEcomputational +*> \ingroup gebd2 * @precisions normal c -> s d z * *> \par Further Details: @@ -187,6 +185,7 @@ *> * ===================================================================== SUBROUTINE CGEBD2( M, N, A, LDA, D, E, TAUQ, TAUP, WORK, INFO ) + IMPLICIT NONE * * -- LAPACK computational routine -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- @@ -203,16 +202,15 @@ SUBROUTINE CGEBD2( M, N, A, LDA, D, E, TAUQ, TAUP, WORK, INFO ) * ===================================================================== * * .. Parameters .. - COMPLEX ZERO, ONE - PARAMETER ( ZERO = ( 0.0E+0, 0.0E+0 ), - $ ONE = ( 1.0E+0, 0.0E+0 ) ) + COMPLEX ZERO + PARAMETER ( ZERO = ( 0.0E+0, 0.0E+0 ) ) * .. * .. Local Scalars .. INTEGER I COMPLEX ALPHA * .. * .. External Subroutines .. - EXTERNAL CLACGV, CLARF, CLARFG, XERBLA + EXTERNAL CLACGV, CLARF1F, CLARFG, XERBLA * .. * .. Intrinsic Functions .. INTRINSIC CONJG, MAX, MIN @@ -246,13 +244,13 @@ SUBROUTINE CGEBD2( M, N, A, LDA, D, E, TAUQ, TAUP, WORK, INFO ) CALL CLARFG( M-I+1, ALPHA, A( MIN( I+1, M ), I ), 1, $ TAUQ( I ) ) D( I ) = REAL( ALPHA ) - A( I, I ) = ONE * * Apply H(i)**H to A(i:m,i+1:n) from the left * IF( I.LT.N ) - $ CALL CLARF( 'Left', M-I+1, N-I, A( I, I ), 1, - $ CONJG( TAUQ( I ) ), A( I, I+1 ), LDA, WORK ) + $ CALL CLARF1F( 'Left', M-I+1, N-I, A( I, I ), 1, + $ CONJG( TAUQ( I ) ), A( I, I+1 ), LDA, + $ WORK ) A( I, I ) = D( I ) * IF( I.LT.N ) THEN @@ -265,12 +263,11 @@ SUBROUTINE CGEBD2( M, N, A, LDA, D, E, TAUQ, TAUP, WORK, INFO ) CALL CLARFG( N-I, ALPHA, A( I, MIN( I+2, N ) ), $ LDA, TAUP( I ) ) E( I ) = REAL( ALPHA ) - A( I, I+1 ) = ONE * * Apply G(i) to A(i+1:m,i+1:n) from the right * - CALL CLARF( 'Right', M-I, N-I, A( I, I+1 ), LDA, - $ TAUP( I ), A( I+1, I+1 ), LDA, WORK ) + CALL CLARF1F( 'Right', M-I, N-I, A( I, I+1 ), LDA, + $ TAUP( I ), A( I+1, I+1 ), LDA, WORK ) CALL CLACGV( N-I, A( I, I+1 ), LDA ) A( I, I+1 ) = E( I ) ELSE @@ -290,13 +287,12 @@ SUBROUTINE CGEBD2( M, N, A, LDA, D, E, TAUQ, TAUP, WORK, INFO ) CALL CLARFG( N-I+1, ALPHA, A( I, MIN( I+1, N ) ), LDA, $ TAUP( I ) ) D( I ) = REAL( ALPHA ) - A( I, I ) = ONE * * Apply G(i) to A(i+1:m,i:n) from the right * IF( I.LT.M ) - $ CALL CLARF( 'Right', M-I, N-I+1, A( I, I ), LDA, - $ TAUP( I ), A( I+1, I ), LDA, WORK ) + $ CALL CLARF1F( 'Right', M-I, N-I+1, A( I, I ), LDA, + $ TAUP( I ), A( I+1, I ), LDA, WORK ) CALL CLACGV( N-I+1, A( I, I ), LDA ) A( I, I ) = D( I ) * @@ -309,13 +305,12 @@ SUBROUTINE CGEBD2( M, N, A, LDA, D, E, TAUQ, TAUP, WORK, INFO ) CALL CLARFG( M-I, ALPHA, A( MIN( I+2, M ), I ), 1, $ TAUQ( I ) ) E( I ) = REAL( ALPHA ) - A( I+1, I ) = ONE * * Apply H(i)**H to A(i+1:m,i+1:n) from the left * - CALL CLARF( 'Left', M-I, N-I, A( I+1, I ), 1, - $ CONJG( TAUQ( I ) ), A( I+1, I+1 ), LDA, - $ WORK ) + CALL CLARF1F( 'Left', M-I, N-I, A( I+1, I ), 1, + $ CONJG( TAUQ( I ) ), A( I+1, I+1 ), LDA, + $ WORK ) A( I+1, I ) = E( I ) ELSE TAUQ( I ) = ZERO diff --git a/lapack-netlib/SRC/cgeev.f b/lapack-netlib/SRC/cgeev.f index bb41599d1d..7ead6f3505 100644 --- a/lapack-netlib/SRC/cgeev.f +++ b/lapack-netlib/SRC/cgeev.f @@ -216,7 +216,7 @@ SUBROUTINE CGEEV( JOBVL, JOBVR, N, A, LDA, W, VL, LDVL, VR, LDVR, $ CHSEQR, CLACPY, CLASCL, CSCAL, CTREVC3, CUNGHR * .. * .. External Functions .. - LOGICAL LSAME + LOGICAL LSAME, SISNAN INTEGER ISAMAX, ILAENV REAL SLAMCH, SCNRM2, CLANGE, SROUNDUP_LWORK EXTERNAL LSAME, ISAMAX, ILAENV, SLAMCH, SCNRM2, CLANGE, @@ -329,6 +329,10 @@ SUBROUTINE CGEEV( JOBVL, JOBVR, N, A, LDA, W, VL, LDVL, VR, LDVR, ELSE IF( ANRM.GT.BIGNUM ) THEN SCALEA = .TRUE. CSCALE = BIGNUM + ELSE IF ( SISNAN( ANRM ) ) THEN + INFO = -4 + CALL XERBLA( 'CGEEV', -INFO ) + RETURN END IF IF( SCALEA ) $ CALL CLASCL( 'G', 0, 0, ANRM, CSCALE, N, N, A, LDA, IERR ) diff --git a/lapack-netlib/SRC/cgehd2.f b/lapack-netlib/SRC/cgehd2.f index d8b40b180c..4a5400667f 100644 --- a/lapack-netlib/SRC/cgehd2.f +++ b/lapack-netlib/SRC/cgehd2.f @@ -5,7 +5,6 @@ * Online html documentation available at * http://www.netlib.org/lapack/explore-html/ * -*> \htmlonly *> Download CGEHD2 + dependencies *> *> [TGZ] @@ -13,7 +12,6 @@ *> [ZIP] *> *> [TXT] -*> \endhtmlonly * * Definition: * =========== @@ -106,7 +104,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup complexGEcomputational +*> \ingroup gehd2 * *> \par Further Details: * ===================== @@ -146,6 +144,7 @@ *> * ===================================================================== SUBROUTINE CGEHD2( N, ILO, IHI, A, LDA, TAU, WORK, INFO ) + IMPLICIT NONE * * -- LAPACK computational routine -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- @@ -160,16 +159,11 @@ SUBROUTINE CGEHD2( N, ILO, IHI, A, LDA, TAU, WORK, INFO ) * * ===================================================================== * -* .. Parameters .. - COMPLEX ONE - PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ) ) -* .. * .. Local Scalars .. INTEGER I - COMPLEX ALPHA * .. * .. External Subroutines .. - EXTERNAL CLARF, CLARFG, XERBLA + EXTERNAL CLARF1F, CLARFG, XERBLA * .. * .. Intrinsic Functions .. INTRINSIC CONJG, MAX, MIN @@ -197,21 +191,19 @@ SUBROUTINE CGEHD2( N, ILO, IHI, A, LDA, TAU, WORK, INFO ) * * Compute elementary reflector H(i) to annihilate A(i+2:ihi,i) * - ALPHA = A( I+1, I ) - CALL CLARFG( IHI-I, ALPHA, A( MIN( I+2, N ), I ), 1, TAU( I ) ) - A( I+1, I ) = ONE + CALL CLARFG( IHI-I, A( I+1, I ), A( MIN( I+2, N ), I ), 1, + $ TAU( I ) ) * * Apply H(i) to A(1:ihi,i+1:ihi) from the right * - CALL CLARF( 'Right', IHI, IHI-I, A( I+1, I ), 1, TAU( I ), - $ A( 1, I+1 ), LDA, WORK ) + CALL CLARF1F( 'Right', IHI, IHI-I, A( I+1, I ), 1, TAU( I ), + $ A( 1, I+1 ), LDA, WORK ) * * Apply H(i)**H to A(i+1:ihi,i+1:n) from the left * - CALL CLARF( 'Left', IHI-I, N-I, A( I+1, I ), 1, - $ CONJG( TAU( I ) ), A( I+1, I+1 ), LDA, WORK ) + CALL CLARF1F( 'Left', IHI-I, N-I, A( I+1, I ), 1, + $ CONJG( TAU( I ) ), A( I+1, I+1 ), LDA, WORK ) * - A( I+1, I ) = ALPHA 10 CONTINUE * RETURN diff --git a/lapack-netlib/SRC/cgelq2.f b/lapack-netlib/SRC/cgelq2.f index 0ea4a7200f..97bc676b17 100644 --- a/lapack-netlib/SRC/cgelq2.f +++ b/lapack-netlib/SRC/cgelq2.f @@ -5,7 +5,6 @@ * Online html documentation available at * http://www.netlib.org/lapack/explore-html/ * -*> \htmlonly *> Download CGELQ2 + dependencies *> *> [TGZ] @@ -13,7 +12,6 @@ *> [ZIP] *> *> [TXT] -*> \endhtmlonly * * Definition: * =========== @@ -104,7 +102,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup complexGEcomputational +*> \ingroup gelq2 * *> \par Further Details: * ===================== @@ -126,6 +124,7 @@ *> * ===================================================================== SUBROUTINE CGELQ2( M, N, A, LDA, TAU, WORK, INFO ) + IMPLICIT NONE * * -- LAPACK computational routine -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- @@ -140,16 +139,11 @@ SUBROUTINE CGELQ2( M, N, A, LDA, TAU, WORK, INFO ) * * ===================================================================== * -* .. Parameters .. - COMPLEX ONE - PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ) ) -* .. * .. Local Scalars .. INTEGER I, K - COMPLEX ALPHA * .. * .. External Subroutines .. - EXTERNAL CLACGV, CLARF, CLARFG, XERBLA + EXTERNAL CLACGV, CLARF1F, CLARFG, XERBLA * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN @@ -178,18 +172,15 @@ SUBROUTINE CGELQ2( M, N, A, LDA, TAU, WORK, INFO ) * Generate elementary reflector H(i) to annihilate A(i,i+1:n) * CALL CLACGV( N-I+1, A( I, I ), LDA ) - ALPHA = A( I, I ) - CALL CLARFG( N-I+1, ALPHA, A( I, MIN( I+1, N ) ), LDA, + CALL CLARFG( N-I+1, A( I, I ), A( I, MIN( I+1, N ) ), LDA, $ TAU( I ) ) IF( I.LT.M ) THEN * * Apply H(i) to A(i+1:m,i:n) from the right * - A( I, I ) = ONE - CALL CLARF( 'Right', M-I, N-I+1, A( I, I ), LDA, TAU( I ), - $ A( I+1, I ), LDA, WORK ) + CALL CLARF1F( 'Right', M-I, N-I+1, A( I, I ), LDA, + $ TAU( I ), A( I+1, I ), LDA, WORK ) END IF - A( I, I ) = ALPHA CALL CLACGV( N-I+1, A( I, I ), LDA ) 10 CONTINUE RETURN diff --git a/lapack-netlib/SRC/cgeql2.f b/lapack-netlib/SRC/cgeql2.f index 41a5f9e049..a089d267ac 100644 --- a/lapack-netlib/SRC/cgeql2.f +++ b/lapack-netlib/SRC/cgeql2.f @@ -5,7 +5,6 @@ * Online html documentation available at * http://www.netlib.org/lapack/explore-html/ * -*> \htmlonly *> Download CGEQL2 + dependencies *> *> [TGZ] @@ -13,7 +12,6 @@ *> [ZIP] *> *> [TXT] -*> \endhtmlonly * * Definition: * =========== @@ -98,7 +96,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup complexGEcomputational +*> \ingroup geql2 * *> \par Further Details: * ===================== @@ -120,6 +118,7 @@ *> * ===================================================================== SUBROUTINE CGEQL2( M, N, A, LDA, TAU, WORK, INFO ) + IMPLICIT NONE * * -- LAPACK computational routine -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- @@ -134,16 +133,11 @@ SUBROUTINE CGEQL2( M, N, A, LDA, TAU, WORK, INFO ) * * ===================================================================== * -* .. Parameters .. - COMPLEX ONE - PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ) ) -* .. * .. Local Scalars .. INTEGER I, K - COMPLEX ALPHA * .. * .. External Subroutines .. - EXTERNAL CLARF, CLARFG, XERBLA + EXTERNAL CLARF1L, CLARFG, XERBLA * .. * .. Intrinsic Functions .. INTRINSIC CONJG, MAX, MIN @@ -172,15 +166,13 @@ SUBROUTINE CGEQL2( M, N, A, LDA, TAU, WORK, INFO ) * Generate elementary reflector H(i) to annihilate * A(1:m-k+i-1,n-k+i) * - ALPHA = A( M-K+I, N-K+I ) - CALL CLARFG( M-K+I, ALPHA, A( 1, N-K+I ), 1, TAU( I ) ) + CALL CLARFG( M-K+I, A( M-K+I, N-K+I ), A( 1, N-K+I ), 1, + $ TAU( I ) ) * * Apply H(i)**H to A(1:m-k+i,1:n-k+i-1) from the left * - A( M-K+I, N-K+I ) = ONE - CALL CLARF( 'Left', M-K+I, N-K+I-1, A( 1, N-K+I ), 1, - $ CONJG( TAU( I ) ), A, LDA, WORK ) - A( M-K+I, N-K+I ) = ALPHA + CALL CLARF1L( 'Left', M-K+I, N-K+I-1, A( 1, N-K+I ), 1, + $ CONJG( TAU( I ) ), A, LDA, WORK ) 10 CONTINUE RETURN * diff --git a/lapack-netlib/SRC/cgeqp3rk.f b/lapack-netlib/SRC/cgeqp3rk.f index fecf8d85cc..2fda980adc 100644 --- a/lapack-netlib/SRC/cgeqp3rk.f +++ b/lapack-netlib/SRC/cgeqp3rk.f @@ -5,7 +5,6 @@ * Online html documentation available at * http://www.netlib.org/lapack/explore-html/ * -*> \htmlonly *> Download CGEQP3RK + dependencies *> *> [TGZ] @@ -13,7 +12,6 @@ *> [ZIP] *> *> [TXT] -*> \endhtmlonly * * Definition: * =========== @@ -552,27 +550,19 @@ *> C. H. Bischof, Math. and Comp. Sci. Div., Argonne National Lab, USA. *> A BLAS-3 version of the QR factorization with column pivoting. *> LAPACK Working Note 114 -*> \htmlonly *> https://www.netlib.org/lapack/lawnspdf/lawn114.pdf -*> \endhtmlonly *> and in *> SIAM J. Sci. Comput., 19(5):1486-1494, Sept. 1998. -*> \htmlonly *> https://doi.org/10.1137/S1064827595296732 -*> \endhtmlonly *> *> [2] A partial column norm updating strategy developed in 2006. *> Z. Drmac and Z. Bujanovic, Dept. of Math., University of Zagreb, Croatia. *> On the failure of rank revealing QR factorization software – a case study. *> LAPACK Working Note 176. -*> \htmlonly *> http://www.netlib.org/lapack/lawnspdf/lawn176.pdf -*> \endhtmlonly *> and in *> ACM Trans. Math. Softw. 35, 2, Article 12 (July 2008), 28 pages. -*> \htmlonly *> https://doi.org/10.1145/1377612.1377616 -*> \endhtmlonly * *> \par Contributors: * ================== @@ -678,7 +668,7 @@ SUBROUTINE CGEQP3RK( M, N, NRHS, KMAX, ABSTOL, RELTOL, A, LDA, * Minimal workspace size in case of using only unblocked * BLAS 2 code in CLAQP2RK. * 1) CLAQP2RK: N+NRHS-1 to use in WORK array that is used -* in CLARF subroutine inside CLAQP2RK to apply an +* in CLARF1F subroutine inside CLAQP2RK to apply an * elementary reflector from the left. * TOTAL_WORK_SIZE = 3*N + NRHS - 1 * @@ -694,7 +684,7 @@ SUBROUTINE CGEQP3RK( M, N, NRHS, KMAX, ABSTOL, RELTOL, A, LDA, * 1) CGEQP3RK, CLAQP2RK, CLAQP3RK: 2*N to store full and * partial column 2-norms. * 2) CLAQP2RK: N+NRHS-1 to use in WORK array that is used -* in CLARF subroutine to apply an elementary reflector +* in CLARF1F subroutine to apply an elementary reflector * from the left. * 3) CLAQP3RK: NB*(N+NRHS) to use in the work array F that * is used to apply a block reflector from @@ -894,7 +884,8 @@ SUBROUTINE CGEQP3RK( M, N, NRHS, KMAX, ABSTOL, RELTOL, A, LDA, * Determine when to cross over from blocked to unblocked code. * (for N less than NX, unblocked code should be used). * - NX = MAX( 0, ILAENV( IXOVER, 'CGEQP3RK', ' ', M, N, -1, -1 ) ) + NX = MAX( 0, ILAENV( IXOVER, 'CGEQP3RK', ' ', M, N, -1, + $ -1 ) ) * IF( NX.LT.MINMN ) THEN * diff --git a/lapack-netlib/SRC/cgeqr2.f b/lapack-netlib/SRC/cgeqr2.f index b0b346b2db..775d33c515 100644 --- a/lapack-netlib/SRC/cgeqr2.f +++ b/lapack-netlib/SRC/cgeqr2.f @@ -5,7 +5,6 @@ * Online html documentation available at * http://www.netlib.org/lapack/explore-html/ * -*> \htmlonly *> Download CGEQR2 + dependencies *> *> [TGZ] @@ -13,7 +12,6 @@ *> [ZIP] *> *> [TXT] -*> \endhtmlonly * * Definition: * =========== @@ -105,7 +103,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup complexGEcomputational +*> \ingroup geqr2 * *> \par Further Details: * ===================== @@ -127,6 +125,7 @@ *> * ===================================================================== SUBROUTINE CGEQR2( M, N, A, LDA, TAU, WORK, INFO ) + IMPLICIT NONE * * -- LAPACK computational routine -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- @@ -141,16 +140,11 @@ SUBROUTINE CGEQR2( M, N, A, LDA, TAU, WORK, INFO ) * * ===================================================================== * -* .. Parameters .. - COMPLEX ONE - PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ) ) -* .. * .. Local Scalars .. INTEGER I, K - COMPLEX ALPHA * .. * .. External Subroutines .. - EXTERNAL CLARF, CLARFG, XERBLA + EXTERNAL CLARF1F, CLARFG, XERBLA * .. * .. Intrinsic Functions .. INTRINSIC CONJG, MAX, MIN @@ -184,11 +178,8 @@ SUBROUTINE CGEQR2( M, N, A, LDA, TAU, WORK, INFO ) * * Apply H(i)**H to A(i:m,i+1:n) from the left * - ALPHA = A( I, I ) - A( I, I ) = ONE - CALL CLARF( 'Left', M-I+1, N-I, A( I, I ), 1, - $ CONJG( TAU( I ) ), A( I, I+1 ), LDA, WORK ) - A( I, I ) = ALPHA + CALL CLARF1F( 'Left', M-I+1, N-I, A( I, I ), 1, + $ CONJG( TAU( I ) ), A( I, I+1 ), LDA, WORK ) END IF 10 CONTINUE RETURN diff --git a/lapack-netlib/SRC/cgeqr2p.f b/lapack-netlib/SRC/cgeqr2p.f index 7be7e7a1c9..72e3945780 100644 --- a/lapack-netlib/SRC/cgeqr2p.f +++ b/lapack-netlib/SRC/cgeqr2p.f @@ -5,7 +5,6 @@ * Online html documentation available at * http://www.netlib.org/lapack/explore-html/ * -*> \htmlonly *> Download CGEQR2P + dependencies *> *> [TGZ] @@ -13,7 +12,6 @@ *> [ZIP] *> *> [TXT] -*> \endhtmlonly * * Definition: * =========== @@ -107,7 +105,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup complexGEcomputational +*> \ingroup geqr2p * *> \par Further Details: * ===================== @@ -131,6 +129,7 @@ *> * ===================================================================== SUBROUTINE CGEQR2P( M, N, A, LDA, TAU, WORK, INFO ) + IMPLICIT NONE * * -- LAPACK computational routine -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- @@ -145,16 +144,11 @@ SUBROUTINE CGEQR2P( M, N, A, LDA, TAU, WORK, INFO ) * * ===================================================================== * -* .. Parameters .. - COMPLEX ONE - PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ) ) -* .. * .. Local Scalars .. INTEGER I, K - COMPLEX ALPHA * .. * .. External Subroutines .. - EXTERNAL CLARF, CLARFGP, XERBLA + EXTERNAL CLARF1F, CLARFGP, XERBLA * .. * .. Intrinsic Functions .. INTRINSIC CONJG, MAX, MIN @@ -188,11 +182,8 @@ SUBROUTINE CGEQR2P( M, N, A, LDA, TAU, WORK, INFO ) * * Apply H(i)**H to A(i:m,i+1:n) from the left * - ALPHA = A( I, I ) - A( I, I ) = ONE - CALL CLARF( 'Left', M-I+1, N-I, A( I, I ), 1, - $ CONJG( TAU( I ) ), A( I, I+1 ), LDA, WORK ) - A( I, I ) = ALPHA + CALL CLARF1F( 'Left', M-I+1, N-I, A( I, I ), 1, + $ CONJG( TAU( I ) ), A( I, I+1 ), LDA, WORK ) END IF 10 CONTINUE RETURN diff --git a/lapack-netlib/SRC/cgerq2.f b/lapack-netlib/SRC/cgerq2.f index a2cf5cf696..3b8a959387 100644 --- a/lapack-netlib/SRC/cgerq2.f +++ b/lapack-netlib/SRC/cgerq2.f @@ -5,7 +5,6 @@ * Online html documentation available at * http://www.netlib.org/lapack/explore-html/ * -*> \htmlonly *> Download CGERQ2 + dependencies *> *> [TGZ] @@ -13,7 +12,6 @@ *> [ZIP] *> *> [TXT] -*> \endhtmlonly * * Definition: * =========== @@ -98,7 +96,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup complexGEcomputational +*> \ingroup gerq2 * *> \par Further Details: * ===================== @@ -120,6 +118,7 @@ *> * ===================================================================== SUBROUTINE CGERQ2( M, N, A, LDA, TAU, WORK, INFO ) + IMPLICIT NONE * * -- LAPACK computational routine -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- @@ -134,16 +133,11 @@ SUBROUTINE CGERQ2( M, N, A, LDA, TAU, WORK, INFO ) * * ===================================================================== * -* .. Parameters .. - COMPLEX ONE - PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ) ) -* .. * .. Local Scalars .. INTEGER I, K - COMPLEX ALPHA * .. * .. External Subroutines .. - EXTERNAL CLACGV, CLARF, CLARFG, XERBLA + EXTERNAL CLACGV, CLARF1L, CLARFG, XERBLA * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN @@ -173,16 +167,13 @@ SUBROUTINE CGERQ2( M, N, A, LDA, TAU, WORK, INFO ) * A(m-k+i,1:n-k+i-1) * CALL CLACGV( N-K+I, A( M-K+I, 1 ), LDA ) - ALPHA = A( M-K+I, N-K+I ) - CALL CLARFG( N-K+I, ALPHA, A( M-K+I, 1 ), LDA, + CALL CLARFG( N-K+I, A( M-K+I, N-K+I ), A( M-K+I, 1 ), LDA, $ TAU( I ) ) * * Apply H(i) to A(1:m-k+i-1,1:n-k+i) from the right * - A( M-K+I, N-K+I ) = ONE - CALL CLARF( 'Right', M-K+I-1, N-K+I, A( M-K+I, 1 ), LDA, - $ TAU( I ), A, LDA, WORK ) - A( M-K+I, N-K+I ) = ALPHA + CALL CLARF1L( 'Right', M-K+I-1, N-K+I, A( M-K+I, 1 ), LDA, + $ TAU( I ), A, LDA, WORK ) CALL CLACGV( N-K+I-1, A( M-K+I, 1 ), LDA ) 10 CONTINUE RETURN diff --git a/lapack-netlib/SRC/cgetc2.f b/lapack-netlib/SRC/cgetc2.f index 94267d7670..85e54e20e1 100644 --- a/lapack-netlib/SRC/cgetc2.f +++ b/lapack-netlib/SRC/cgetc2.f @@ -5,7 +5,6 @@ * Online html documentation available at * http://www.netlib.org/lapack/explore-html/ * -*> \htmlonly *> Download CGETC2 + dependencies *> *> [TGZ] @@ -13,7 +12,6 @@ *> [ZIP] *> *> [TXT] -*> \endhtmlonly * * Definition: * =========== @@ -98,7 +96,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup complexGEauxiliary +*> \ingroup getc2 * *> \par Contributors: * ================== @@ -108,6 +106,7 @@ * * ===================================================================== SUBROUTINE CGETC2( N, A, LDA, IPIV, JPIV, INFO ) + IMPLICIT NONE * * -- LAPACK auxiliary routine -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- @@ -132,7 +131,7 @@ SUBROUTINE CGETC2( N, A, LDA, IPIV, JPIV, INFO ) REAL BIGNUM, EPS, SMIN, SMLNUM, XMAX * .. * .. External Subroutines .. - EXTERNAL CGERU, CSWAP, SLABAD + EXTERNAL CGERU, CSWAP * .. * .. External Functions .. REAL SLAMCH @@ -155,7 +154,6 @@ SUBROUTINE CGETC2( N, A, LDA, IPIV, JPIV, INFO ) EPS = SLAMCH( 'P' ) SMLNUM = SLAMCH( 'S' ) / EPS BIGNUM = ONE / SMLNUM - CALL SLABAD( SMLNUM, BIGNUM ) * * Handle the case N=1 by itself * @@ -177,8 +175,8 @@ SUBROUTINE CGETC2( N, A, LDA, IPIV, JPIV, INFO ) * Find max element in matrix A * XMAX = ZERO - DO 20 IP = I, N - DO 10 JP = I, N + DO 20 JP = I, N + DO 10 IP = I, N IF( ABS( A( IP, JP ) ).GE.XMAX ) THEN XMAX = ABS( A( IP, JP ) ) IPV = IP diff --git a/lapack-netlib/SRC/chegvd.f b/lapack-netlib/SRC/chegvd.f index 34f56b154b..a20243ff92 100644 --- a/lapack-netlib/SRC/chegvd.f +++ b/lapack-netlib/SRC/chegvd.f @@ -5,7 +5,6 @@ * Online html documentation available at * http://www.netlib.org/lapack/explore-html/ * -*> \htmlonly *> Download CHEGVD + dependencies *> *> [TGZ] @@ -13,7 +12,6 @@ *> [ZIP] *> *> [TXT] -*> \endhtmlonly * * Definition: * =========== @@ -238,8 +236,10 @@ *> Mark Fahey, Department of Mathematics, Univ. of Kentucky, USA *> * ===================================================================== - SUBROUTINE CHEGVD( ITYPE, JOBZ, UPLO, N, A, LDA, B, LDB, W, WORK, + SUBROUTINE CHEGVD( ITYPE, JOBZ, UPLO, N, A, LDA, B, LDB, W, + $ WORK, $ LWORK, RWORK, LRWORK, IWORK, LIWORK, INFO ) + IMPLICIT NONE * * -- LAPACK driver routine -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- @@ -272,7 +272,8 @@ SUBROUTINE CHEGVD( ITYPE, JOBZ, UPLO, N, A, LDA, B, LDB, W, WORK, EXTERNAL LSAME, SROUNDUP_LWORK * .. * .. External Subroutines .. - EXTERNAL CHEEVD, CHEGST, CPOTRF, CTRMM, CTRSM, XERBLA + EXTERNAL CHEEVD, CHEGST, CPOTRF, CTRMM, CTRSM, + $ XERBLA * .. * .. Intrinsic Functions .. INTRINSIC MAX, REAL @@ -318,7 +319,7 @@ SUBROUTINE CHEGVD( ITYPE, JOBZ, UPLO, N, A, LDA, B, LDB, W, WORK, * IF( INFO.EQ.0 ) THEN WORK( 1 ) = SROUNDUP_LWORK(LOPT) - RWORK( 1 ) = LROPT + RWORK( 1 ) = SROUNDUP_LWORK(LROPT) IWORK( 1 ) = LIOPT * IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN @@ -353,7 +354,8 @@ SUBROUTINE CHEGVD( ITYPE, JOBZ, UPLO, N, A, LDA, B, LDB, W, WORK, * Transform problem to standard eigenvalue problem and solve. * CALL CHEGST( ITYPE, UPLO, N, A, LDA, B, LDB, INFO ) - CALL CHEEVD( JOBZ, UPLO, N, A, LDA, W, WORK, LWORK, RWORK, LRWORK, + CALL CHEEVD( JOBZ, UPLO, N, A, LDA, W, WORK, LWORK, RWORK, + $ LRWORK, $ IWORK, LIWORK, INFO ) LOPT = MAX( LOPT, INT( REAL( WORK( 1 ) ) ) ) LROPT = MAX( LROPT, INT( RWORK( 1 ) ) ) @@ -394,7 +396,7 @@ SUBROUTINE CHEGVD( ITYPE, JOBZ, UPLO, N, A, LDA, B, LDB, W, WORK, END IF * WORK( 1 ) = SROUNDUP_LWORK(LOPT) - RWORK( 1 ) = LROPT + RWORK( 1 ) = SROUNDUP_LWORK(LROPT) IWORK( 1 ) = LIOPT * RETURN diff --git a/lapack-netlib/SRC/chpgvd.f b/lapack-netlib/SRC/chpgvd.f index 7815efb4e2..8a6634f2cd 100644 --- a/lapack-netlib/SRC/chpgvd.f +++ b/lapack-netlib/SRC/chpgvd.f @@ -5,7 +5,6 @@ * Online html documentation available at * http://www.netlib.org/lapack/explore-html/ * -*> \htmlonly *> Download CHPGVD + dependencies *> *> [TGZ] @@ -13,7 +12,6 @@ *> [ZIP] *> *> [TXT] -*> \endhtmlonly * * Definition: * =========== @@ -220,8 +218,10 @@ *> Mark Fahey, Department of Mathematics, Univ. of Kentucky, USA * * ===================================================================== - SUBROUTINE CHPGVD( ITYPE, JOBZ, UPLO, N, AP, BP, W, Z, LDZ, WORK, + SUBROUTINE CHPGVD( ITYPE, JOBZ, UPLO, N, AP, BP, W, Z, LDZ, + $ WORK, $ LWORK, RWORK, LRWORK, IWORK, LIWORK, INFO ) + IMPLICIT NONE * * -- LAPACK driver routine -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- @@ -250,7 +250,8 @@ SUBROUTINE CHPGVD( ITYPE, JOBZ, UPLO, N, AP, BP, W, Z, LDZ, WORK, EXTERNAL LSAME, SROUNDUP_LWORK * .. * .. External Subroutines .. - EXTERNAL CHPEVD, CHPGST, CPPTRF, CTPMV, CTPSV, XERBLA + EXTERNAL CHPEVD, CHPGST, CPPTRF, CTPMV, CTPSV, + $ XERBLA * .. * .. Intrinsic Functions .. INTRINSIC MAX, REAL @@ -294,7 +295,7 @@ SUBROUTINE CHPGVD( ITYPE, JOBZ, UPLO, N, AP, BP, W, Z, LDZ, WORK, END IF * WORK( 1 ) = SROUNDUP_LWORK(LWMIN) - RWORK( 1 ) = LRWMIN + RWORK( 1 ) = SROUNDUP_LWORK(LRWMIN) IWORK( 1 ) = LIWMIN IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN INFO = -11 @@ -376,7 +377,7 @@ SUBROUTINE CHPGVD( ITYPE, JOBZ, UPLO, N, AP, BP, W, Z, LDZ, WORK, END IF * WORK( 1 ) = SROUNDUP_LWORK(LWMIN) - RWORK( 1 ) = LRWMIN + RWORK( 1 ) = SROUNDUP_LWORK(LRWMIN) IWORK( 1 ) = LIWMIN RETURN * diff --git a/lapack-netlib/SRC/clacpy.f b/lapack-netlib/SRC/clacpy.f index ab4404a29a..0e1a88e70a 100644 --- a/lapack-netlib/SRC/clacpy.f +++ b/lapack-netlib/SRC/clacpy.f @@ -5,7 +5,6 @@ * Online html documentation available at * http://www.netlib.org/lapack/explore-html/ * -*> \htmlonly *> Download CLACPY + dependencies *> *> [TGZ] @@ -13,7 +12,6 @@ *> [ZIP] *> *> [TXT] -*> \endhtmlonly * * Definition: * =========== @@ -96,10 +94,11 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup complexOTHERauxiliary +*> \ingroup lacpy * * ===================================================================== SUBROUTINE CLACPY( UPLO, M, N, A, LDA, B, LDB ) + IMPLICIT NONE * * -- LAPACK auxiliary routine -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- @@ -135,7 +134,7 @@ SUBROUTINE CLACPY( UPLO, M, N, A, LDA, B, LDB ) 20 CONTINUE * ELSE IF( LSAME( UPLO, 'L' ) ) THEN - DO 40 J = 1, N + DO 40 J = 1, MIN( M, N ) DO 30 I = J, M B( I, J ) = A( I, J ) 30 CONTINUE diff --git a/lapack-netlib/SRC/clantr.f b/lapack-netlib/SRC/clantr.f index f1dce1402d..a671f60a2f 100644 --- a/lapack-netlib/SRC/clantr.f +++ b/lapack-netlib/SRC/clantr.f @@ -5,7 +5,6 @@ * Online html documentation available at * http://www.netlib.org/lapack/explore-html/ * -*> \htmlonly *> Download CLANTR + dependencies *> *> [TGZ] @@ -13,7 +12,6 @@ *> [ZIP] *> *> [TXT] -*> \endhtmlonly * * Definition: * =========== @@ -134,11 +132,13 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup complexOTHERauxiliary +*> \ingroup lantr * * ===================================================================== - REAL FUNCTION CLANTR( NORM, UPLO, DIAG, M, N, A, LDA, + REAL FUNCTION CLANTR( NORM, UPLO, DIAG, M, N, A, + $ LDA, $ WORK ) + IMPLICIT NONE * * -- LAPACK auxiliary routine -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- @@ -188,14 +188,16 @@ REAL FUNCTION CLANTR( NORM, UPLO, DIAG, M, N, A, LDA, DO 20 J = 1, N DO 10 I = 1, MIN( M, J-1 ) SUM = ABS( A( I, J ) ) - IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM + IF( VALUE .LT. SUM .OR. + $ SISNAN( SUM ) ) VALUE = SUM 10 CONTINUE 20 CONTINUE ELSE - DO 40 J = 1, N + DO 40 J = 1, MIN( M, N ) DO 30 I = J + 1, M SUM = ABS( A( I, J ) ) - IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM + IF( VALUE .LT. SUM .OR. + $ SISNAN( SUM ) ) VALUE = SUM 30 CONTINUE 40 CONTINUE END IF @@ -205,14 +207,16 @@ REAL FUNCTION CLANTR( NORM, UPLO, DIAG, M, N, A, LDA, DO 60 J = 1, N DO 50 I = 1, MIN( M, J ) SUM = ABS( A( I, J ) ) - IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM + IF( VALUE .LT. SUM .OR. + $ SISNAN( SUM ) ) VALUE = SUM 50 CONTINUE 60 CONTINUE ELSE - DO 80 J = 1, N + DO 80 J = 1, MIN( M, N ) DO 70 I = J, M SUM = ABS( A( I, J ) ) - IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM + IF( VALUE .LT. SUM .OR. + $ SISNAN( SUM ) ) VALUE = SUM 70 CONTINUE 80 CONTINUE END IF @@ -239,7 +243,7 @@ REAL FUNCTION CLANTR( NORM, UPLO, DIAG, M, N, A, LDA, IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM 110 CONTINUE ELSE - DO 140 J = 1, N + DO 140 J = 1, MIN( M, N ) IF( UDIAG ) THEN SUM = ONE DO 120 I = J + 1, M @@ -286,7 +290,7 @@ REAL FUNCTION CLANTR( NORM, UPLO, DIAG, M, N, A, LDA, DO 220 I = N + 1, M WORK( I ) = ZERO 220 CONTINUE - DO 240 J = 1, N + DO 240 J = 1, MIN( M, N ) DO 230 I = J + 1, M WORK( I ) = WORK( I ) + ABS( A( I, J ) ) 230 CONTINUE @@ -295,7 +299,7 @@ REAL FUNCTION CLANTR( NORM, UPLO, DIAG, M, N, A, LDA, DO 250 I = 1, M WORK( I ) = ZERO 250 CONTINUE - DO 270 J = 1, N + DO 270 J = 1, MIN( M, N ) DO 260 I = J, M WORK( I ) = WORK( I ) + ABS( A( I, J ) ) 260 CONTINUE @@ -307,36 +311,39 @@ REAL FUNCTION CLANTR( NORM, UPLO, DIAG, M, N, A, LDA, SUM = WORK( I ) IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM 280 CONTINUE - ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN + ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. + $ ( LSAME( NORM, 'E' ) ) ) THEN * * Find normF(A). * IF( LSAME( UPLO, 'U' ) ) THEN IF( LSAME( DIAG, 'U' ) ) THEN SCALE = ONE - SUM = MIN( M, N ) + SUM = REAL( MIN( M, N ) ) DO 290 J = 2, N - CALL CLASSQ( MIN( M, J-1 ), A( 1, J ), 1, SCALE, SUM ) + CALL CLASSQ( MIN( M, J-1 ), A( 1, J ), 1, SCALE, + $ SUM ) 290 CONTINUE ELSE SCALE = ZERO SUM = ONE DO 300 J = 1, N - CALL CLASSQ( MIN( M, J ), A( 1, J ), 1, SCALE, SUM ) + CALL CLASSQ( MIN( M, J ), A( 1, J ), 1, SCALE, + $ SUM ) 300 CONTINUE END IF ELSE IF( LSAME( DIAG, 'U' ) ) THEN SCALE = ONE - SUM = MIN( M, N ) - DO 310 J = 1, N + SUM = REAL( MIN( M, N ) ) + DO 310 J = 1, MIN( M, N ) CALL CLASSQ( M-J, A( MIN( M, J+1 ), J ), 1, SCALE, $ SUM ) 310 CONTINUE ELSE SCALE = ZERO SUM = ONE - DO 320 J = 1, N + DO 320 J = 1, MIN( M, N ) CALL CLASSQ( M-J+1, A( J, J ), 1, SCALE, SUM ) 320 CONTINUE END IF diff --git a/lapack-netlib/SRC/claqp2.f b/lapack-netlib/SRC/claqp2.f index 6e41afeb4a..ea1e4edfcb 100644 --- a/lapack-netlib/SRC/claqp2.f +++ b/lapack-netlib/SRC/claqp2.f @@ -5,7 +5,6 @@ * Online html documentation available at * http://www.netlib.org/lapack/explore-html/ * -*> \htmlonly *> Download CLAQP2 + dependencies *> *> [TGZ] @@ -13,7 +12,6 @@ *> [ZIP] *> *> [TXT] -*> \endhtmlonly * * Definition: * =========== @@ -122,7 +120,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup complexOTHERauxiliary +*> \ingroup laqp2 * *> \par Contributors: * ================== @@ -139,13 +137,12 @@ *> *> LAPACK Working Note 176 * -*> \htmlonly *> [PDF] -*> \endhtmlonly * * ===================================================================== SUBROUTINE CLAQP2( M, N, OFFSET, A, LDA, JPVT, TAU, VN1, VN2, $ WORK ) + IMPLICIT NONE * * -- LAPACK auxiliary routine -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- @@ -164,17 +161,14 @@ SUBROUTINE CLAQP2( M, N, OFFSET, A, LDA, JPVT, TAU, VN1, VN2, * * .. Parameters .. REAL ZERO, ONE - COMPLEX CONE - PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0, - $ CONE = ( 1.0E+0, 0.0E+0 ) ) + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) * .. * .. Local Scalars .. INTEGER I, ITEMP, J, MN, OFFPI, PVT REAL TEMP, TEMP2, TOL3Z - COMPLEX AII * .. * .. External Subroutines .. - EXTERNAL CLARF, CLARFG, CSWAP + EXTERNAL CLARF1F, CLARFG, CSWAP * .. * .. Intrinsic Functions .. INTRINSIC ABS, CONJG, MAX, MIN, SQRT @@ -211,7 +205,8 @@ SUBROUTINE CLAQP2( M, N, OFFSET, A, LDA, JPVT, TAU, VN1, VN2, * Generate elementary reflector H(i). * IF( OFFPI.LT.M ) THEN - CALL CLARFG( M-OFFPI+1, A( OFFPI, I ), A( OFFPI+1, I ), 1, + CALL CLARFG( M-OFFPI+1, A( OFFPI, I ), A( OFFPI+1, I ), + $ 1, $ TAU( I ) ) ELSE CALL CLARFG( 1, A( M, I ), A( M, I ), 1, TAU( I ) ) @@ -221,12 +216,9 @@ SUBROUTINE CLAQP2( M, N, OFFSET, A, LDA, JPVT, TAU, VN1, VN2, * * Apply H(i)**H to A(offset+i:m,i+1:n) from the left. * - AII = A( OFFPI, I ) - A( OFFPI, I ) = CONE - CALL CLARF( 'Left', M-OFFPI+1, N-I, A( OFFPI, I ), 1, - $ CONJG( TAU( I ) ), A( OFFPI, I+1 ), LDA, - $ WORK( 1 ) ) - A( OFFPI, I ) = AII + CALL CLARF1F( 'Left', M-OFFPI+1, N-I, A( OFFPI, I ), 1, + $ CONJG( TAU( I ) ), A( OFFPI, I+1 ), LDA, + $ WORK( 1 ) ) END IF * * Update partial column norms. diff --git a/lapack-netlib/SRC/claqp2rk.f b/lapack-netlib/SRC/claqp2rk.f index 0501c50bb4..d27d978e95 100644 --- a/lapack-netlib/SRC/claqp2rk.f +++ b/lapack-netlib/SRC/claqp2rk.f @@ -5,7 +5,6 @@ * Online html documentation available at * http://www.netlib.org/lapack/explore-html/ * -*> \htmlonly *> Download CLAQP2RK + dependencies *> *> [TGZ] @@ -13,7 +12,6 @@ *> [ZIP] *> *> [TXT] -*> \endhtmlonly * * Definition: * =========== @@ -254,7 +252,7 @@ *> \param[out] WORK *> \verbatim *> WORK is COMPLEX array, dimension (N-1) -*> Used in CLARF subroutine to apply an elementary +*> Used in CLARF1F subroutine to apply an elementary *> reflector from the left. *> \endverbatim *> @@ -304,27 +302,19 @@ *> C. H. Bischof, Math. and Comp. Sci. Div., Argonne National Lab, USA. *> A BLAS-3 version of the QR factorization with column pivoting. *> LAPACK Working Note 114 -*> \htmlonly *> https://www.netlib.org/lapack/lawnspdf/lawn114.pdf -*> \endhtmlonly *> and in *> SIAM J. Sci. Comput., 19(5):1486-1494, Sept. 1998. -*> \htmlonly *> https://doi.org/10.1137/S1064827595296732 -*> \endhtmlonly *> *> [2] A partial column norm updating strategy developed in 2006. *> Z. Drmac and Z. Bujanovic, Dept. of Math., University of Zagreb, Croatia. *> On the failure of rank revealing QR factorization software – a case study. *> LAPACK Working Note 176. -*> \htmlonly *> http://www.netlib.org/lapack/lawnspdf/lawn176.pdf -*> \endhtmlonly *> and in *> ACM Trans. Math. Softw. 35, 2, Article 12 (July 2008), 28 pages. -*> \htmlonly *> https://doi.org/10.1145/1377612.1377616 -*> \endhtmlonly * *> \par Contributors: * ================== @@ -364,18 +354,16 @@ SUBROUTINE CLAQP2RK( M, N, NRHS, IOFFSET, KMAX, ABSTOL, RELTOL, * .. Parameters .. REAL ZERO, ONE PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) - COMPLEX CZERO, CONE - PARAMETER ( CZERO = ( 0.0E+0, 0.0E+0 ), - $ CONE = ( 1.0E+0, 0.0E+0 ) ) + COMPLEX CZERO + PARAMETER ( CZERO = ( 0.0E+0, 0.0E+0 ) ) * .. * .. Local Scalars .. - INTEGER I, ITEMP, J, JMAXC2NRM, KK, KP, MINMNFACT, - $ MINMNUPDT + INTEGER I, ITEMP, J, JMAXC2NRM, KK, KP, + $ KBOUND, MINMNFACT, MINMNUPDT REAL HUGEVAL, TAUNAN, TEMP, TEMP2, TOL3Z - COMPLEX AIKK * .. * .. External Subroutines .. - EXTERNAL CLARF, CLARFG, CSWAP + EXTERNAL CLARF1F, CLARFG, CSWAP * .. * .. Intrinsic Functions .. INTRINSIC ABS, REAL, CONJG, AIMAG, MAX, MIN, SQRT @@ -402,13 +390,13 @@ SUBROUTINE CLAQP2RK( M, N, NRHS, IOFFSET, KMAX, ABSTOL, RELTOL, * MINMNFACT = MIN( M-IOFFSET, N ) MINMNUPDT = MIN( M-IOFFSET, N+NRHS ) - KMAX = MIN( KMAX, MINMNFACT ) + KBOUND = MIN( KMAX, MINMNFACT ) TOL3Z = SQRT( SLAMCH( 'Epsilon' ) ) HUGEVAL = SLAMCH( 'Overflow' ) * * Compute the factorization, KK is the lomn loop index. * - DO KK = 1, KMAX + DO KK = 1, KBOUND * I = IOFFSET + KK * @@ -633,12 +621,9 @@ SUBROUTINE CLAQP2RK( M, N, NRHS, IOFFSET, KMAX, ABSTOL, RELTOL, * condition is satisfied, not only KK < N+NRHS ) * IF( KK.LT.MINMNUPDT ) THEN - AIKK = A( I, KK ) - A( I, KK ) = CONE - CALL CLARF( 'Left', M-I+1, N+NRHS-KK, A( I, KK ), 1, - $ CONJG( TAU( KK ) ), A( I, KK+1 ), LDA, - $ WORK( 1 ) ) - A( I, KK ) = AIKK + CALL CLARF1F( 'Left', M-I+1, N+NRHS-KK, A( I, KK ), 1, + $ CONJG( TAU( KK ) ), A( I, KK+1 ), LDA, + $ WORK( 1 ) ) END IF * IF( KK.LT.MINMNFACT ) THEN @@ -689,7 +674,7 @@ SUBROUTINE CLAQP2RK( M, N, NRHS, IOFFSET, KMAX, ABSTOL, RELTOL, * i.e. no condition was triggered to exit the routine. * Set the number of factorized columns. * - K = KMAX + K = KBOUND * * We reached the end of the loop, i.e. all KMAX columns were * factorized, we need to set MAXC2NRMK and RELMAXC2NRMK before diff --git a/lapack-netlib/SRC/claqr2.f b/lapack-netlib/SRC/claqr2.f index 1695fbe5bd..22aa712349 100644 --- a/lapack-netlib/SRC/claqr2.f +++ b/lapack-netlib/SRC/claqr2.f @@ -5,7 +5,6 @@ * Online html documentation available at * http://www.netlib.org/lapack/explore-html/ * -*> \htmlonly *> Download CLAQR2 + dependencies *> *> [TGZ] @@ -13,7 +12,6 @@ *> [ZIP] *> *> [TXT] -*> \endhtmlonly * * Definition: * =========== @@ -254,7 +252,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup complexOTHERauxiliary +*> \ingroup laqr2 * *> \par Contributors: * ================== @@ -263,9 +261,11 @@ *> University of Kansas, USA *> * ===================================================================== - SUBROUTINE CLAQR2( WANTT, WANTZ, N, KTOP, KBOT, NW, H, LDH, ILOZ, + SUBROUTINE CLAQR2( WANTT, WANTZ, N, KTOP, KBOT, NW, H, LDH, + $ ILOZ, $ IHIZ, Z, LDZ, NS, ND, SH, V, LDV, NH, T, LDT, $ NV, WV, LDWV, WORK, LWORK ) + IMPLICIT NONE * * -- LAPACK auxiliary routine -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- @@ -291,7 +291,7 @@ SUBROUTINE CLAQR2( WANTT, WANTZ, N, KTOP, KBOT, NW, H, LDH, ILOZ, PARAMETER ( RZERO = 0.0e0, RONE = 1.0e0 ) * .. * .. Local Scalars .. - COMPLEX BETA, CDUM, S, TAU + COMPLEX CDUM, S, TAU REAL FOO, SAFMAX, SAFMIN, SMLNUM, ULP INTEGER I, IFST, ILST, INFO, INFQR, J, JW, KCOL, KLN, $ KNT, KROW, KWTOP, LTOP, LWK1, LWK2, LWKOPT @@ -301,8 +301,9 @@ SUBROUTINE CLAQR2( WANTT, WANTZ, N, KTOP, KBOT, NW, H, LDH, ILOZ, EXTERNAL SLAMCH * .. * .. External Subroutines .. - EXTERNAL CCOPY, CGEHRD, CGEMM, CLACPY, CLAHQR, CLARF, - $ CLARFG, CLASET, CTREXC, CUNMHR, SLABAD + EXTERNAL CCOPY, CGEHRD, CGEMM, CLACPY, CLAHQR, + $ CLARF1F, + $ CLARFG, CLASET, CTREXC, CUNMHR * .. * .. Intrinsic Functions .. INTRINSIC ABS, AIMAG, CMPLX, CONJG, INT, MAX, MIN, REAL @@ -329,7 +330,8 @@ SUBROUTINE CLAQR2( WANTT, WANTZ, N, KTOP, KBOT, NW, H, LDH, ILOZ, * * ==== Workspace query call to CUNMHR ==== * - CALL CUNMHR( 'R', 'N', JW, JW, 1, JW-1, T, LDT, WORK, V, LDV, + CALL CUNMHR( 'R', 'N', JW, JW, 1, JW-1, T, LDT, WORK, V, + $ LDV, $ WORK, -1, INFO ) LWK2 = INT( WORK( 1 ) ) * @@ -360,7 +362,6 @@ SUBROUTINE CLAQR2( WANTT, WANTZ, N, KTOP, KBOT, NW, H, LDH, ILOZ, * SAFMIN = SLAMCH( 'SAFE MINIMUM' ) SAFMAX = RONE / SAFMIN - CALL SLABAD( SAFMIN, SAFMAX ) ULP = SLAMCH( 'PRECISION' ) SMLNUM = SAFMIN*( REAL( N ) / ULP ) * @@ -399,7 +400,8 @@ SUBROUTINE CLAQR2( WANTT, WANTZ, N, KTOP, KBOT, NW, H, LDH, ILOZ, * . here and there to keep track.) ==== * CALL CLACPY( 'U', JW, JW, H( KWTOP, KWTOP ), LDH, T, LDT ) - CALL CCOPY( JW-1, H( KWTOP+1, KWTOP ), LDH+1, T( 2, 1 ), LDT+1 ) + CALL CCOPY( JW-1, H( KWTOP+1, KWTOP ), LDH+1, T( 2, 1 ), + $ LDT+1 ) * CALL CLASET( 'A', JW, JW, ZERO, ONE, V, LDV ) CALL CLAHQR( .true., .true., JW, 1, JW, T, LDT, SH( KWTOP ), 1, @@ -451,7 +453,8 @@ SUBROUTINE CLAQR2( WANTT, WANTZ, N, KTOP, KBOT, NW, H, LDH, ILOZ, 20 CONTINUE ILST = I IF( IFST.NE.ILST ) - $ CALL CTREXC( 'V', JW, T, LDT, V, LDV, IFST, ILST, INFO ) + $ CALL CTREXC( 'V', JW, T, LDT, V, LDV, IFST, ILST, + $ INFO ) 30 CONTINUE END IF * @@ -471,18 +474,17 @@ SUBROUTINE CLAQR2( WANTT, WANTZ, N, KTOP, KBOT, NW, H, LDH, ILOZ, DO 50 I = 1, NS WORK( I ) = CONJG( WORK( I ) ) 50 CONTINUE - BETA = WORK( 1 ) - CALL CLARFG( NS, BETA, WORK( 2 ), 1, TAU ) - WORK( 1 ) = ONE + CALL CLARFG( NS, WORK( 1 ), WORK( 2 ), 1, TAU ) * - CALL CLASET( 'L', JW-2, JW-2, ZERO, ZERO, T( 3, 1 ), LDT ) + CALL CLASET( 'L', JW-2, JW-2, ZERO, ZERO, T( 3, 1 ), + $ LDT ) * - CALL CLARF( 'L', NS, JW, WORK, 1, CONJG( TAU ), T, LDT, - $ WORK( JW+1 ) ) - CALL CLARF( 'R', NS, NS, WORK, 1, TAU, T, LDT, - $ WORK( JW+1 ) ) - CALL CLARF( 'R', JW, NS, WORK, 1, TAU, V, LDV, - $ WORK( JW+1 ) ) + CALL CLARF1F( 'L', NS, JW, WORK, 1, CONJG( TAU ), T, LDT, + $ WORK( JW+1 ) ) + CALL CLARF1F( 'R', NS, NS, WORK, 1, TAU, T, LDT, + $ WORK( JW+1 ) ) + CALL CLARF1F( 'R', JW, NS, WORK, 1, TAU, V, LDV, + $ WORK( JW+1 ) ) * CALL CGEHRD( JW, 1, NS, T, LDT, WORK, WORK( JW+1 ), $ LWORK-JW, INFO ) @@ -500,7 +502,8 @@ SUBROUTINE CLAQR2( WANTT, WANTZ, N, KTOP, KBOT, NW, H, LDH, ILOZ, * . H and Z, if requested. ==== * IF( NS.GT.1 .AND. S.NE.ZERO ) - $ CALL CUNMHR( 'R', 'N', JW, NS, 1, NS, T, LDT, WORK, V, LDV, + $ CALL CUNMHR( 'R', 'N', JW, NS, 1, NS, T, LDT, WORK, V, + $ LDV, $ WORK( JW+1 ), LWORK-JW, INFO ) * * ==== Update vertical slab in H ==== @@ -514,7 +517,8 @@ SUBROUTINE CLAQR2( WANTT, WANTZ, N, KTOP, KBOT, NW, H, LDH, ILOZ, KLN = MIN( NV, KWTOP-KROW ) CALL CGEMM( 'N', 'N', KLN, JW, JW, ONE, H( KROW, KWTOP ), $ LDH, V, LDV, ZERO, WV, LDWV ) - CALL CLACPY( 'A', KLN, JW, WV, LDWV, H( KROW, KWTOP ), LDH ) + CALL CLACPY( 'A', KLN, JW, WV, LDWV, H( KROW, KWTOP ), + $ LDH ) 60 CONTINUE * * ==== Update horizontal slab in H ==== @@ -534,7 +538,8 @@ SUBROUTINE CLAQR2( WANTT, WANTZ, N, KTOP, KBOT, NW, H, LDH, ILOZ, IF( WANTZ ) THEN DO 80 KROW = ILOZ, IHIZ, NV KLN = MIN( NV, IHIZ-KROW+1 ) - CALL CGEMM( 'N', 'N', KLN, JW, JW, ONE, Z( KROW, KWTOP ), + CALL CGEMM( 'N', 'N', KLN, JW, JW, ONE, Z( KROW, + $ KWTOP ), $ LDZ, V, LDV, ZERO, WV, LDWV ) CALL CLACPY( 'A', KLN, JW, WV, LDWV, Z( KROW, KWTOP ), $ LDZ ) diff --git a/lapack-netlib/SRC/claqr3.f b/lapack-netlib/SRC/claqr3.f index 2f5402de97..c0f3530c30 100644 --- a/lapack-netlib/SRC/claqr3.f +++ b/lapack-netlib/SRC/claqr3.f @@ -5,7 +5,6 @@ * Online html documentation available at * http://www.netlib.org/lapack/explore-html/ * -*> \htmlonly *> Download CLAQR3 + dependencies *> *> [TGZ] @@ -13,7 +12,6 @@ *> [ZIP] *> *> [TXT] -*> \endhtmlonly * * Definition: * =========== @@ -251,7 +249,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup complexOTHERauxiliary +*> \ingroup laqr3 * *> \par Contributors: * ================== @@ -260,9 +258,11 @@ *> University of Kansas, USA *> * ===================================================================== - SUBROUTINE CLAQR3( WANTT, WANTZ, N, KTOP, KBOT, NW, H, LDH, ILOZ, + SUBROUTINE CLAQR3( WANTT, WANTZ, N, KTOP, KBOT, NW, H, LDH, + $ ILOZ, $ IHIZ, Z, LDZ, NS, ND, SH, V, LDV, NH, T, LDT, $ NV, WV, LDWV, WORK, LWORK ) + IMPLICIT NONE * * -- LAPACK auxiliary routine -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- @@ -288,7 +288,7 @@ SUBROUTINE CLAQR3( WANTT, WANTZ, N, KTOP, KBOT, NW, H, LDH, ILOZ, PARAMETER ( RZERO = 0.0e0, RONE = 1.0e0 ) * .. * .. Local Scalars .. - COMPLEX BETA, CDUM, S, TAU + COMPLEX CDUM, S, TAU REAL FOO, SAFMAX, SAFMIN, SMLNUM, ULP INTEGER I, IFST, ILST, INFO, INFQR, J, JW, KCOL, KLN, $ KNT, KROW, KWTOP, LTOP, LWK1, LWK2, LWK3, @@ -300,8 +300,9 @@ SUBROUTINE CLAQR3( WANTT, WANTZ, N, KTOP, KBOT, NW, H, LDH, ILOZ, EXTERNAL SLAMCH, ILAENV * .. * .. External Subroutines .. - EXTERNAL CCOPY, CGEHRD, CGEMM, CLACPY, CLAHQR, CLAQR4, - $ CLARF, CLARFG, CLASET, CTREXC, CUNMHR, SLABAD + EXTERNAL CCOPY, CGEHRD, CGEMM, CLACPY, CLAHQR, + $ CLAQR4, + $ CLARF1F, CLARFG, CLASET, CTREXC, CUNMHR * .. * .. Intrinsic Functions .. INTRINSIC ABS, AIMAG, CMPLX, CONJG, INT, MAX, MIN, REAL @@ -328,13 +329,15 @@ SUBROUTINE CLAQR3( WANTT, WANTZ, N, KTOP, KBOT, NW, H, LDH, ILOZ, * * ==== Workspace query call to CUNMHR ==== * - CALL CUNMHR( 'R', 'N', JW, JW, 1, JW-1, T, LDT, WORK, V, LDV, + CALL CUNMHR( 'R', 'N', JW, JW, 1, JW-1, T, LDT, WORK, V, + $ LDV, $ WORK, -1, INFO ) LWK2 = INT( WORK( 1 ) ) * * ==== Workspace query call to CLAQR4 ==== * - CALL CLAQR4( .true., .true., JW, 1, JW, T, LDT, SH, 1, JW, V, + CALL CLAQR4( .true., .true., JW, 1, JW, T, LDT, SH, 1, JW, + $ V, $ LDV, WORK, -1, INFQR ) LWK3 = INT( WORK( 1 ) ) * @@ -365,7 +368,6 @@ SUBROUTINE CLAQR3( WANTT, WANTZ, N, KTOP, KBOT, NW, H, LDH, ILOZ, * SAFMIN = SLAMCH( 'SAFE MINIMUM' ) SAFMAX = RONE / SAFMIN - CALL SLABAD( SAFMIN, SAFMAX ) ULP = SLAMCH( 'PRECISION' ) SMLNUM = SAFMIN*( REAL( N ) / ULP ) * @@ -404,15 +406,18 @@ SUBROUTINE CLAQR3( WANTT, WANTZ, N, KTOP, KBOT, NW, H, LDH, ILOZ, * . here and there to keep track.) ==== * CALL CLACPY( 'U', JW, JW, H( KWTOP, KWTOP ), LDH, T, LDT ) - CALL CCOPY( JW-1, H( KWTOP+1, KWTOP ), LDH+1, T( 2, 1 ), LDT+1 ) + CALL CCOPY( JW-1, H( KWTOP+1, KWTOP ), LDH+1, T( 2, 1 ), + $ LDT+1 ) * CALL CLASET( 'A', JW, JW, ZERO, ONE, V, LDV ) NMIN = ILAENV( 12, 'CLAQR3', 'SV', JW, 1, JW, LWORK ) IF( JW.GT.NMIN ) THEN - CALL CLAQR4( .true., .true., JW, 1, JW, T, LDT, SH( KWTOP ), 1, + CALL CLAQR4( .true., .true., JW, 1, JW, T, LDT, SH( KWTOP ), + $ 1, $ JW, V, LDV, WORK, LWORK, INFQR ) ELSE - CALL CLAHQR( .true., .true., JW, 1, JW, T, LDT, SH( KWTOP ), 1, + CALL CLAHQR( .true., .true., JW, 1, JW, T, LDT, SH( KWTOP ), + $ 1, $ JW, V, LDV, INFQR ) END IF * @@ -462,7 +467,8 @@ SUBROUTINE CLAQR3( WANTT, WANTZ, N, KTOP, KBOT, NW, H, LDH, ILOZ, 20 CONTINUE ILST = I IF( IFST.NE.ILST ) - $ CALL CTREXC( 'V', JW, T, LDT, V, LDV, IFST, ILST, INFO ) + $ CALL CTREXC( 'V', JW, T, LDT, V, LDV, IFST, ILST, + $ INFO ) 30 CONTINUE END IF * @@ -482,18 +488,17 @@ SUBROUTINE CLAQR3( WANTT, WANTZ, N, KTOP, KBOT, NW, H, LDH, ILOZ, DO 50 I = 1, NS WORK( I ) = CONJG( WORK( I ) ) 50 CONTINUE - BETA = WORK( 1 ) - CALL CLARFG( NS, BETA, WORK( 2 ), 1, TAU ) - WORK( 1 ) = ONE + CALL CLARFG( NS, WORK( 1 ), WORK( 2 ), 1, TAU ) * - CALL CLASET( 'L', JW-2, JW-2, ZERO, ZERO, T( 3, 1 ), LDT ) + CALL CLASET( 'L', JW-2, JW-2, ZERO, ZERO, T( 3, 1 ), + $ LDT ) * - CALL CLARF( 'L', NS, JW, WORK, 1, CONJG( TAU ), T, LDT, - $ WORK( JW+1 ) ) - CALL CLARF( 'R', NS, NS, WORK, 1, TAU, T, LDT, - $ WORK( JW+1 ) ) - CALL CLARF( 'R', JW, NS, WORK, 1, TAU, V, LDV, - $ WORK( JW+1 ) ) + CALL CLARF1F( 'L', NS, JW, WORK, 1, CONJG( TAU ), T, LDT, + $ WORK( JW+1 ) ) + CALL CLARF1F( 'R', NS, NS, WORK, 1, TAU, T, LDT, + $ WORK( JW+1 ) ) + CALL CLARF1F( 'R', JW, NS, WORK, 1, TAU, V, LDV, + $ WORK( JW+1 ) ) * CALL CGEHRD( JW, 1, NS, T, LDT, WORK, WORK( JW+1 ), $ LWORK-JW, INFO ) @@ -511,7 +516,8 @@ SUBROUTINE CLAQR3( WANTT, WANTZ, N, KTOP, KBOT, NW, H, LDH, ILOZ, * . H and Z, if requested. ==== * IF( NS.GT.1 .AND. S.NE.ZERO ) - $ CALL CUNMHR( 'R', 'N', JW, NS, 1, NS, T, LDT, WORK, V, LDV, + $ CALL CUNMHR( 'R', 'N', JW, NS, 1, NS, T, LDT, WORK, V, + $ LDV, $ WORK( JW+1 ), LWORK-JW, INFO ) * * ==== Update vertical slab in H ==== @@ -525,7 +531,8 @@ SUBROUTINE CLAQR3( WANTT, WANTZ, N, KTOP, KBOT, NW, H, LDH, ILOZ, KLN = MIN( NV, KWTOP-KROW ) CALL CGEMM( 'N', 'N', KLN, JW, JW, ONE, H( KROW, KWTOP ), $ LDH, V, LDV, ZERO, WV, LDWV ) - CALL CLACPY( 'A', KLN, JW, WV, LDWV, H( KROW, KWTOP ), LDH ) + CALL CLACPY( 'A', KLN, JW, WV, LDWV, H( KROW, KWTOP ), + $ LDH ) 60 CONTINUE * * ==== Update horizontal slab in H ==== @@ -545,7 +552,8 @@ SUBROUTINE CLAQR3( WANTT, WANTZ, N, KTOP, KBOT, NW, H, LDH, ILOZ, IF( WANTZ ) THEN DO 80 KROW = ILOZ, IHIZ, NV KLN = MIN( NV, IHIZ-KROW+1 ) - CALL CGEMM( 'N', 'N', KLN, JW, JW, ONE, Z( KROW, KWTOP ), + CALL CGEMM( 'N', 'N', KLN, JW, JW, ONE, Z( KROW, + $ KWTOP ), $ LDZ, V, LDV, ZERO, WV, LDWV ) CALL CLACPY( 'A', KLN, JW, WV, LDWV, Z( KROW, KWTOP ), $ LDZ ) diff --git a/lapack-netlib/SRC/clarf1f.c b/lapack-netlib/SRC/clarf1f.c new file mode 100644 index 0000000000..1644a44489 --- /dev/null +++ b/lapack-netlib/SRC/clarf1f.c @@ -0,0 +1,551 @@ +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +#if defined(_WIN64) +typedef long long BLASLONG; +typedef unsigned long long BLASULONG; +#else +typedef long BLASLONG; +typedef unsigned long BLASULONG; +#endif + +#ifdef LAPACK_ILP64 +typedef BLASLONG blasint; +#if defined(_WIN64) +#define blasabs(x) llabs(x) +#else +#define blasabs(x) labs(x) +#endif +#else +typedef int blasint; +#define blasabs(x) abs(x) +#endif + +typedef blasint integer; + +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +#ifdef _MSC_VER +static inline _Fcomplex Cf(complex *z) {_Fcomplex zz={z->r , z->i}; return zz;} +static inline _Dcomplex Cd(doublecomplex *z) {_Dcomplex zz={z->r , z->i};return zz;} +static inline _Fcomplex * _pCf(complex *z) {return (_Fcomplex*)z;} +static inline _Dcomplex * _pCd(doublecomplex *z) {return (_Dcomplex*)z;} +#else +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#endif +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#ifdef _MSC_VER +#define c_div(c, a, b) {Cf(c)._Val[0] = (Cf(a)._Val[0]/Cf(b)._Val[0]); Cf(c)._Val[1]=(Cf(a)._Val[1]/Cf(b)._Val[1]);} +#define z_div(c, a, b) {Cd(c)._Val[0] = (Cd(a)._Val[0]/Cd(b)._Val[0]); Cd(c)._Val[1]=(Cd(a)._Val[1]/Cd(b)._Val[1]);} +#else +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#endif +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conjf(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimagf(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +/* Table of constant values */ + +static complex c_b1 = {1.f,0.f}; +static complex c_b2 = {0.f,0.f}; +static integer c__1 = 1; + +/* > \brief \b CLARF1F applies an elementary reflector to a general rectangular */ +/* matrix assuming v(1) = 1. */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > Download CLARF1F + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE CLARF1F( SIDE, M, N, V, INCV, TAU, C, LDC, WORK ) */ + +/* CHARACTER SIDE */ +/* INTEGER INCV, LDC, M, N */ +/* COMPLEX TAU */ +/* COMPLEX C( LDC, * ), V( * ), WORK( * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > CLARF1F applies a complex elementary reflector H to a complex m by n matrix */ +/* > C, from either the left or the right. H is represented in the form */ +/* > */ +/* > H = I - tau * v * v**H */ +/* > */ +/* > where tau is a complex scalar and v is a complex vector assuming v(1) = 1. */ +/* > */ +/* > If tau = 0, then H is taken to be the unit matrix. */ +/* > */ +/* > To apply H**H (the conjugate transpose of H), supply conjg(tau) instead */ +/* > tau. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] SIDE */ +/* > \verbatim */ +/* > SIDE is CHARACTER*1 */ +/* > = 'L': form H * C */ +/* > = 'R': form C * H */ +/* > \endverbatim */ +/* > */ +/* > \param[in] M */ +/* > \verbatim */ +/* > M is INTEGER */ +/* > The number of rows of the matrix C. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The number of columns of the matrix C. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] V */ +/* > \verbatim */ +/* > V is COMPLEX array, dimension */ +/* > (1 + (M-1)*abs(INCV)) if SIDE = 'L' */ +/* > or (1 + (N-1)*abs(INCV)) if SIDE = 'R' */ +/* > The vector v in the representation of H. V is not used if */ +/* > TAU = 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] INCV */ +/* > \verbatim */ +/* > INCV is INTEGER */ +/* > The increment between elements of v. INCV <> 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] TAU */ +/* > \verbatim */ +/* > TAU is COMPLEX */ +/* > The value tau in the representation of H. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] C */ +/* > \verbatim */ +/* > C is COMPLEX array, dimension (LDC,N) */ +/* > On entry, the m by n matrix C. */ +/* > On exit, C is overwritten by the matrix H * C if SIDE = 'L', */ +/* > or C * H if SIDE = 'R'. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDC */ +/* > \verbatim */ +/* > LDC is INTEGER */ +/* > The leading dimension of the array C. LDC >= f2cmax(1,M). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is COMPLEX array, dimension */ +/* > (N) if SIDE = 'L' */ +/* > or (M) if SIDE = 'R' */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \ingroup larf1f */ + +/* ===================================================================== */ +/* Subroutine */ int clarf1f_(char *side, integer *m, integer *n, complex *v, + integer *incv, complex *tau, complex *c__, integer *ldc, complex * + work) +{ + /* System generated locals */ + integer c_dim1, c_offset, i__1, i__2, i__3; + complex q__1, q__2, q__3; + + /* Local variables */ + integer i__; + logical applyleft; + extern /* Subroutine */ int cgerc_(integer *, integer *, complex *, + complex *, integer *, complex *, integer *, complex *, integer *), + cscal_(integer *, complex *, complex *, integer *), cgemv_(char * + , integer *, integer *, complex *, complex *, integer *, complex * + , integer *, complex *, complex *, integer *); + extern logical lsame_(char *, char *); + integer lastc; + extern /* Subroutine */ int caxpy_(integer *, complex *, complex *, + integer *, complex *, integer *); + integer lastv; + extern integer ilaclc_(integer *, integer *, complex *, integer *), + ilaclr_(integer *, integer *, complex *, integer *); + + +/* -- LAPACK auxiliary routine -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + + +/* ===================================================================== */ + + + /* Parameter adjustments */ + --v; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + --work; + + /* Function Body */ + applyleft = lsame_(side, "L"); + lastv = 1; + lastc = 0; + if (tau->r != 0.f || tau->i != 0.f) { +/* Set up variables for scanning V. LASTV begins pointing to the end */ +/* of V up to V(1). */ + if (applyleft) { + lastv = *m; + } else { + lastv = *n; + } + if (*incv > 0) { + i__ = (lastv - 1) * *incv + 1; + } else { + i__ = 1; + } +/* Look for the last non-zero row in V. */ + for(;;) { /* while(complicated condition) */ + i__1 = i__; + if (!(lastv > 1 && (v[i__1].r == 0.f && v[i__1].i == 0.f))) + break; + --lastv; + i__ -= *incv; + } + if (applyleft) { +/* Scan for the last non-zero column in C(1:lastv,:). */ + lastc = ilaclc_(&lastv, n, &c__[c_offset], ldc); + } else { +/* Scan for the last non-zero row in C(:,1:lastv). */ + lastc = ilaclr_(m, &lastv, &c__[c_offset], ldc); + } + } + if (lastc == 0) { + return 0; + } + if (applyleft) { + +/* Form H * C */ + + if (lastv == 1) { + +/* C(1,1:lastc) := ( 1 - tau ) * C(1,1:lastc) */ + + q__1.r = 1.f - tau->r, q__1.i = 0.f - tau->i; + cscal_(&lastc, &q__1, &c__[c_offset], ldc); + } else { + +/* w(1:lastc,1) := C(2:lastv,1:lastc)**H * v(2:lastv,1) */ + + i__1 = lastv - 1; + cgemv_("Conjugate transpose", &i__1, &lastc, &c_b1, &c__[c_dim1 + + 2], ldc, &v[*incv + 1], incv, &c_b2, &work[1], &c__1); + +/* w(1:lastc,1) += v(1,1) * C(1,1:lastc)**H */ + + i__1 = lastc; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__; + i__3 = i__; + r_cnjg(&q__2, &c__[i__ * c_dim1 + 1]); + q__1.r = work[i__3].r + q__2.r, q__1.i = work[i__3].i + + q__2.i; + work[i__2].r = q__1.r, work[i__2].i = q__1.i; + } + +/* C(1, 1:lastc) += - tau * v(1,1) * w(1:lastc,1)**H */ + + i__1 = lastc; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__ * c_dim1 + 1; + i__3 = i__ * c_dim1 + 1; + r_cnjg(&q__3, &work[i__]); + q__2.r = tau->r * q__3.r - tau->i * q__3.i, q__2.i = tau->r * + q__3.i + tau->i * q__3.r; + q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; + c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; + } + +/* C(2:lastv,1:lastc) += - tau * v(2:lastv,1) * w(1:lastc,1)**H */ + + i__1 = lastv - 1; + q__1.r = -tau->r, q__1.i = -tau->i; + cgerc_(&i__1, &lastc, &q__1, &v[*incv + 1], incv, &work[1], &c__1, + &c__[c_dim1 + 2], ldc); + } + } else { + +/* Form C * H */ + + if (lastv == 1) { + +/* C(1:lastc,1) := ( 1 - tau ) * C(1:lastc,1) */ + + q__1.r = 1.f - tau->r, q__1.i = 0.f - tau->i; + cscal_(&lastc, &q__1, &c__[c_offset], &c__1); + } else { + +/* w(1:lastc,1) := C(1:lastc,2:lastv) * v(2:lastv,1) */ + + i__1 = lastv - 1; + cgemv_("No transpose", &lastc, &i__1, &c_b1, &c__[(c_dim1 << 1) + + 1], ldc, &v[*incv + 1], incv, &c_b2, &work[1], &c__1); + +/* w(1:lastc,1) += v(1,1) * C(1:lastc,1) */ + + caxpy_(&lastc, &c_b1, &c__[c_offset], &c__1, &work[1], &c__1); + +/* C(1:lastc,1) += - tau * v(1,1) * w(1:lastc,1) */ + + q__1.r = -tau->r, q__1.i = -tau->i; + caxpy_(&lastc, &q__1, &work[1], &c__1, &c__[c_offset], &c__1); + +/* C(1:lastc,2:lastv) += - tau * w(1:lastc,1) * v(2:lastv)**H */ + + i__1 = lastv - 1; + q__1.r = -tau->r, q__1.i = -tau->i; + cgerc_(&lastc, &i__1, &q__1, &work[1], &c__1, &v[*incv + 1], incv, + &c__[(c_dim1 << 1) + 1], ldc); + } + } + return 0; + +/* End of CLARF1F */ + +} /* clarf1f_ */ + diff --git a/lapack-netlib/SRC/clarf1f.f b/lapack-netlib/SRC/clarf1f.f new file mode 100644 index 0000000000..b5d4e47a46 --- /dev/null +++ b/lapack-netlib/SRC/clarf1f.f @@ -0,0 +1,266 @@ +*> \brief \b CLARF1F applies an elementary reflector to a general rectangular +* matrix assuming v(1) = 1. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> Download CLARF1F + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +* +* Definition: +* =========== +* +* SUBROUTINE CLARF1F( SIDE, M, N, V, INCV, TAU, C, LDC, WORK ) +* +* .. Scalar Arguments .. +* CHARACTER SIDE +* INTEGER INCV, LDC, M, N +* COMPLEX TAU +* .. +* .. Array Arguments .. +* COMPLEX C( LDC, * ), V( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CLARF1F applies a complex elementary reflector H to a complex m by n matrix +*> C, from either the left or the right. H is represented in the form +*> +*> H = I - tau * v * v**H +*> +*> where tau is a complex scalar and v is a complex vector assuming v(1) = 1. +*> +*> If tau = 0, then H is taken to be the unit matrix. +*> +*> To apply H**H (the conjugate transpose of H), supply conjg(tau) instead +*> tau. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] SIDE +*> \verbatim +*> SIDE is CHARACTER*1 +*> = 'L': form H * C +*> = 'R': form C * H +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix C. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix C. +*> \endverbatim +*> +*> \param[in] V +*> \verbatim +*> V is COMPLEX array, dimension +*> (1 + (M-1)*abs(INCV)) if SIDE = 'L' +*> or (1 + (N-1)*abs(INCV)) if SIDE = 'R' +*> The vector v in the representation of H. V is not used if +*> TAU = 0. +*> \endverbatim +*> +*> \param[in] INCV +*> \verbatim +*> INCV is INTEGER +*> The increment between elements of v. INCV <> 0. +*> \endverbatim +*> +*> \param[in] TAU +*> \verbatim +*> TAU is COMPLEX +*> The value tau in the representation of H. +*> \endverbatim +*> +*> \param[in,out] C +*> \verbatim +*> C is COMPLEX array, dimension (LDC,N) +*> On entry, the m by n matrix C. +*> On exit, C is overwritten by the matrix H * C if SIDE = 'L', +*> or C * H if SIDE = 'R'. +*> \endverbatim +*> +*> \param[in] LDC +*> \verbatim +*> LDC is INTEGER +*> The leading dimension of the array C. LDC >= max(1,M). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension +*> (N) if SIDE = 'L' +*> or (M) if SIDE = 'R' +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \ingroup larf1f +* +* ===================================================================== + SUBROUTINE CLARF1F( SIDE, M, N, V, INCV, TAU, C, LDC, WORK ) + IMPLICIT NONE +* +* -- LAPACK auxiliary routine -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* +* .. Scalar Arguments .. + CHARACTER SIDE + INTEGER INCV, LDC, M, N + COMPLEX TAU +* .. +* .. Array Arguments .. + COMPLEX C( LDC, * ), V( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX ONE, ZERO + PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ), + $ ZERO = ( 0.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL APPLYLEFT + INTEGER I, LASTV, LASTC +* .. +* .. External Subroutines .. + EXTERNAL CAXPY, CGEMV, CGERC, CSCAL +* .. +* .. Intrinsic Functions .. + INTRINSIC CONJG +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILACLR, ILACLC + EXTERNAL LSAME, ILACLR, ILACLC +* .. +* .. Executable Statements .. +* + APPLYLEFT = LSAME( SIDE, 'L' ) + LASTV = 1 + LASTC = 0 + IF( TAU.NE.ZERO ) THEN +! Set up variables for scanning V. LASTV begins pointing to the end +! of V up to V(1). + IF( APPLYLEFT ) THEN + LASTV = M + ELSE + LASTV = N + END IF + IF( INCV.GT.0 ) THEN + I = 1 + (LASTV-1) * INCV + ELSE + I = 1 + END IF +! Look for the last non-zero row in V. + DO WHILE( LASTV.GT.1 .AND. V( I ).EQ.ZERO ) + LASTV = LASTV - 1 + I = I - INCV + END DO + IF( APPLYLEFT ) THEN +! Scan for the last non-zero column in C(1:lastv,:). + LASTC = ILACLC(LASTV, N, C, LDC) + ELSE +! Scan for the last non-zero row in C(:,1:lastv). + LASTC = ILACLR(M, LASTV, C, LDC) + END IF + END IF + IF( LASTC.EQ.0 ) THEN + RETURN + END IF + IF( APPLYLEFT ) THEN +* +* Form H * C +* + IF( LASTV.EQ.1 ) THEN +* +* C(1,1:lastc) := ( 1 - tau ) * C(1,1:lastc) +* + CALL CSCAL( LASTC, ONE - TAU, C, LDC ) + ELSE +* +* w(1:lastc,1) := C(2:lastv,1:lastc)**H * v(2:lastv,1) +* + CALL CGEMV( 'Conjugate transpose', LASTV - 1, LASTC, ONE, + $ C( 2, 1 ), LDC, V( 1 + INCV ), INCV, ZERO, + $ WORK, 1 ) +* +* w(1:lastc,1) += v(1,1) * C(1,1:lastc)**H +* + DO I = 1, LASTC + WORK( I ) = WORK( I ) + CONJG( C( 1, I ) ) + END DO +* +* C(1, 1:lastc) += - tau * v(1,1) * w(1:lastc,1)**H +* + DO I = 1, LASTC + C( 1, I ) = C( 1, I ) - TAU * CONJG( WORK( I ) ) + END DO +* +* C(2:lastv,1:lastc) += - tau * v(2:lastv,1) * w(1:lastc,1)**H +* + CALL CGERC( LASTV - 1, LASTC, -TAU, V( 1 + INCV ), INCV, + $ WORK, 1, C( 2, 1 ), LDC ) + END IF + ELSE +* +* Form C * H +* + IF( LASTV.EQ.1 ) THEN +* +* C(1:lastc,1) := ( 1 - tau ) * C(1:lastc,1) +* + CALL CSCAL( LASTC, ONE - TAU, C, 1 ) + ELSE +* +* w(1:lastc,1) := C(1:lastc,2:lastv) * v(2:lastv,1) +* + CALL CGEMV( 'No transpose', LASTC, LASTV - 1, ONE, + $ C( 1, 2 ), LDC, V( 1 + INCV ), INCV, ZERO, + $ WORK, 1 ) +* +* w(1:lastc,1) += v(1,1) * C(1:lastc,1) +* + CALL CAXPY( LASTC, ONE, C, 1, WORK, 1 ) +* +* C(1:lastc,1) += - tau * v(1,1) * w(1:lastc,1) +* + CALL CAXPY( LASTC, -TAU, WORK, 1, C, 1 ) +* +* C(1:lastc,2:lastv) += - tau * w(1:lastc,1) * v(2:lastv)**H +* + CALL CGERC( LASTC, LASTV - 1, -TAU, WORK, 1, + $ V( 1 + INCV ), INCV, C( 1, 2 ), LDC ) + END IF + END IF + RETURN +* +* End of CLARF1F +* + END diff --git a/lapack-netlib/SRC/clarf1l.c b/lapack-netlib/SRC/clarf1l.c new file mode 100644 index 0000000000..054d6416fd --- /dev/null +++ b/lapack-netlib/SRC/clarf1l.c @@ -0,0 +1,551 @@ +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +#if defined(_WIN64) +typedef long long BLASLONG; +typedef unsigned long long BLASULONG; +#else +typedef long BLASLONG; +typedef unsigned long BLASULONG; +#endif + +#ifdef LAPACK_ILP64 +typedef BLASLONG blasint; +#if defined(_WIN64) +#define blasabs(x) llabs(x) +#else +#define blasabs(x) labs(x) +#endif +#else +typedef int blasint; +#define blasabs(x) abs(x) +#endif + +typedef blasint integer; + +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +#ifdef _MSC_VER +static inline _Fcomplex Cf(complex *z) {_Fcomplex zz={z->r , z->i}; return zz;} +static inline _Dcomplex Cd(doublecomplex *z) {_Dcomplex zz={z->r , z->i};return zz;} +static inline _Fcomplex * _pCf(complex *z) {return (_Fcomplex*)z;} +static inline _Dcomplex * _pCd(doublecomplex *z) {return (_Dcomplex*)z;} +#else +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#endif +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#ifdef _MSC_VER +#define c_div(c, a, b) {Cf(c)._Val[0] = (Cf(a)._Val[0]/Cf(b)._Val[0]); Cf(c)._Val[1]=(Cf(a)._Val[1]/Cf(b)._Val[1]);} +#define z_div(c, a, b) {Cd(c)._Val[0] = (Cd(a)._Val[0]/Cd(b)._Val[0]); Cd(c)._Val[1]=(Cd(a)._Val[1]/Cd(b)._Val[1]);} +#else +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#endif +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conjf(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimagf(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +/* Table of constant values */ + +static complex c_b1 = {1.f,0.f}; +static complex c_b2 = {0.f,0.f}; +static integer c__1 = 1; + +/* > \brief \b CLARF1L applies an elementary reflector to a general rectangular */ +/* matrix assuming v(lastv) = 1, where lastv is the last non-zero */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > Download CLARF1L + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE CLARF1L( SIDE, M, N, V, INCV, TAU, C, LDC, WORK ) */ + +/* CHARACTER SIDE */ +/* INTEGER INCV, LDC, M, N */ +/* COMPLEX TAU */ +/* COMPLEX C( LDC, * ), V( * ), WORK( * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > CLARF1L applies a complex elementary reflector H to a complex m by n matrix */ +/* > C, from either the left or the right. H is represented in the form */ +/* > */ +/* > H = I - tau * v * v**H */ +/* > */ +/* > where tau is a real scalar and v is a real vector assuming v(lastv) = 1, */ +/* > where lastv is the last non-zero element. */ +/* > */ +/* > If tau = 0, then H is taken to be the unit matrix. */ +/* > */ +/* > To apply H**H (the conjugate transpose of H), supply conjg(tau) instead */ +/* > tau. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] SIDE */ +/* > \verbatim */ +/* > SIDE is CHARACTER*1 */ +/* > = 'L': form H * C */ +/* > = 'R': form C * H */ +/* > \endverbatim */ +/* > */ +/* > \param[in] M */ +/* > \verbatim */ +/* > M is INTEGER */ +/* > The number of rows of the matrix C. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The number of columns of the matrix C. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] V */ +/* > \verbatim */ +/* > V is COMPLEX array, dimension */ +/* > (1 + (M-1)*abs(INCV)) if SIDE = 'L' */ +/* > or (1 + (N-1)*abs(INCV)) if SIDE = 'R' */ +/* > The vector v in the representation of H. V is not used if */ +/* > TAU = 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] INCV */ +/* > \verbatim */ +/* > INCV is INTEGER */ +/* > The increment between elements of v. INCV > 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] TAU */ +/* > \verbatim */ +/* > TAU is COMPLEX */ +/* > The value tau in the representation of H. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] C */ +/* > \verbatim */ +/* > C is COMPLEX array, dimension (LDC,N) */ +/* > On entry, the m by n matrix C. */ +/* > On exit, C is overwritten by the matrix H * C if SIDE = 'L', */ +/* > or C * H if SIDE = 'R'. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDC */ +/* > \verbatim */ +/* > LDC is INTEGER */ +/* > The leading dimension of the array C. LDC >= f2cmax(1,M). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is COMPLEX array, dimension */ +/* > (N) if SIDE = 'L' */ +/* > or (M) if SIDE = 'R' */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \ingroup larf1f */ + +/* ===================================================================== */ +/* Subroutine */ int clarf1l_(char *side, integer *m, integer *n, complex *v, + integer *incv, complex *tau, complex *c__, integer *ldc, complex * + work) +{ + /* System generated locals */ + integer c_dim1, c_offset, i__1, i__2, i__3; + complex q__1, q__2, q__3; + + /* Local variables */ + integer i__, j; + logical applyleft; + extern /* Subroutine */ int cgerc_(integer *, integer *, complex *, + complex *, integer *, complex *, integer *, complex *, integer *), + cscal_(integer *, complex *, complex *, integer *), cgemv_(char * + , integer *, integer *, complex *, complex *, integer *, complex * + , integer *, complex *, complex *, integer *); + extern logical lsame_(char *, char *); + integer lastc; + extern /* Subroutine */ int caxpy_(integer *, complex *, complex *, + integer *, complex *, integer *); + integer lastv; + extern integer ilaclc_(integer *, integer *, complex *, integer *), + ilaclr_(integer *, integer *, complex *, integer *); + integer firstv; + + +/* -- LAPACK auxiliary routine -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + + +/* ===================================================================== */ + + + /* Parameter adjustments */ + --v; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + --work; + + /* Function Body */ + applyleft = lsame_(side, "L"); + firstv = 1; + lastc = 0; + if (tau->r != 0.f || tau->i != 0.f) { +/* Set up variables for scanning V. LASTV begins pointing to the end */ +/* of V up to V(1). */ + if (applyleft) { + lastv = *m; + } else { + lastv = *n; + } + i__ = 1; +/* Look for the last non-zero row in V. */ + for(;;) { /* while(complicated condition) */ + i__1 = i__; + if (!(lastv > firstv && (v[i__1].r == 0.f && v[i__1].i == 0.f))) + break; + ++firstv; + i__ += *incv; + } + if (applyleft) { +/* Scan for the last non-zero column in C(1:lastv,:). */ + lastc = ilaclc_(&lastv, n, &c__[c_offset], ldc); + } else { +/* Scan for the last non-zero row in C(:,1:lastv). */ + lastc = ilaclr_(m, &lastv, &c__[c_offset], ldc); + } + } + if (lastc == 0) { + return 0; + } + if (applyleft) { + +/* Form H * C */ + + if (lastv == firstv) { + +/* C(lastv,1:lastc) := ( 1 - tau ) * C(lastv,1:lastc) */ + + q__1.r = 1.f - tau->r, q__1.i = 0.f - tau->i; + cscal_(&lastc, &q__1, &c__[lastv + c_dim1], ldc); + } else { + +/* w(1:lastc,1) := C(firstv:lastv-1,1:lastc)**T * v(firstv:lastv-1,1) */ + + i__1 = lastv - firstv; + cgemv_("Conjugate transpose", &i__1, &lastc, &c_b1, &c__[firstv + + c_dim1], ldc, &v[i__], incv, &c_b2, &work[1], &c__1); + +/* w(1:lastc,1) += C(lastv,1:lastc)**H * v(lastv,1) */ + + i__1 = lastc; + for (j = 1; j <= i__1; ++j) { + i__2 = j; + i__3 = j; + r_cnjg(&q__2, &c__[lastv + j * c_dim1]); + q__1.r = work[i__3].r + q__2.r, q__1.i = work[i__3].i + + q__2.i; + work[i__2].r = q__1.r, work[i__2].i = q__1.i; + } + +/* C(lastv,1:lastc) += - tau * v(lastv,1) * w(1:lastc,1)**H */ + + i__1 = lastc; + for (j = 1; j <= i__1; ++j) { + i__2 = lastv + j * c_dim1; + i__3 = lastv + j * c_dim1; + r_cnjg(&q__3, &work[j]); + q__2.r = tau->r * q__3.r - tau->i * q__3.i, q__2.i = tau->r * + q__3.i + tau->i * q__3.r; + q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; + c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; + } + +/* C(firstv:lastv-1,1:lastc) += - tau * v(firstv:lastv-1,1) * w(1:lastc,1)**H */ + + i__1 = lastv - firstv; + q__1.r = -tau->r, q__1.i = -tau->i; + cgerc_(&i__1, &lastc, &q__1, &v[i__], incv, &work[1], &c__1, &c__[ + firstv + c_dim1], ldc); + } + } else { + +/* Form C * H */ + + if (lastv == firstv) { + +/* C(1:lastc,lastv) := ( 1 - tau ) * C(1:lastc,lastv) */ + + q__1.r = 1.f - tau->r, q__1.i = 0.f - tau->i; + cscal_(&lastc, &q__1, &c__[lastv * c_dim1 + 1], &c__1); + } else { + +/* w(1:lastc,1) := C(1:lastc,firstv:lastv-1) * v(firstv:lastv-1,1) */ + + i__1 = lastv - firstv; + cgemv_("No transpose", &lastc, &i__1, &c_b1, &c__[firstv * c_dim1 + + 1], ldc, &v[i__], incv, &c_b2, &work[1], &c__1); + +/* w(1:lastc,1) += C(1:lastc,lastv) * v(lastv,1) */ + + caxpy_(&lastc, &c_b1, &c__[lastv * c_dim1 + 1], &c__1, &work[1], & + c__1); + +/* C(1:lastc,lastv) += - tau * v(lastv,1) * w(1:lastc,1) */ + + q__1.r = -tau->r, q__1.i = -tau->i; + caxpy_(&lastc, &q__1, &work[1], &c__1, &c__[lastv * c_dim1 + 1], & + c__1); + +/* C(1:lastc,firstv:lastv-1) += - tau * w(1:lastc,1) * v(firstv:lastv-1)**H */ + + i__1 = lastv - firstv; + q__1.r = -tau->r, q__1.i = -tau->i; + cgerc_(&lastc, &i__1, &q__1, &work[1], &c__1, &v[i__], incv, &c__[ + firstv * c_dim1 + 1], ldc); + } + } + return 0; + +/* End of CLARF1L */ + +} /* clarf1l_ */ + diff --git a/lapack-netlib/SRC/clarf1l.f b/lapack-netlib/SRC/clarf1l.f new file mode 100644 index 0000000000..a592255f16 --- /dev/null +++ b/lapack-netlib/SRC/clarf1l.f @@ -0,0 +1,264 @@ +*> \brief \b CLARF1L applies an elementary reflector to a general rectangular +* matrix assuming v(lastv) = 1, where lastv is the last non-zero +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> Download CLARF1L + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +* +* Definition: +* =========== +* +* SUBROUTINE CLARF1L( SIDE, M, N, V, INCV, TAU, C, LDC, WORK ) +* +* .. Scalar Arguments .. +* CHARACTER SIDE +* INTEGER INCV, LDC, M, N +* COMPLEX TAU +* .. +* .. Array Arguments .. +* COMPLEX C( LDC, * ), V( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CLARF1L applies a complex elementary reflector H to a complex m by n matrix +*> C, from either the left or the right. H is represented in the form +*> +*> H = I - tau * v * v**H +*> +*> where tau is a real scalar and v is a real vector assuming v(lastv) = 1, +*> where lastv is the last non-zero element. +*> +*> If tau = 0, then H is taken to be the unit matrix. +*> +*> To apply H**H (the conjugate transpose of H), supply conjg(tau) instead +*> tau. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] SIDE +*> \verbatim +*> SIDE is CHARACTER*1 +*> = 'L': form H * C +*> = 'R': form C * H +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix C. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix C. +*> \endverbatim +*> +*> \param[in] V +*> \verbatim +*> V is COMPLEX array, dimension +*> (1 + (M-1)*abs(INCV)) if SIDE = 'L' +*> or (1 + (N-1)*abs(INCV)) if SIDE = 'R' +*> The vector v in the representation of H. V is not used if +*> TAU = 0. +*> \endverbatim +*> +*> \param[in] INCV +*> \verbatim +*> INCV is INTEGER +*> The increment between elements of v. INCV > 0. +*> \endverbatim +*> +*> \param[in] TAU +*> \verbatim +*> TAU is COMPLEX +*> The value tau in the representation of H. +*> \endverbatim +*> +*> \param[in,out] C +*> \verbatim +*> C is COMPLEX array, dimension (LDC,N) +*> On entry, the m by n matrix C. +*> On exit, C is overwritten by the matrix H * C if SIDE = 'L', +*> or C * H if SIDE = 'R'. +*> \endverbatim +*> +*> \param[in] LDC +*> \verbatim +*> LDC is INTEGER +*> The leading dimension of the array C. LDC >= max(1,M). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension +*> (N) if SIDE = 'L' +*> or (M) if SIDE = 'R' +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \ingroup larf1f +* +* ===================================================================== + SUBROUTINE CLARF1L( SIDE, M, N, V, INCV, TAU, C, LDC, WORK ) + IMPLICIT NONE +* +* -- LAPACK auxiliary routine -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* +* .. Scalar Arguments .. + CHARACTER SIDE + INTEGER INCV, LDC, M, N + COMPLEX TAU +* .. +* .. Array Arguments .. + COMPLEX C( LDC, * ), V( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX ONE, ZERO + PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ), + $ ZERO = ( 0.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL APPLYLEFT + INTEGER I, J, LASTV, LASTC, FIRSTV +* .. +* .. External Subroutines .. + EXTERNAL CAXPY, CGEMV, CGERC, CSCAL +* .. +* .. Intrinsic Functions .. + INTRINSIC CONJG +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILACLR, ILACLC + EXTERNAL LSAME, ILACLR, ILACLC +* .. +* .. Executable Statements .. +* + APPLYLEFT = LSAME( SIDE, 'L' ) + FIRSTV = 1 + LASTC = 0 + IF( TAU.NE.ZERO ) THEN +! Set up variables for scanning V. LASTV begins pointing to the end +! of V up to V(1). + IF( APPLYLEFT ) THEN + LASTV = M + ELSE + LASTV = N + END IF + I = 1 +! Look for the last non-zero row in V. + DO WHILE( LASTV.GT.FIRSTV .AND. V( I ).EQ.ZERO ) + FIRSTV = FIRSTV + 1 + I = I + INCV + END DO + IF( APPLYLEFT ) THEN +! Scan for the last non-zero column in C(1:lastv,:). + LASTC = ILACLC(LASTV, N, C, LDC) + ELSE +! Scan for the last non-zero row in C(:,1:lastv). + LASTC = ILACLR(M, LASTV, C, LDC) + END IF + END IF + IF( LASTC.EQ.0 ) THEN + RETURN + END IF + IF( APPLYLEFT ) THEN +* +* Form H * C +* + IF( LASTV.EQ.FIRSTV ) THEN +* +* C(lastv,1:lastc) := ( 1 - tau ) * C(lastv,1:lastc) +* + CALL CSCAL( LASTC, ONE - TAU, C( LASTV, 1 ), LDC ) + ELSE +* +* w(1:lastc,1) := C(firstv:lastv-1,1:lastc)**T * v(firstv:lastv-1,1) +* + CALL CGEMV( 'Conjugate transpose', LASTV - FIRSTV, LASTC, + $ ONE, C( FIRSTV, 1 ), LDC, V( I ), INCV, ZERO, + $ WORK, 1 ) +* +* w(1:lastc,1) += C(lastv,1:lastc)**H * v(lastv,1) +* + DO J = 1, LASTC + WORK( J ) = WORK( J ) + CONJG( C( LASTV, J ) ) + END DO +* +* C(lastv,1:lastc) += - tau * v(lastv,1) * w(1:lastc,1)**H +* + DO J = 1, LASTC + C( LASTV, J ) = C( LASTV, J ) + $ - TAU * CONJG( WORK( J ) ) + END DO +* +* C(firstv:lastv-1,1:lastc) += - tau * v(firstv:lastv-1,1) * w(1:lastc,1)**H +* + CALL CGERC( LASTV - FIRSTV, LASTC, -TAU, V( I ), INCV, + $ WORK, 1, C( FIRSTV, 1 ), LDC) + END IF + ELSE +* +* Form C * H +* + IF( LASTV.EQ.FIRSTV ) THEN +* +* C(1:lastc,lastv) := ( 1 - tau ) * C(1:lastc,lastv) +* + CALL CSCAL( LASTC, ONE - TAU, C( 1, LASTV ), 1 ) + ELSE +* +* w(1:lastc,1) := C(1:lastc,firstv:lastv-1) * v(firstv:lastv-1,1) +* + CALL CGEMV( 'No transpose', LASTC, LASTV - FIRSTV, ONE, + $ C( 1, FIRSTV ), LDC, V( I ), INCV, ZERO, + $ WORK, 1 ) +* +* w(1:lastc,1) += C(1:lastc,lastv) * v(lastv,1) +* + CALL CAXPY( LASTC, ONE, C( 1, LASTV ), 1, WORK, 1 ) +* +* C(1:lastc,lastv) += - tau * v(lastv,1) * w(1:lastc,1) +* + CALL CAXPY( LASTC, -TAU, WORK, 1, C( 1, LASTV ), 1 ) +* +* C(1:lastc,firstv:lastv-1) += - tau * w(1:lastc,1) * v(firstv:lastv-1)**H +* + CALL CGERC( LASTC, LASTV - FIRSTV, -TAU, WORK, 1, V( I ), + $ INCV, C( 1, FIRSTV ), LDC ) + END IF + END IF + RETURN +* +* End of CLARF1L +* + END diff --git a/lapack-netlib/SRC/clascl.f b/lapack-netlib/SRC/clascl.f index f9aace0bc4..33716bf758 100644 --- a/lapack-netlib/SRC/clascl.f +++ b/lapack-netlib/SRC/clascl.f @@ -5,7 +5,6 @@ * Online html documentation available at * http://www.netlib.org/lapack/explore-html/ * -*> \htmlonly *> Download CLASCL + dependencies *> *> [TGZ] @@ -13,7 +12,6 @@ *> [ZIP] *> *> [TXT] -*> \endhtmlonly * * Definition: * =========== @@ -136,10 +134,12 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup complexOTHERauxiliary +*> \ingroup lascl * * ===================================================================== - SUBROUTINE CLASCL( TYPE, KL, KU, CFROM, CTO, M, N, A, LDA, INFO ) + SUBROUTINE CLASCL( TYPE, KL, KU, CFROM, CTO, M, N, A, LDA, + $ INFO ) + IMPLICIT NONE * * -- LAPACK auxiliary routine -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- @@ -291,7 +291,7 @@ SUBROUTINE CLASCL( TYPE, KL, KU, CFROM, CTO, M, N, A, LDA, INFO ) * * Lower triangular matrix * - DO 50 J = 1, N + DO 50 J = 1, MIN( M, N ) DO 40 I = J, M A( I, J ) = A( I, J )*MUL 40 CONTINUE diff --git a/lapack-netlib/SRC/ctgsen.f b/lapack-netlib/SRC/ctgsen.f index 180e96b322..2e79bfcc9b 100644 --- a/lapack-netlib/SRC/ctgsen.f +++ b/lapack-netlib/SRC/ctgsen.f @@ -5,7 +5,6 @@ * Online html documentation available at * http://www.netlib.org/lapack/explore-html/ * -*> \htmlonly *> Download CTGSEN + dependencies *> *> [TGZ] @@ -13,7 +12,6 @@ *> [ZIP] *> *> [TXT] -*> \endhtmlonly * * Definition: * =========== @@ -242,7 +240,7 @@ *> \verbatim *> LWORK is INTEGER *> The dimension of the array WORK. LWORK >= 1 -*> If IJOB = 1, 2 or 4, LWORK >= 2*M*(N-M) +*> If IJOB = 1, 2 or 4, LWORK >= 2*M*(N-M) + 1 *> If IJOB = 3 or 5, LWORK >= 4*M*(N-M) *> *> If LWORK = -1, then a workspace query is assumed; the routine @@ -427,9 +425,11 @@ *> 1996. *> * ===================================================================== - SUBROUTINE CTGSEN( IJOB, WANTQ, WANTZ, SELECT, N, A, LDA, B, LDB, + SUBROUTINE CTGSEN( IJOB, WANTQ, WANTZ, SELECT, N, A, LDA, B, + $ LDB, $ ALPHA, BETA, Q, LDQ, Z, LDZ, M, PL, PR, DIF, $ WORK, LWORK, IWORK, LIWORK, INFO ) + IMPLICIT NONE * * -- LAPACK computational routine -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- @@ -473,7 +473,8 @@ SUBROUTINE CTGSEN( IJOB, WANTQ, WANTZ, SELECT, N, A, LDA, B, LDB, * .. * .. External Subroutines .. REAL SLAMCH - EXTERNAL CLACN2, CLACPY, CLASSQ, CSCAL, CTGEXC, CTGSYL, + EXTERNAL CLACN2, CLACPY, CLASSQ, CSCAL, CTGEXC, + $ CTGSYL, $ SLAMCH, XERBLA * .. * .. Intrinsic Functions .. @@ -531,7 +532,7 @@ SUBROUTINE CTGSEN( IJOB, WANTQ, WANTZ, SELECT, N, A, LDA, B, LDB, END IF * IF( IJOB.EQ.1 .OR. IJOB.EQ.2 .OR. IJOB.EQ.4 ) THEN - LWMIN = MAX( 1, 2*M*(N-M) ) + LWMIN = MAX( 1, 2*M*(N-M) + 1 ) LIWMIN = MAX( 1, N+2 ) ELSE IF( IJOB.EQ.3 .OR. IJOB.EQ.5 ) THEN LWMIN = MAX( 1, 4*M*(N-M) ) @@ -593,7 +594,8 @@ SUBROUTINE CTGSEN( IJOB, WANTQ, WANTZ, SELECT, N, A, LDA, B, LDB, * and Z that will swap adjacent diagonal blocks in (A, B). * IF( K.NE.KS ) - $ CALL CTGEXC( WANTQ, WANTZ, N, A, LDA, B, LDB, Q, LDQ, Z, + $ CALL CTGEXC( WANTQ, WANTZ, N, A, LDA, B, LDB, Q, LDQ, + $ Z, $ LDZ, K, KS, IERR ) * IF( IERR.GT.0 ) THEN @@ -623,7 +625,8 @@ SUBROUTINE CTGSEN( IJOB, WANTQ, WANTZ, SELECT, N, A, LDA, B, LDB, N2 = N - M I = N1 + 1 CALL CLACPY( 'Full', N1, N2, A( 1, I ), LDA, WORK, N1 ) - CALL CLACPY( 'Full', N1, N2, B( 1, I ), LDB, WORK( N1*N2+1 ), + CALL CLACPY( 'Full', N1, N2, B( 1, I ), LDB, + $ WORK( N1*N2+1 ), $ N1 ) IJB = 0 CALL CTGSYL( 'N', IJB, N1, N2, A, LDA, A( I, I ), LDA, WORK, @@ -665,14 +668,16 @@ SUBROUTINE CTGSEN( IJOB, WANTQ, WANTZ, SELECT, N, A, LDA, B, LDB, * * Frobenius norm-based Difu estimate. * - CALL CTGSYL( 'N', IJB, N1, N2, A, LDA, A( I, I ), LDA, WORK, + CALL CTGSYL( 'N', IJB, N1, N2, A, LDA, A( I, I ), LDA, + $ WORK, $ N1, B, LDB, B( I, I ), LDB, WORK( N1*N2+1 ), $ N1, DSCALE, DIF( 1 ), WORK( N1*N2*2+1 ), $ LWORK-2*N1*N2, IWORK, IERR ) * * Frobenius norm-based Difl estimate. * - CALL CTGSYL( 'N', IJB, N2, N1, A( I, I ), LDA, A, LDA, WORK, + CALL CTGSYL( 'N', IJB, N2, N1, A( I, I ), LDA, A, LDA, + $ WORK, $ N2, B( I, I ), LDB, B, LDB, WORK( N1*N2+1 ), $ N2, DSCALE, DIF( 2 ), WORK( N1*N2*2+1 ), $ LWORK-2*N1*N2, IWORK, IERR ) @@ -700,7 +705,8 @@ SUBROUTINE CTGSEN( IJOB, WANTQ, WANTZ, SELECT, N, A, LDA, B, LDB, * * Solve generalized Sylvester equation * - CALL CTGSYL( 'N', IJB, N1, N2, A, LDA, A( I, I ), LDA, + CALL CTGSYL( 'N', IJB, N1, N2, A, LDA, A( I, I ), + $ LDA, $ WORK, N1, B, LDB, B( I, I ), LDB, $ WORK( N1*N2+1 ), N1, DSCALE, DIF( 1 ), $ WORK( N1*N2*2+1 ), LWORK-2*N1*N2, IWORK, @@ -709,7 +715,8 @@ SUBROUTINE CTGSEN( IJOB, WANTQ, WANTZ, SELECT, N, A, LDA, B, LDB, * * Solve the transposed variant. * - CALL CTGSYL( 'C', IJB, N1, N2, A, LDA, A( I, I ), LDA, + CALL CTGSYL( 'C', IJB, N1, N2, A, LDA, A( I, I ), + $ LDA, $ WORK, N1, B, LDB, B( I, I ), LDB, $ WORK( N1*N2+1 ), N1, DSCALE, DIF( 1 ), $ WORK( N1*N2*2+1 ), LWORK-2*N1*N2, IWORK, @@ -729,7 +736,8 @@ SUBROUTINE CTGSEN( IJOB, WANTQ, WANTZ, SELECT, N, A, LDA, B, LDB, * * Solve generalized Sylvester equation * - CALL CTGSYL( 'N', IJB, N2, N1, A( I, I ), LDA, A, LDA, + CALL CTGSYL( 'N', IJB, N2, N1, A( I, I ), LDA, A, + $ LDA, $ WORK, N2, B( I, I ), LDB, B, LDB, $ WORK( N1*N2+1 ), N2, DSCALE, DIF( 2 ), $ WORK( N1*N2*2+1 ), LWORK-2*N1*N2, IWORK, @@ -738,7 +746,8 @@ SUBROUTINE CTGSEN( IJOB, WANTQ, WANTZ, SELECT, N, A, LDA, B, LDB, * * Solve the transposed variant. * - CALL CTGSYL( 'C', IJB, N2, N1, A( I, I ), LDA, A, LDA, + CALL CTGSYL( 'C', IJB, N2, N1, A( I, I ), LDA, A, + $ LDA, $ WORK, N2, B, LDB, B( I, I ), LDB, $ WORK( N1*N2+1 ), N2, DSCALE, DIF( 2 ), $ WORK( N1*N2*2+1 ), LWORK-2*N1*N2, IWORK, diff --git a/lapack-netlib/SRC/ctrsyl3.f b/lapack-netlib/SRC/ctrsyl3.f index 586dc0207f..0538a36e4d 100644 --- a/lapack-netlib/SRC/ctrsyl3.f +++ b/lapack-netlib/SRC/ctrsyl3.f @@ -1,10 +1,23 @@ *> \brief \b CTRSYL3 * -* Definition: -* =========== +* Definition: +* =========== * +* SUBROUTINE CTRSYL3( TRANA, TRANB, ISGN, M, N, A, LDA, B, LDB, +* C, LDC, SCALE, SWORK, LDSWORK, INFO ) * -*> \par Purpose +* .. Scalar Arguments .. +* CHARACTER TRANA, TRANB +* INTEGER INFO, ISGN, LDA, LDB, LDC, LDSWORK, M, N +* REAL SCALE +* .. +* .. Array Arguments .. +* COMPLEX A( LDA, * ), B( LDB, * ), C( LDC, * ) +* REAL SWORK( LDSWORK, * ) +* .. +* +* +*> \par Purpose: * ============= *> *> \verbatim @@ -22,8 +35,8 @@ *> This is the block version of the algorithm. *> \endverbatim * -* Arguments -* ========= +* Arguments: +* ========== * *> \param[in] TRANA *> \verbatim @@ -135,7 +148,7 @@ *> A and B are unchanged). *> \endverbatim * -*> \ingroup complexSYcomputational +*> \ingroup trsyl3 * * ===================================================================== * References: @@ -151,8 +164,8 @@ * Angelika Schwarz, Umea University, Sweden. * * ===================================================================== - SUBROUTINE CTRSYL3( TRANA, TRANB, ISGN, M, N, A, LDA, B, LDB, C, - $ LDC, SCALE, SWORK, LDSWORK, INFO ) + SUBROUTINE CTRSYL3( TRANA, TRANB, ISGN, M, N, A, LDA, B, LDB, + $ C, LDC, SCALE, SWORK, LDSWORK, INFO ) IMPLICIT NONE * * .. Scalar Arguments .. @@ -185,10 +198,12 @@ SUBROUTINE CTRSYL3( TRANA, TRANB, ISGN, M, N, A, LDA, B, LDB, C, LOGICAL LSAME INTEGER ILAENV REAL CLANGE, SLAMCH, SLARMM - EXTERNAL CLANGE, ILAENV, LSAME, SLAMCH, SLARMM + EXTERNAL CLANGE, ILAENV, LSAME, SLAMCH, + $ SLARMM * .. * .. External Subroutines .. - EXTERNAL CSSCAL, CGEMM, CLASCL, CTRSYL, XERBLA + EXTERNAL CSSCAL, CGEMM, CLASCL, CTRSYL, + $ XERBLA * .. * .. Intrinsic Functions .. INTRINSIC ABS, AIMAG, EXPONENT, MAX, MIN, REAL @@ -214,9 +229,8 @@ SUBROUTINE CTRSYL3( TRANA, TRANB, ISGN, M, N, A, LDA, B, LDB, C, INFO = 0 LQUERY = ( LDSWORK.EQ.-1 ) IF( LQUERY ) THEN - LDSWORK = 2 - SWORK(1,1) = MAX( NBA, NBB ) - SWORK(2,1) = 2 * NBB + NBA + SWORK(1,1) = REAL( MAX( NBA, NBB ) ) + SWORK(2,1) = REAL( 2 * NBB + NBA ) END IF * * Test the input arguments @@ -1068,8 +1082,8 @@ SUBROUTINE CTRSYL3( TRANA, TRANB, ISGN, M, N, A, LDA, B, LDB, C, * form (1/SCALE)*X if SCALE is REAL. Set SCALE to * zero and give up. * - SWORK(1,1) = MAX( NBA, NBB ) - SWORK(2,1) = 2 * NBB + NBA + SWORK(1,1) = REAL( MAX( NBA, NBB ) ) + SWORK(2,1) = REAL( 2 * NBB + NBA ) RETURN END IF * @@ -1132,8 +1146,8 @@ SUBROUTINE CTRSYL3( TRANA, TRANB, ISGN, M, N, A, LDA, B, LDB, C, * * Restore workspace dimensions * - SWORK(1,1) = MAX( NBA, NBB ) - SWORK(2,1) = 2 * NBB + NBA + SWORK(1,1) = REAL( MAX( NBA, NBB ) ) + SWORK(2,1) = REAL( 2 * NBB + NBA ) * RETURN * diff --git a/lapack-netlib/SRC/cunbdb.f b/lapack-netlib/SRC/cunbdb.f index b45dcfde6f..ee0cb2871e 100644 --- a/lapack-netlib/SRC/cunbdb.f +++ b/lapack-netlib/SRC/cunbdb.f @@ -5,7 +5,6 @@ * Online html documentation available at * http://www.netlib.org/lapack/explore-html/ * -*> \htmlonly *> Download CUNBDB + dependencies *> *> [TGZ] @@ -13,7 +12,6 @@ *> [ZIP] *> *> [TXT] -*> \endhtmlonly * * Definition: * =========== @@ -281,9 +279,11 @@ *> Algorithms, 50(1):33-65, 2009. *> * ===================================================================== - SUBROUTINE CUNBDB( TRANS, SIGNS, M, P, Q, X11, LDX11, X12, LDX12, + SUBROUTINE CUNBDB( TRANS, SIGNS, M, P, Q, X11, LDX11, X12, + $ LDX12, $ X21, LDX21, X22, LDX22, THETA, PHI, TAUP1, $ TAUP2, TAUQ1, TAUQ2, WORK, LWORK, INFO ) + IMPLICIT NONE * * -- LAPACK computational routine -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- @@ -306,8 +306,6 @@ SUBROUTINE CUNBDB( TRANS, SIGNS, M, P, Q, X11, LDX11, X12, LDX12, * .. Parameters .. REAL REALONE PARAMETER ( REALONE = 1.0E0 ) - COMPLEX ONE - PARAMETER ( ONE = (1.0E0,0.0E0) ) * .. * .. Local Scalars .. LOGICAL COLMAJOR, LQUERY @@ -315,7 +313,8 @@ SUBROUTINE CUNBDB( TRANS, SIGNS, M, P, Q, X11, LDX11, X12, LDX12, REAL Z1, Z2, Z3, Z4 * .. * .. External Subroutines .. - EXTERNAL CAXPY, CLARF, CLARFGP, CSCAL, XERBLA + EXTERNAL CAXPY, CLARF1F, CLARFGP, CSCAL, + $ XERBLA EXTERNAL CLACGV * * .. @@ -418,11 +417,11 @@ SUBROUTINE CUNBDB( TRANS, SIGNS, M, P, Q, X11, LDX11, X12, LDX12, $ SCNRM2( P-I+1, X11(I,I), 1 ) ) * IF( P .GT. I ) THEN - CALL CLARFGP( P-I+1, X11(I,I), X11(I+1,I), 1, TAUP1(I) ) + CALL CLARFGP( P-I+1, X11(I,I), X11(I+1,I), 1, + $ TAUP1(I) ) ELSE IF ( P .EQ. I ) THEN CALL CLARFGP( P-I+1, X11(I,I), X11(I,I), 1, TAUP1(I) ) END IF - X11(I,I) = ONE IF ( M-P .GT. I ) THEN CALL CLARFGP( M-P-I+1, X21(I,I), X21(I+1,I), 1, $ TAUP2(I) ) @@ -430,19 +429,20 @@ SUBROUTINE CUNBDB( TRANS, SIGNS, M, P, Q, X11, LDX11, X12, LDX12, CALL CLARFGP( M-P-I+1, X21(I,I), X21(I,I), 1, $ TAUP2(I) ) END IF - X21(I,I) = ONE * IF ( Q .GT. I ) THEN - CALL CLARF( 'L', P-I+1, Q-I, X11(I,I), 1, - $ CONJG(TAUP1(I)), X11(I,I+1), LDX11, WORK ) - CALL CLARF( 'L', M-P-I+1, Q-I, X21(I,I), 1, - $ CONJG(TAUP2(I)), X21(I,I+1), LDX21, WORK ) + CALL CLARF1F( 'L', P-I+1, Q-I, X11(I,I), 1, + $ CONJG(TAUP1(I)), X11(I,I+1), LDX11, + $ WORK ) + CALL CLARF1F( 'L', M-P-I+1, Q-I, X21(I,I), 1, + $ CONJG(TAUP2(I)), X21(I,I+1), LDX21, + $ WORK ) END IF IF ( M-Q+1 .GT. I ) THEN - CALL CLARF( 'L', P-I+1, M-Q-I+1, X11(I,I), 1, - $ CONJG(TAUP1(I)), X12(I,I), LDX12, WORK ) - CALL CLARF( 'L', M-P-I+1, M-Q-I+1, X21(I,I), 1, - $ CONJG(TAUP2(I)), X22(I,I), LDX22, WORK ) + CALL CLARF1F( 'L', P-I+1, M-Q-I+1, X11(I,I), 1, + $ CONJG(TAUP1(I)), X12(I,I), LDX12, WORK ) + CALL CLARF1F( 'L', M-P-I+1, M-Q-I+1, X21(I,I), 1, + $ CONJG(TAUP2(I)), X22(I,I), LDX22, WORK ) END IF * IF( I .LT. Q ) THEN @@ -451,7 +451,8 @@ SUBROUTINE CUNBDB( TRANS, SIGNS, M, P, Q, X11, LDX11, X12, LDX12, CALL CAXPY( Q-I, CMPLX( Z2*Z3*COS(THETA(I)), 0.0E0 ), $ X21(I,I+1), LDX21, X11(I,I+1), LDX11 ) END IF - CALL CSCAL( M-Q-I+1, CMPLX( -Z1*Z4*SIN(THETA(I)), 0.0E0 ), + CALL CSCAL( M-Q-I+1, CMPLX( -Z1*Z4*SIN(THETA(I)), + $ 0.0E0 ), $ X12(I,I), LDX12 ) CALL CAXPY( M-Q-I+1, CMPLX( Z2*Z4*COS(THETA(I)), 0.0E0 ), $ X22(I,I), LDX22, X12(I,I), LDX12 ) @@ -469,7 +470,6 @@ SUBROUTINE CUNBDB( TRANS, SIGNS, M, P, Q, X11, LDX11, X12, LDX12, CALL CLARFGP( Q-I, X11(I,I+1), X11(I,I+2), LDX11, $ TAUQ1(I) ) END IF - X11(I,I+1) = ONE END IF IF ( M-Q+1 .GT. I ) THEN CALL CLACGV( M-Q-I+1, X12(I,I), LDX12 ) @@ -481,21 +481,20 @@ SUBROUTINE CUNBDB( TRANS, SIGNS, M, P, Q, X11, LDX11, X12, LDX12, $ TAUQ2(I) ) END IF END IF - X12(I,I) = ONE * IF( I .LT. Q ) THEN - CALL CLARF( 'R', P-I, Q-I, X11(I,I+1), LDX11, TAUQ1(I), - $ X11(I+1,I+1), LDX11, WORK ) - CALL CLARF( 'R', M-P-I, Q-I, X11(I,I+1), LDX11, TAUQ1(I), - $ X21(I+1,I+1), LDX21, WORK ) + CALL CLARF1F( 'R', P-I, Q-I, X11(I,I+1), LDX11, + $ TAUQ1(I), X11(I+1,I+1), LDX11, WORK ) + CALL CLARF1F( 'R', M-P-I, Q-I, X11(I,I+1), LDX11, + $ TAUQ1(I), X21(I+1,I+1), LDX21, WORK ) END IF IF ( P .GT. I ) THEN - CALL CLARF( 'R', P-I, M-Q-I+1, X12(I,I), LDX12, TAUQ2(I), - $ X12(I+1,I), LDX12, WORK ) + CALL CLARF1F( 'R', P-I, M-Q-I+1, X12(I,I), LDX12, + $ TAUQ2(I), X12(I+1,I), LDX12, WORK ) END IF IF ( M-P .GT. I ) THEN - CALL CLARF( 'R', M-P-I, M-Q-I+1, X12(I,I), LDX12, - $ TAUQ2(I), X22(I+1,I), LDX22, WORK ) + CALL CLARF1F( 'R', M-P-I, M-Q-I+1, X12(I,I), LDX12, + $ TAUQ2(I), X22(I+1,I), LDX22, WORK ) END IF * IF( I .LT. Q ) @@ -518,15 +517,14 @@ SUBROUTINE CUNBDB( TRANS, SIGNS, M, P, Q, X11, LDX11, X12, LDX12, CALL CLARFGP( M-Q-I+1, X12(I,I), X12(I,I+1), LDX12, $ TAUQ2(I) ) END IF - X12(I,I) = ONE * IF ( P .GT. I ) THEN - CALL CLARF( 'R', P-I, M-Q-I+1, X12(I,I), LDX12, TAUQ2(I), - $ X12(I+1,I), LDX12, WORK ) + CALL CLARF1F( 'R', P-I, M-Q-I+1, X12(I,I), LDX12, + $ TAUQ2(I), X12(I+1,I), LDX12, WORK ) END IF IF( M-P-Q .GE. 1 ) - $ CALL CLARF( 'R', M-P-Q, M-Q-I+1, X12(I,I), LDX12, - $ TAUQ2(I), X22(Q+1,I), LDX22, WORK ) + $ CALL CLARF1F( 'R', M-P-Q, M-Q-I+1, X12(I,I), LDX12, + $ TAUQ2(I), X22(Q+1,I), LDX22, WORK ) * CALL CLACGV( M-Q-I+1, X12(I,I), LDX12 ) * @@ -541,9 +539,9 @@ SUBROUTINE CUNBDB( TRANS, SIGNS, M, P, Q, X11, LDX11, X12, LDX12, CALL CLACGV( M-P-Q-I+1, X22(Q+I,P+I), LDX22 ) CALL CLARFGP( M-P-Q-I+1, X22(Q+I,P+I), X22(Q+I,P+I+1), $ LDX22, TAUQ2(P+I) ) - X22(Q+I,P+I) = ONE - CALL CLARF( 'R', M-P-Q-I, M-P-Q-I+1, X22(Q+I,P+I), LDX22, - $ TAUQ2(P+I), X22(Q+I+1,P+I), LDX22, WORK ) + CALL CLARF1F( 'R', M-P-Q-I, M-P-Q-I+1, X22(Q+I,P+I), + $ LDX22, TAUQ2(P+I), X22(Q+I+1,P+I), LDX22, + $ WORK ) * CALL CLACGV( M-P-Q-I+1, X22(Q+I,P+I), LDX22 ) * @@ -580,8 +578,8 @@ SUBROUTINE CUNBDB( TRANS, SIGNS, M, P, Q, X11, LDX11, X12, LDX12, CALL CLACGV( P-I+1, X11(I,I), LDX11 ) CALL CLACGV( M-P-I+1, X21(I,I), LDX21 ) * - CALL CLARFGP( P-I+1, X11(I,I), X11(I,I+1), LDX11, TAUP1(I) ) - X11(I,I) = ONE + CALL CLARFGP( P-I+1, X11(I,I), X11(I,I+1), LDX11, + $ TAUP1(I) ) IF ( I .EQ. M-P ) THEN CALL CLARFGP( M-P-I+1, X21(I,I), X21(I,I), LDX21, $ TAUP2(I) ) @@ -589,16 +587,15 @@ SUBROUTINE CUNBDB( TRANS, SIGNS, M, P, Q, X11, LDX11, X12, LDX12, CALL CLARFGP( M-P-I+1, X21(I,I), X21(I,I+1), LDX21, $ TAUP2(I) ) END IF - X21(I,I) = ONE * - CALL CLARF( 'R', Q-I, P-I+1, X11(I,I), LDX11, TAUP1(I), - $ X11(I+1,I), LDX11, WORK ) - CALL CLARF( 'R', M-Q-I+1, P-I+1, X11(I,I), LDX11, TAUP1(I), - $ X12(I,I), LDX12, WORK ) - CALL CLARF( 'R', Q-I, M-P-I+1, X21(I,I), LDX21, TAUP2(I), - $ X21(I+1,I), LDX21, WORK ) - CALL CLARF( 'R', M-Q-I+1, M-P-I+1, X21(I,I), LDX21, - $ TAUP2(I), X22(I,I), LDX22, WORK ) + CALL CLARF1F( 'R', Q-I, P-I+1, X11(I,I), LDX11, TAUP1(I), + $ X11(I+1,I), LDX11, WORK ) + CALL CLARF1F( 'R', M-Q-I+1, P-I+1, X11(I,I), LDX11, + $ TAUP1(I), X12(I,I), LDX12, WORK ) + CALL CLARF1F( 'R', Q-I, M-P-I+1, X21(I,I), LDX21, + $ TAUP2(I), X21(I+1,I), LDX21, WORK ) + CALL CLARF1F( 'R', M-Q-I+1, M-P-I+1, X21(I,I), LDX21, + $ TAUP2(I), X22(I,I), LDX22, WORK ) * CALL CLACGV( P-I+1, X11(I,I), LDX11 ) CALL CLACGV( M-P-I+1, X21(I,I), LDX21 ) @@ -609,7 +606,8 @@ SUBROUTINE CUNBDB( TRANS, SIGNS, M, P, Q, X11, LDX11, X12, LDX12, CALL CAXPY( Q-I, CMPLX( Z2*Z3*COS(THETA(I)), 0.0E0 ), $ X21(I+1,I), 1, X11(I+1,I), 1 ) END IF - CALL CSCAL( M-Q-I+1, CMPLX( -Z1*Z4*SIN(THETA(I)), 0.0E0 ), + CALL CSCAL( M-Q-I+1, CMPLX( -Z1*Z4*SIN(THETA(I)), + $ 0.0E0 ), $ X12(I,I), 1 ) CALL CAXPY( M-Q-I+1, CMPLX( Z2*Z4*COS(THETA(I)), 0.0E0 ), $ X22(I,I), 1, X12(I,I), 1 ) @@ -619,24 +617,27 @@ SUBROUTINE CUNBDB( TRANS, SIGNS, M, P, Q, X11, LDX11, X12, LDX12, $ SCNRM2( M-Q-I+1, X12(I,I), 1 ) ) * IF( I .LT. Q ) THEN - CALL CLARFGP( Q-I, X11(I+1,I), X11(I+2,I), 1, TAUQ1(I) ) - X11(I+1,I) = ONE + CALL CLARFGP( Q-I, X11(I+1,I), X11(I+2,I), 1, + $ TAUQ1(I) ) END IF - CALL CLARFGP( M-Q-I+1, X12(I,I), X12(I+1,I), 1, TAUQ2(I) ) - X12(I,I) = ONE + CALL CLARFGP( M-Q-I+1, X12(I,I), X12(I+1,I), 1, + $ TAUQ2(I) ) * IF( I .LT. Q ) THEN - CALL CLARF( 'L', Q-I, P-I, X11(I+1,I), 1, - $ CONJG(TAUQ1(I)), X11(I+1,I+1), LDX11, WORK ) - CALL CLARF( 'L', Q-I, M-P-I, X11(I+1,I), 1, - $ CONJG(TAUQ1(I)), X21(I+1,I+1), LDX21, WORK ) + CALL CLARF1F( 'L', Q-I, P-I, X11(I+1,I), 1, + $ CONJG(TAUQ1(I)), X11(I+1,I+1), LDX11, + $ WORK ) + CALL CLARF1F( 'L', Q-I, M-P-I, X11(I+1,I), 1, + $ CONJG(TAUQ1(I)), X21(I+1,I+1), LDX21, + $ WORK ) END IF - CALL CLARF( 'L', M-Q-I+1, P-I, X12(I,I), 1, CONJG(TAUQ2(I)), - $ X12(I,I+1), LDX12, WORK ) + CALL CLARF1F( 'L', M-Q-I+1, P-I, X12(I,I), 1, + $ CONJG(TAUQ2(I)), X12(I,I+1), LDX12, WORK ) IF ( M-P .GT. I ) THEN - CALL CLARF( 'L', M-Q-I+1, M-P-I, X12(I,I), 1, - $ CONJG(TAUQ2(I)), X22(I,I+1), LDX22, WORK ) + CALL CLARF1F( 'L', M-Q-I+1, M-P-I, X12(I,I), 1, + $ CONJG(TAUQ2(I)), X22(I,I+1), LDX22, + $ WORK ) END IF END DO * @@ -644,17 +645,20 @@ SUBROUTINE CUNBDB( TRANS, SIGNS, M, P, Q, X11, LDX11, X12, LDX12, * DO I = Q + 1, P * - CALL CSCAL( M-Q-I+1, CMPLX( -Z1*Z4, 0.0E0 ), X12(I,I), 1 ) - CALL CLARFGP( M-Q-I+1, X12(I,I), X12(I+1,I), 1, TAUQ2(I) ) - X12(I,I) = ONE + CALL CSCAL( M-Q-I+1, CMPLX( -Z1*Z4, 0.0E0 ), X12(I,I), + $ 1 ) + CALL CLARFGP( M-Q-I+1, X12(I,I), X12(I+1,I), 1, + $ TAUQ2(I) ) * IF ( P .GT. I ) THEN - CALL CLARF( 'L', M-Q-I+1, P-I, X12(I,I), 1, - $ CONJG(TAUQ2(I)), X12(I,I+1), LDX12, WORK ) + CALL CLARF1F( 'L', M-Q-I+1, P-I, X12(I,I), 1, + $ CONJG(TAUQ2(I)), X12(I,I+1), LDX12, + $ WORK ) END IF IF( M-P-Q .GE. 1 ) - $ CALL CLARF( 'L', M-Q-I+1, M-P-Q, X12(I,I), 1, - $ CONJG(TAUQ2(I)), X22(I,Q+1), LDX22, WORK ) + $ CALL CLARF1F( 'L', M-Q-I+1, M-P-Q, X12(I,I), 1, + $ CONJG(TAUQ2(I)), X22(I,Q+1), LDX22, + $ WORK ) * END DO * @@ -666,11 +670,10 @@ SUBROUTINE CUNBDB( TRANS, SIGNS, M, P, Q, X11, LDX11, X12, LDX12, $ X22(P+I,Q+I), 1 ) CALL CLARFGP( M-P-Q-I+1, X22(P+I,Q+I), X22(P+I+1,Q+I), 1, $ TAUQ2(P+I) ) - X22(P+I,Q+I) = ONE IF ( M-P-Q .NE. I ) THEN - CALL CLARF( 'L', M-P-Q-I+1, M-P-Q-I, X22(P+I,Q+I), 1, - $ CONJG(TAUQ2(P+I)), X22(P+I,Q+I+1), LDX22, - $ WORK ) + CALL CLARF1F( 'L', M-P-Q-I+1, M-P-Q-I, X22(P+I,Q+I), + $ 1, CONJG(TAUQ2(P+I)), X22(P+I,Q+I+1), + $ LDX22, WORK ) END IF END DO * diff --git a/lapack-netlib/SRC/cunbdb1.f b/lapack-netlib/SRC/cunbdb1.f index a4875ab5ba..08b2fd8465 100644 --- a/lapack-netlib/SRC/cunbdb1.f +++ b/lapack-netlib/SRC/cunbdb1.f @@ -5,7 +5,6 @@ * Online html documentation available at * http://www.netlib.org/lapack/explore-html/ * -*> \htmlonly *> Download CUNBDB1 + dependencies *> *> [TGZ] @@ -13,7 +12,6 @@ *> [ZIP] *> *> [TXT] -*> \endhtmlonly * * Definition: * =========== @@ -197,8 +195,10 @@ *> Algorithms, 50(1):33-65, 2009. *> * ===================================================================== - SUBROUTINE CUNBDB1( M, P, Q, X11, LDX11, X21, LDX21, THETA, PHI, + SUBROUTINE CUNBDB1( M, P, Q, X11, LDX11, X21, LDX21, THETA, + $ PHI, $ TAUP1, TAUP2, TAUQ1, WORK, LWORK, INFO ) + IMPLICIT NONE * * -- LAPACK computational routine -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- @@ -215,10 +215,6 @@ SUBROUTINE CUNBDB1( M, P, Q, X11, LDX11, X21, LDX21, THETA, PHI, * * ==================================================================== * -* .. Parameters .. - COMPLEX ONE - PARAMETER ( ONE = (1.0E0,0.0E0) ) -* .. * .. Local Scalars .. REAL C, S INTEGER CHILDINFO, I, ILARF, IORBDB5, LLARF, LORBDB5, @@ -226,7 +222,8 @@ SUBROUTINE CUNBDB1( M, P, Q, X11, LDX11, X21, LDX21, THETA, PHI, LOGICAL LQUERY * .. * .. External Subroutines .. - EXTERNAL CLARF, CLARFGP, CUNBDB5, CSROT, XERBLA + EXTERNAL CLARF1F, CLARFGP, CUNBDB5, CSROT, + $ XERBLA EXTERNAL CLACGV * .. * .. External Functions .. @@ -285,24 +282,24 @@ SUBROUTINE CUNBDB1( M, P, Q, X11, LDX11, X21, LDX21, THETA, PHI, THETA(I) = ATAN2( REAL( X21(I,I) ), REAL( X11(I,I) ) ) C = COS( THETA(I) ) S = SIN( THETA(I) ) - X11(I,I) = ONE - X21(I,I) = ONE - CALL CLARF( 'L', P-I+1, Q-I, X11(I,I), 1, CONJG(TAUP1(I)), - $ X11(I,I+1), LDX11, WORK(ILARF) ) - CALL CLARF( 'L', M-P-I+1, Q-I, X21(I,I), 1, CONJG(TAUP2(I)), - $ X21(I,I+1), LDX21, WORK(ILARF) ) + CALL CLARF1F( 'L', P-I+1, Q-I, X11(I,I), 1, CONJG(TAUP1(I)), + $ X11(I,I+1), LDX11, WORK(ILARF) ) + CALL CLARF1F( 'L', M-P-I+1, Q-I, X21(I,I), 1, + $ CONJG(TAUP2(I)), X21(I,I+1), LDX21, + $ WORK(ILARF) ) * IF( I .LT. Q ) THEN CALL CSROT( Q-I, X11(I,I+1), LDX11, X21(I,I+1), LDX21, C, $ S ) CALL CLACGV( Q-I, X21(I,I+1), LDX21 ) - CALL CLARFGP( Q-I, X21(I,I+1), X21(I,I+2), LDX21, TAUQ1(I) ) + CALL CLARFGP( Q-I, X21(I,I+1), X21(I,I+2), LDX21, + $ TAUQ1(I) ) S = REAL( X21(I,I+1) ) - X21(I,I+1) = ONE - CALL CLARF( 'R', P-I, Q-I, X21(I,I+1), LDX21, TAUQ1(I), - $ X11(I+1,I+1), LDX11, WORK(ILARF) ) - CALL CLARF( 'R', M-P-I, Q-I, X21(I,I+1), LDX21, TAUQ1(I), - $ X21(I+1,I+1), LDX21, WORK(ILARF) ) + CALL CLARF1F( 'R', P-I, Q-I, X21(I,I+1), LDX21, TAUQ1(I), + $ X11(I+1,I+1), LDX11, WORK(ILARF) ) + CALL CLARF1F( 'R', M-P-I, Q-I, X21(I,I+1), LDX21, + $ TAUQ1(I), X21(I+1,I+1), LDX21, + $ WORK(ILARF) ) CALL CLACGV( Q-I, X21(I,I+1), LDX21 ) C = SQRT( SCNRM2( P-I, X11(I+1,I+1), 1 )**2 $ + SCNRM2( M-P-I, X21(I+1,I+1), 1 )**2 ) diff --git a/lapack-netlib/SRC/cunbdb2.f b/lapack-netlib/SRC/cunbdb2.f index 6399964f8d..337e572a0d 100644 --- a/lapack-netlib/SRC/cunbdb2.f +++ b/lapack-netlib/SRC/cunbdb2.f @@ -5,7 +5,6 @@ * Online html documentation available at * http://www.netlib.org/lapack/explore-html/ * -*> \htmlonly *> Download CUNBDB2 + dependencies *> *> [TGZ] @@ -13,7 +12,6 @@ *> [ZIP] *> *> [TXT] -*> \endhtmlonly * * Definition: * =========== @@ -197,8 +195,10 @@ *> Algorithms, 50(1):33-65, 2009. *> * ===================================================================== - SUBROUTINE CUNBDB2( M, P, Q, X11, LDX11, X21, LDX21, THETA, PHI, + SUBROUTINE CUNBDB2( M, P, Q, X11, LDX11, X21, LDX21, THETA, + $ PHI, $ TAUP1, TAUP2, TAUQ1, WORK, LWORK, INFO ) + IMPLICIT NONE * * -- LAPACK computational routine -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- @@ -216,9 +216,8 @@ SUBROUTINE CUNBDB2( M, P, Q, X11, LDX11, X21, LDX21, THETA, PHI, * ==================================================================== * * .. Parameters .. - COMPLEX NEGONE, ONE - PARAMETER ( NEGONE = (-1.0E0,0.0E0), - $ ONE = (1.0E0,0.0E0) ) + COMPLEX NEGONE + PARAMETER ( NEGONE = (-1.0E0,0.0E0) ) * .. * .. Local Scalars .. REAL C, S @@ -227,7 +226,8 @@ SUBROUTINE CUNBDB2( M, P, Q, X11, LDX11, X21, LDX21, THETA, PHI, LOGICAL LQUERY * .. * .. External Subroutines .. - EXTERNAL CLARF, CLARFGP, CUNBDB5, CSROT, CSCAL, CLACGV, + EXTERNAL CLARF1F, CLARFGP, CUNBDB5, CSROT, CSCAL, + $ CLACGV, $ XERBLA * .. * .. External Functions .. @@ -288,11 +288,10 @@ SUBROUTINE CUNBDB2( M, P, Q, X11, LDX11, X21, LDX21, THETA, PHI, CALL CLACGV( Q-I+1, X11(I,I), LDX11 ) CALL CLARFGP( Q-I+1, X11(I,I), X11(I,I+1), LDX11, TAUQ1(I) ) C = REAL( X11(I,I) ) - X11(I,I) = ONE - CALL CLARF( 'R', P-I, Q-I+1, X11(I,I), LDX11, TAUQ1(I), - $ X11(I+1,I), LDX11, WORK(ILARF) ) - CALL CLARF( 'R', M-P-I+1, Q-I+1, X11(I,I), LDX11, TAUQ1(I), - $ X21(I,I), LDX21, WORK(ILARF) ) + CALL CLARF1F( 'R', P-I, Q-I+1, X11(I,I), LDX11, TAUQ1(I), + $ X11(I+1,I), LDX11, WORK(ILARF) ) + CALL CLARF1F( 'R', M-P-I+1, Q-I+1, X11(I,I), LDX11, + $ TAUQ1(I), X21(I,I), LDX21, WORK(ILARF) ) CALL CLACGV( Q-I+1, X11(I,I), LDX11 ) S = SQRT( SCNRM2( P-I, X11(I+1,I), 1 )**2 $ + SCNRM2( M-P-I+1, X21(I,I), 1 )**2 ) @@ -308,13 +307,13 @@ SUBROUTINE CUNBDB2( M, P, Q, X11, LDX11, X21, LDX21, THETA, PHI, PHI(I) = ATAN2( REAL( X11(I+1,I) ), REAL( X21(I,I) ) ) C = COS( PHI(I) ) S = SIN( PHI(I) ) - X11(I+1,I) = ONE - CALL CLARF( 'L', P-I, Q-I, X11(I+1,I), 1, CONJG(TAUP1(I)), - $ X11(I+1,I+1), LDX11, WORK(ILARF) ) + CALL CLARF1F( 'L', P-I, Q-I, X11(I+1,I), 1, + $ CONJG(TAUP1(I)), X11(I+1,I+1), LDX11, + $ WORK(ILARF) ) END IF - X21(I,I) = ONE - CALL CLARF( 'L', M-P-I+1, Q-I, X21(I,I), 1, CONJG(TAUP2(I)), - $ X21(I,I+1), LDX21, WORK(ILARF) ) + CALL CLARF1F( 'L', M-P-I+1, Q-I, X21(I,I), 1, + $ CONJG(TAUP2(I)), X21(I,I+1), LDX21, + $ WORK(ILARF) ) * END DO * @@ -322,9 +321,9 @@ SUBROUTINE CUNBDB2( M, P, Q, X11, LDX11, X21, LDX21, THETA, PHI, * DO I = P + 1, Q CALL CLARFGP( M-P-I+1, X21(I,I), X21(I+1,I), 1, TAUP2(I) ) - X21(I,I) = ONE - CALL CLARF( 'L', M-P-I+1, Q-I, X21(I,I), 1, CONJG(TAUP2(I)), - $ X21(I,I+1), LDX21, WORK(ILARF) ) + CALL CLARF1F( 'L', M-P-I+1, Q-I, X21(I,I), 1, + $ CONJG(TAUP2(I)), X21(I,I+1), LDX21, + $ WORK(ILARF) ) END DO * RETURN diff --git a/lapack-netlib/SRC/cunbdb3.f b/lapack-netlib/SRC/cunbdb3.f index d024605979..c03de68257 100644 --- a/lapack-netlib/SRC/cunbdb3.f +++ b/lapack-netlib/SRC/cunbdb3.f @@ -5,7 +5,6 @@ * Online html documentation available at * http://www.netlib.org/lapack/explore-html/ * -*> \htmlonly *> Download CUNBDB3 + dependencies *> *> [TGZ] @@ -13,7 +12,6 @@ *> [ZIP] *> *> [TXT] -*> \endhtmlonly * * Definition: * =========== @@ -197,8 +195,10 @@ *> Algorithms, 50(1):33-65, 2009. *> * ===================================================================== - SUBROUTINE CUNBDB3( M, P, Q, X11, LDX11, X21, LDX21, THETA, PHI, + SUBROUTINE CUNBDB3( M, P, Q, X11, LDX11, X21, LDX21, THETA, + $ PHI, $ TAUP1, TAUP2, TAUQ1, WORK, LWORK, INFO ) + IMPLICIT NONE * * -- LAPACK computational routine -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- @@ -215,10 +215,6 @@ SUBROUTINE CUNBDB3( M, P, Q, X11, LDX11, X21, LDX21, THETA, PHI, * * ==================================================================== * -* .. Parameters .. - COMPLEX ONE - PARAMETER ( ONE = (1.0E0,0.0E0) ) -* .. * .. Local Scalars .. REAL C, S INTEGER CHILDINFO, I, ILARF, IORBDB5, LLARF, LORBDB5, @@ -226,7 +222,8 @@ SUBROUTINE CUNBDB3( M, P, Q, X11, LDX11, X21, LDX21, THETA, PHI, LOGICAL LQUERY * .. * .. External Subroutines .. - EXTERNAL CLARF, CLARFGP, CUNBDB5, CSROT, CLACGV, XERBLA + EXTERNAL CLARF1F, CLARFGP, CUNBDB5, CSROT, CLACGV, + $ XERBLA * .. * .. External Functions .. REAL SCNRM2, SROUNDUP_LWORK @@ -287,11 +284,10 @@ SUBROUTINE CUNBDB3( M, P, Q, X11, LDX11, X21, LDX21, THETA, PHI, CALL CLACGV( Q-I+1, X21(I,I), LDX21 ) CALL CLARFGP( Q-I+1, X21(I,I), X21(I,I+1), LDX21, TAUQ1(I) ) S = REAL( X21(I,I) ) - X21(I,I) = ONE - CALL CLARF( 'R', P-I+1, Q-I+1, X21(I,I), LDX21, TAUQ1(I), - $ X11(I,I), LDX11, WORK(ILARF) ) - CALL CLARF( 'R', M-P-I, Q-I+1, X21(I,I), LDX21, TAUQ1(I), - $ X21(I+1,I), LDX21, WORK(ILARF) ) + CALL CLARF1F( 'R', P-I+1, Q-I+1, X21(I,I), LDX21, TAUQ1(I), + $ X11(I,I), LDX11, WORK(ILARF) ) + CALL CLARF1F( 'R', M-P-I, Q-I+1, X21(I,I), LDX21, TAUQ1(I), + $ X21(I+1,I), LDX21, WORK(ILARF) ) CALL CLACGV( Q-I+1, X21(I,I), LDX21 ) C = SQRT( SCNRM2( P-I+1, X11(I,I), 1 )**2 $ + SCNRM2( M-P-I, X21(I+1,I), 1 )**2 ) @@ -302,17 +298,17 @@ SUBROUTINE CUNBDB3( M, P, Q, X11, LDX11, X21, LDX21, THETA, PHI, $ WORK(IORBDB5), LORBDB5, CHILDINFO ) CALL CLARFGP( P-I+1, X11(I,I), X11(I+1,I), 1, TAUP1(I) ) IF( I .LT. M-P ) THEN - CALL CLARFGP( M-P-I, X21(I+1,I), X21(I+2,I), 1, TAUP2(I) ) + CALL CLARFGP( M-P-I, X21(I+1,I), X21(I+2,I), 1, + $ TAUP2(I) ) PHI(I) = ATAN2( REAL( X21(I+1,I) ), REAL( X11(I,I) ) ) C = COS( PHI(I) ) S = SIN( PHI(I) ) - X21(I+1,I) = ONE - CALL CLARF( 'L', M-P-I, Q-I, X21(I+1,I), 1, CONJG(TAUP2(I)), - $ X21(I+1,I+1), LDX21, WORK(ILARF) ) + CALL CLARF1F( 'L', M-P-I, Q-I, X21(I+1,I), 1, + $ CONJG(TAUP2(I)), X21(I+1,I+1), LDX21, + $ WORK(ILARF) ) END IF - X11(I,I) = ONE - CALL CLARF( 'L', P-I+1, Q-I, X11(I,I), 1, CONJG(TAUP1(I)), - $ X11(I,I+1), LDX11, WORK(ILARF) ) + CALL CLARF1F( 'L', P-I+1, Q-I, X11(I,I), 1, CONJG(TAUP1(I)), + $ X11(I,I+1), LDX11, WORK(ILARF) ) * END DO * @@ -320,9 +316,8 @@ SUBROUTINE CUNBDB3( M, P, Q, X11, LDX11, X21, LDX21, THETA, PHI, * DO I = M-P + 1, Q CALL CLARFGP( P-I+1, X11(I,I), X11(I+1,I), 1, TAUP1(I) ) - X11(I,I) = ONE - CALL CLARF( 'L', P-I+1, Q-I, X11(I,I), 1, CONJG(TAUP1(I)), - $ X11(I,I+1), LDX11, WORK(ILARF) ) + CALL CLARF1F( 'L', P-I+1, Q-I, X11(I,I), 1, CONJG(TAUP1(I)), + $ X11(I,I+1), LDX11, WORK(ILARF) ) END DO * RETURN diff --git a/lapack-netlib/SRC/cunbdb4.f b/lapack-netlib/SRC/cunbdb4.f index 33acc1ee51..16e71860c1 100644 --- a/lapack-netlib/SRC/cunbdb4.f +++ b/lapack-netlib/SRC/cunbdb4.f @@ -5,7 +5,6 @@ * Online html documentation available at * http://www.netlib.org/lapack/explore-html/ * -*> \htmlonly *> Download CUNBDB4 + dependencies *> *> [TGZ] @@ -13,7 +12,6 @@ *> [ZIP] *> *> [TXT] -*> \endhtmlonly * * Definition: * =========== @@ -207,9 +205,11 @@ *> Algorithms, 50(1):33-65, 2009. *> * ===================================================================== - SUBROUTINE CUNBDB4( M, P, Q, X11, LDX11, X21, LDX21, THETA, PHI, + SUBROUTINE CUNBDB4( M, P, Q, X11, LDX11, X21, LDX21, THETA, + $ PHI, $ TAUP1, TAUP2, TAUQ1, PHANTOM, WORK, LWORK, $ INFO ) + IMPLICIT NONE * * -- LAPACK computational routine -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- @@ -227,8 +227,8 @@ SUBROUTINE CUNBDB4( M, P, Q, X11, LDX11, X21, LDX21, THETA, PHI, * ==================================================================== * * .. Parameters .. - COMPLEX NEGONE, ONE, ZERO - PARAMETER ( NEGONE = (-1.0E0,0.0E0), ONE = (1.0E0,0.0E0), + COMPLEX NEGONE, ZERO + PARAMETER ( NEGONE = (-1.0E0,0.0E0), $ ZERO = (0.0E0,0.0E0) ) * .. * .. Local Scalars .. @@ -238,7 +238,8 @@ SUBROUTINE CUNBDB4( M, P, Q, X11, LDX11, X21, LDX21, THETA, PHI, LOGICAL LQUERY * .. * .. External Subroutines .. - EXTERNAL CLARF, CLARFGP, CUNBDB5, CSROT, CSCAL, CLACGV, + EXTERNAL CLARF1F, CLARFGP, CUNBDB5, CSROT, CSCAL, + $ CLACGV, $ XERBLA * .. * .. External Functions .. @@ -302,44 +303,43 @@ SUBROUTINE CUNBDB4( M, P, Q, X11, LDX11, X21, LDX21, THETA, PHI, $ LORBDB5, CHILDINFO ) CALL CSCAL( P, NEGONE, PHANTOM(1), 1 ) CALL CLARFGP( P, PHANTOM(1), PHANTOM(2), 1, TAUP1(1) ) - CALL CLARFGP( M-P, PHANTOM(P+1), PHANTOM(P+2), 1, TAUP2(1) ) + CALL CLARFGP( M-P, PHANTOM(P+1), PHANTOM(P+2), 1, + $ TAUP2(1) ) THETA(I) = ATAN2( REAL( PHANTOM(1) ), REAL( PHANTOM(P+1) ) ) C = COS( THETA(I) ) S = SIN( THETA(I) ) - PHANTOM(1) = ONE - PHANTOM(P+1) = ONE - CALL CLARF( 'L', P, Q, PHANTOM(1), 1, CONJG(TAUP1(1)), X11, - $ LDX11, WORK(ILARF) ) - CALL CLARF( 'L', M-P, Q, PHANTOM(P+1), 1, CONJG(TAUP2(1)), - $ X21, LDX21, WORK(ILARF) ) + CALL CLARF1F( 'L', P, Q, PHANTOM(1), 1, CONJG(TAUP1(1)), + $ X11, LDX11, WORK(ILARF) ) + CALL CLARF1F( 'L', M-P, Q, PHANTOM(P+1), 1, + $ CONJG(TAUP2(1)), X21, LDX21, WORK(ILARF) ) ELSE CALL CUNBDB5( P-I+1, M-P-I+1, Q-I+1, X11(I,I-1), 1, $ X21(I,I-1), 1, X11(I,I), LDX11, X21(I,I), $ LDX21, WORK(IORBDB5), LORBDB5, CHILDINFO ) CALL CSCAL( P-I+1, NEGONE, X11(I,I-1), 1 ) - CALL CLARFGP( P-I+1, X11(I,I-1), X11(I+1,I-1), 1, TAUP1(I) ) + CALL CLARFGP( P-I+1, X11(I,I-1), X11(I+1,I-1), 1, + $ TAUP1(I) ) CALL CLARFGP( M-P-I+1, X21(I,I-1), X21(I+1,I-1), 1, $ TAUP2(I) ) THETA(I) = ATAN2( REAL( X11(I,I-1) ), REAL( X21(I,I-1) ) ) C = COS( THETA(I) ) S = SIN( THETA(I) ) - X11(I,I-1) = ONE - X21(I,I-1) = ONE - CALL CLARF( 'L', P-I+1, Q-I+1, X11(I,I-1), 1, - $ CONJG(TAUP1(I)), X11(I,I), LDX11, WORK(ILARF) ) - CALL CLARF( 'L', M-P-I+1, Q-I+1, X21(I,I-1), 1, - $ CONJG(TAUP2(I)), X21(I,I), LDX21, WORK(ILARF) ) + CALL CLARF1F( 'L', P-I+1, Q-I+1, X11(I,I-1), 1, + $ CONJG(TAUP1(I)), X11(I,I), LDX11, + $ WORK(ILARF) ) + CALL CLARF1F( 'L', M-P-I+1, Q-I+1, X21(I,I-1), 1, + $ CONJG(TAUP2(I)), X21(I,I), LDX21, + $ WORK(ILARF) ) END IF * CALL CSROT( Q-I+1, X11(I,I), LDX11, X21(I,I), LDX21, S, -C ) CALL CLACGV( Q-I+1, X21(I,I), LDX21 ) CALL CLARFGP( Q-I+1, X21(I,I), X21(I,I+1), LDX21, TAUQ1(I) ) C = REAL( X21(I,I) ) - X21(I,I) = ONE - CALL CLARF( 'R', P-I, Q-I+1, X21(I,I), LDX21, TAUQ1(I), - $ X11(I+1,I), LDX11, WORK(ILARF) ) - CALL CLARF( 'R', M-P-I, Q-I+1, X21(I,I), LDX21, TAUQ1(I), - $ X21(I+1,I), LDX21, WORK(ILARF) ) + CALL CLARF1F( 'R', P-I, Q-I+1, X21(I,I), LDX21, TAUQ1(I), + $ X11(I+1,I), LDX11, WORK(ILARF) ) + CALL CLARF1F( 'R', M-P-I, Q-I+1, X21(I,I), LDX21, TAUQ1(I), + $ X21(I+1,I), LDX21, WORK(ILARF) ) CALL CLACGV( Q-I+1, X21(I,I), LDX21 ) IF( I .LT. M-Q ) THEN S = SQRT( SCNRM2( P-I, X11(I+1,I), 1 )**2 @@ -354,11 +354,10 @@ SUBROUTINE CUNBDB4( M, P, Q, X11, LDX11, X21, LDX21, THETA, PHI, DO I = M - Q + 1, P CALL CLACGV( Q-I+1, X11(I,I), LDX11 ) CALL CLARFGP( Q-I+1, X11(I,I), X11(I,I+1), LDX11, TAUQ1(I) ) - X11(I,I) = ONE - CALL CLARF( 'R', P-I, Q-I+1, X11(I,I), LDX11, TAUQ1(I), - $ X11(I+1,I), LDX11, WORK(ILARF) ) - CALL CLARF( 'R', Q-P, Q-I+1, X11(I,I), LDX11, TAUQ1(I), - $ X21(M-Q+1,I), LDX21, WORK(ILARF) ) + CALL CLARF1F( 'R', P-I, Q-I+1, X11(I,I), LDX11, TAUQ1(I), + $ X11(I+1,I), LDX11, WORK(ILARF) ) + CALL CLARF1F( 'R', Q-P, Q-I+1, X11(I,I), LDX11, TAUQ1(I), + $ X21(M-Q+1,I), LDX21, WORK(ILARF) ) CALL CLACGV( Q-I+1, X11(I,I), LDX11 ) END DO * @@ -366,11 +365,12 @@ SUBROUTINE CUNBDB4( M, P, Q, X11, LDX11, X21, LDX21, THETA, PHI, * DO I = P + 1, Q CALL CLACGV( Q-I+1, X21(M-Q+I-P,I), LDX21 ) - CALL CLARFGP( Q-I+1, X21(M-Q+I-P,I), X21(M-Q+I-P,I+1), LDX21, + CALL CLARFGP( Q-I+1, X21(M-Q+I-P,I), X21(M-Q+I-P,I+1), + $ LDX21, $ TAUQ1(I) ) - X21(M-Q+I-P,I) = ONE - CALL CLARF( 'R', Q-I, Q-I+1, X21(M-Q+I-P,I), LDX21, TAUQ1(I), - $ X21(M-Q+I-P+1,I), LDX21, WORK(ILARF) ) + CALL CLARF1F( 'R', Q-I, Q-I+1, X21(M-Q+I-P,I), LDX21, + $ TAUQ1(I), X21(M-Q+I-P+1,I), LDX21, + $ WORK(ILARF) ) CALL CLACGV( Q-I+1, X21(M-Q+I-P,I), LDX21 ) END DO * diff --git a/lapack-netlib/SRC/cung2l.f b/lapack-netlib/SRC/cung2l.f index a05843a5d3..477876cc5c 100644 --- a/lapack-netlib/SRC/cung2l.f +++ b/lapack-netlib/SRC/cung2l.f @@ -5,7 +5,6 @@ * Online html documentation available at * http://www.netlib.org/lapack/explore-html/ * -*> \htmlonly *> Download CUNG2L + dependencies *> *> [TGZ] @@ -13,7 +12,6 @@ *> [ZIP] *> *> [TXT] -*> \endhtmlonly * * Definition: * =========== @@ -107,10 +105,11 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup complexOTHERcomputational +*> \ingroup ung2l * * ===================================================================== SUBROUTINE CUNG2L( M, N, K, A, LDA, TAU, WORK, INFO ) + IMPLICIT NONE * * -- LAPACK computational routine -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- @@ -134,7 +133,7 @@ SUBROUTINE CUNG2L( M, N, K, A, LDA, TAU, WORK, INFO ) INTEGER I, II, J, L * .. * .. External Subroutines .. - EXTERNAL CLARF, CSCAL, XERBLA + EXTERNAL CLARF1L, CSCAL, XERBLA * .. * .. Intrinsic Functions .. INTRINSIC MAX @@ -178,8 +177,8 @@ SUBROUTINE CUNG2L( M, N, K, A, LDA, TAU, WORK, INFO ) * Apply H(i) to A(1:m-k+i,1:n-k+i) from the left * A( M-N+II, II ) = ONE - CALL CLARF( 'Left', M-N+II, II-1, A( 1, II ), 1, TAU( I ), A, - $ LDA, WORK ) + CALL CLARF1L( 'Left', M-N+II, II-1, A( 1, II ), 1, TAU( I ), + $ A, LDA, WORK ) CALL CSCAL( M-N+II-1, -TAU( I ), A( 1, II ), 1 ) A( M-N+II, II ) = ONE - TAU( I ) * diff --git a/lapack-netlib/SRC/cung2r.f b/lapack-netlib/SRC/cung2r.f index a984818c1e..d48e050aa9 100644 --- a/lapack-netlib/SRC/cung2r.f +++ b/lapack-netlib/SRC/cung2r.f @@ -5,7 +5,6 @@ * Online html documentation available at * http://www.netlib.org/lapack/explore-html/ * -*> \htmlonly *> Download CUNG2R + dependencies *> *> [TGZ] @@ -13,7 +12,6 @@ *> [ZIP] *> *> [TXT] -*> \endhtmlonly * * Definition: * =========== @@ -107,10 +105,11 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup complexOTHERcomputational +*> \ingroup ung2r * * ===================================================================== SUBROUTINE CUNG2R( M, N, K, A, LDA, TAU, WORK, INFO ) + IMPLICIT NONE * * -- LAPACK computational routine -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- @@ -134,7 +133,7 @@ SUBROUTINE CUNG2R( M, N, K, A, LDA, TAU, WORK, INFO ) INTEGER I, J, L * .. * .. External Subroutines .. - EXTERNAL CLARF, CSCAL, XERBLA + EXTERNAL CLARF1F, CSCAL, XERBLA * .. * .. Intrinsic Functions .. INTRINSIC MAX @@ -177,9 +176,8 @@ SUBROUTINE CUNG2R( M, N, K, A, LDA, TAU, WORK, INFO ) * Apply H(i) to A(i:m,i:n) from the left * IF( I.LT.N ) THEN - A( I, I ) = ONE - CALL CLARF( 'Left', M-I+1, N-I, A( I, I ), 1, TAU( I ), - $ A( I, I+1 ), LDA, WORK ) + CALL CLARF1F( 'Left', M-I+1, N-I, A( I, I ), 1, TAU( I ), + $ A( I, I+1 ), LDA, WORK ) END IF IF( I.LT.M ) $ CALL CSCAL( M-I, -TAU( I ), A( I+1, I ), 1 ) diff --git a/lapack-netlib/SRC/cungl2.f b/lapack-netlib/SRC/cungl2.f index 81a3b89cd8..8b5b2b8457 100644 --- a/lapack-netlib/SRC/cungl2.f +++ b/lapack-netlib/SRC/cungl2.f @@ -5,7 +5,6 @@ * Online html documentation available at * http://www.netlib.org/lapack/explore-html/ * -*> \htmlonly *> Download CUNGL2 + dependencies *> *> [TGZ] @@ -13,7 +12,6 @@ *> [ZIP] *> *> [TXT] -*> \endhtmlonly * * Definition: * =========== @@ -106,10 +104,11 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup complexOTHERcomputational +*> \ingroup ungl2 * * ===================================================================== SUBROUTINE CUNGL2( M, N, K, A, LDA, TAU, WORK, INFO ) + IMPLICIT NONE * * -- LAPACK computational routine -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- @@ -133,7 +132,7 @@ SUBROUTINE CUNGL2( M, N, K, A, LDA, TAU, WORK, INFO ) INTEGER I, J, L * .. * .. External Subroutines .. - EXTERNAL CLACGV, CLARF, CSCAL, XERBLA + EXTERNAL CLACGV, CLARF1F, CSCAL, XERBLA * .. * .. Intrinsic Functions .. INTRINSIC CONJG, MAX @@ -182,9 +181,9 @@ SUBROUTINE CUNGL2( M, N, K, A, LDA, TAU, WORK, INFO ) IF( I.LT.N ) THEN CALL CLACGV( N-I, A( I, I+1 ), LDA ) IF( I.LT.M ) THEN - A( I, I ) = ONE - CALL CLARF( 'Right', M-I, N-I+1, A( I, I ), LDA, - $ CONJG( TAU( I ) ), A( I+1, I ), LDA, WORK ) + CALL CLARF1F( 'Right', M-I, N-I+1, A( I, I ), LDA, + $ CONJG( TAU( I ) ), A( I+1, I ), LDA, + $ WORK ) END IF CALL CSCAL( N-I, -TAU( I ), A( I, I+1 ), LDA ) CALL CLACGV( N-I, A( I, I+1 ), LDA ) diff --git a/lapack-netlib/SRC/cungr2.f b/lapack-netlib/SRC/cungr2.f index 1f2f2b4610..e421117f02 100644 --- a/lapack-netlib/SRC/cungr2.f +++ b/lapack-netlib/SRC/cungr2.f @@ -5,7 +5,6 @@ * Online html documentation available at * http://www.netlib.org/lapack/explore-html/ * -*> \htmlonly *> Download CUNGR2 + dependencies *> *> [TGZ] @@ -13,7 +12,6 @@ *> [ZIP] *> *> [TXT] -*> \endhtmlonly * * Definition: * =========== @@ -107,10 +105,11 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup complexOTHERcomputational +*> \ingroup ungr2 * * ===================================================================== SUBROUTINE CUNGR2( M, N, K, A, LDA, TAU, WORK, INFO ) + IMPLICIT NONE * * -- LAPACK computational routine -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- @@ -134,7 +133,7 @@ SUBROUTINE CUNGR2( M, N, K, A, LDA, TAU, WORK, INFO ) INTEGER I, II, J, L * .. * .. External Subroutines .. - EXTERNAL CLACGV, CLARF, CSCAL, XERBLA + EXTERNAL CLACGV, CLARF1L, CSCAL, XERBLA * .. * .. Intrinsic Functions .. INTRINSIC CONJG, MAX @@ -183,8 +182,8 @@ SUBROUTINE CUNGR2( M, N, K, A, LDA, TAU, WORK, INFO ) * CALL CLACGV( N-M+II-1, A( II, 1 ), LDA ) A( II, N-M+II ) = ONE - CALL CLARF( 'Right', II-1, N-M+II, A( II, 1 ), LDA, - $ CONJG( TAU( I ) ), A, LDA, WORK ) + CALL CLARF1L( 'Right', II-1, N-M+II, A( II, 1 ), LDA, + $ CONJG( TAU( I ) ), A, LDA, WORK ) CALL CSCAL( N-M+II-1, -TAU( I ), A( II, 1 ), LDA ) CALL CLACGV( N-M+II-1, A( II, 1 ), LDA ) A( II, N-M+II ) = ONE - CONJG( TAU( I ) ) diff --git a/lapack-netlib/SRC/cunm2l.f b/lapack-netlib/SRC/cunm2l.f index 416c0a0c36..0b6ffc8ca0 100644 --- a/lapack-netlib/SRC/cunm2l.f +++ b/lapack-netlib/SRC/cunm2l.f @@ -5,7 +5,6 @@ * Online html documentation available at * http://www.netlib.org/lapack/explore-html/ * -*> \htmlonly *> Download CUNM2L + dependencies *> *> [TGZ] @@ -13,7 +12,6 @@ *> [ZIP] *> *> [TXT] -*> \endhtmlonly * * Definition: * =========== @@ -151,11 +149,12 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup complexOTHERcomputational +*> \ingroup unm2l * * ===================================================================== SUBROUTINE CUNM2L( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, $ WORK, INFO ) + IMPLICIT NONE * * -- LAPACK computational routine -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- @@ -171,21 +170,17 @@ SUBROUTINE CUNM2L( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, * * ===================================================================== * -* .. Parameters .. - COMPLEX ONE - PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ) ) -* .. * .. Local Scalars .. LOGICAL LEFT, NOTRAN INTEGER I, I1, I2, I3, MI, NI, NQ - COMPLEX AII, TAUI + COMPLEX TAUI * .. * .. External Functions .. LOGICAL LSAME EXTERNAL LSAME * .. * .. External Subroutines .. - EXTERNAL CLARF, XERBLA + EXTERNAL CLARF1L, XERBLA * .. * .. Intrinsic Functions .. INTRINSIC CONJG, MAX @@ -266,10 +261,8 @@ SUBROUTINE CUNM2L( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, ELSE TAUI = CONJG( TAU( I ) ) END IF - AII = A( NQ-K+I, I ) - A( NQ-K+I, I ) = ONE - CALL CLARF( SIDE, MI, NI, A( 1, I ), 1, TAUI, C, LDC, WORK ) - A( NQ-K+I, I ) = AII + CALL CLARF1L( SIDE, MI, NI, A( 1, I ), 1, TAUI, C, LDC, + $ WORK ) 10 CONTINUE RETURN * diff --git a/lapack-netlib/SRC/cunm2r.f b/lapack-netlib/SRC/cunm2r.f index a79e9a78d0..c357356235 100644 --- a/lapack-netlib/SRC/cunm2r.f +++ b/lapack-netlib/SRC/cunm2r.f @@ -5,7 +5,6 @@ * Online html documentation available at * http://www.netlib.org/lapack/explore-html/ * -*> \htmlonly *> Download CUNM2R + dependencies *> *> [TGZ] @@ -13,7 +12,6 @@ *> [ZIP] *> *> [TXT] -*> \endhtmlonly * * Definition: * =========== @@ -151,11 +149,12 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup complexOTHERcomputational +*> \ingroup unm2r * * ===================================================================== SUBROUTINE CUNM2R( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, $ WORK, INFO ) + IMPLICIT NONE * * -- LAPACK computational routine -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- @@ -171,21 +170,17 @@ SUBROUTINE CUNM2R( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, * * ===================================================================== * -* .. Parameters .. - COMPLEX ONE - PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ) ) -* .. * .. Local Scalars .. LOGICAL LEFT, NOTRAN INTEGER I, I1, I2, I3, IC, JC, MI, NI, NQ - COMPLEX AII, TAUI + COMPLEX TAUI * .. * .. External Functions .. LOGICAL LSAME EXTERNAL LSAME * .. * .. External Subroutines .. - EXTERNAL CLARF, XERBLA + EXTERNAL CLARF1F, XERBLA * .. * .. Intrinsic Functions .. INTRINSIC CONJG, MAX @@ -270,11 +265,8 @@ SUBROUTINE CUNM2R( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, ELSE TAUI = CONJG( TAU( I ) ) END IF - AII = A( I, I ) - A( I, I ) = ONE - CALL CLARF( SIDE, MI, NI, A( I, I ), 1, TAUI, C( IC, JC ), LDC, - $ WORK ) - A( I, I ) = AII + CALL CLARF1F( SIDE, MI, NI, A( I, I ), 1, TAUI, C( IC, JC ), + $ LDC, WORK ) 10 CONTINUE RETURN * diff --git a/lapack-netlib/SRC/cunml2.f b/lapack-netlib/SRC/cunml2.f index 6af8cc0358..edd0a39ef2 100644 --- a/lapack-netlib/SRC/cunml2.f +++ b/lapack-netlib/SRC/cunml2.f @@ -5,7 +5,6 @@ * Online html documentation available at * http://www.netlib.org/lapack/explore-html/ * -*> \htmlonly *> Download CUNML2 + dependencies *> *> [TGZ] @@ -13,7 +12,6 @@ *> [ZIP] *> *> [TXT] -*> \endhtmlonly * * Definition: * =========== @@ -151,11 +149,12 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup complexOTHERcomputational +*> \ingroup unml2 * * ===================================================================== SUBROUTINE CUNML2( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, $ WORK, INFO ) + IMPLICIT NONE * * -- LAPACK computational routine -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- @@ -171,21 +170,17 @@ SUBROUTINE CUNML2( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, * * ===================================================================== * -* .. Parameters .. - COMPLEX ONE - PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ) ) -* .. * .. Local Scalars .. LOGICAL LEFT, NOTRAN INTEGER I, I1, I2, I3, IC, JC, MI, NI, NQ - COMPLEX AII, TAUI + COMPLEX TAUI * .. * .. External Functions .. LOGICAL LSAME EXTERNAL LSAME * .. * .. External Subroutines .. - EXTERNAL CLACGV, CLARF, XERBLA + EXTERNAL CLACGV, CLARF1F, XERBLA * .. * .. Intrinsic Functions .. INTRINSIC CONJG, MAX @@ -272,11 +267,8 @@ SUBROUTINE CUNML2( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, END IF IF( I.LT.NQ ) $ CALL CLACGV( NQ-I, A( I, I+1 ), LDA ) - AII = A( I, I ) - A( I, I ) = ONE - CALL CLARF( SIDE, MI, NI, A( I, I ), LDA, TAUI, C( IC, JC ), - $ LDC, WORK ) - A( I, I ) = AII + CALL CLARF1F( SIDE, MI, NI, A( I, I ), LDA, TAUI, C( IC, + $ JC ), LDC, WORK ) IF( I.LT.NQ ) $ CALL CLACGV( NQ-I, A( I, I+1 ), LDA ) 10 CONTINUE diff --git a/lapack-netlib/SRC/cunmr2.f b/lapack-netlib/SRC/cunmr2.f index ebd4cfbb64..ca4f9fd6f8 100644 --- a/lapack-netlib/SRC/cunmr2.f +++ b/lapack-netlib/SRC/cunmr2.f @@ -5,7 +5,6 @@ * Online html documentation available at * http://www.netlib.org/lapack/explore-html/ * -*> \htmlonly *> Download CUNMR2 + dependencies *> *> [TGZ] @@ -13,7 +12,6 @@ *> [ZIP] *> *> [TXT] -*> \endhtmlonly * * Definition: * =========== @@ -151,11 +149,12 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup complexOTHERcomputational +*> \ingroup unmr2 * * ===================================================================== SUBROUTINE CUNMR2( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, $ WORK, INFO ) + IMPLICIT NONE * * -- LAPACK computational routine -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- @@ -171,21 +170,17 @@ SUBROUTINE CUNMR2( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, * * ===================================================================== * -* .. Parameters .. - COMPLEX ONE - PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ) ) -* .. * .. Local Scalars .. LOGICAL LEFT, NOTRAN INTEGER I, I1, I2, I3, MI, NI, NQ - COMPLEX AII, TAUI + COMPLEX TAUI * .. * .. External Functions .. LOGICAL LSAME EXTERNAL LSAME * .. * .. External Subroutines .. - EXTERNAL CLACGV, CLARF, XERBLA + EXTERNAL CLACGV, CLARF1L, XERBLA * .. * .. Intrinsic Functions .. INTRINSIC CONJG, MAX @@ -267,10 +262,8 @@ SUBROUTINE CUNMR2( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, TAUI = TAU( I ) END IF CALL CLACGV( NQ-K+I-1, A( I, 1 ), LDA ) - AII = A( I, NQ-K+I ) - A( I, NQ-K+I ) = ONE - CALL CLARF( SIDE, MI, NI, A( I, 1 ), LDA, TAUI, C, LDC, WORK ) - A( I, NQ-K+I ) = AII + CALL CLARF1L( SIDE, MI, NI, A( I, 1 ), LDA, TAUI, C, LDC, + $ WORK ) CALL CLACGV( NQ-K+I-1, A( I, 1 ), LDA ) 10 CONTINUE RETURN diff --git a/lapack-netlib/SRC/cupmtr.f b/lapack-netlib/SRC/cupmtr.f index 2629e91792..b4fb38e854 100644 --- a/lapack-netlib/SRC/cupmtr.f +++ b/lapack-netlib/SRC/cupmtr.f @@ -5,7 +5,6 @@ * Online html documentation available at * http://www.netlib.org/lapack/explore-html/ * -*> \htmlonly *> Download CUPMTR + dependencies *> *> [TGZ] @@ -13,7 +12,6 @@ *> [ZIP] *> *> [TXT] -*> \endhtmlonly * * Definition: * =========== @@ -142,11 +140,13 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup complexOTHERcomputational +*> \ingroup upmtr * * ===================================================================== - SUBROUTINE CUPMTR( SIDE, UPLO, TRANS, M, N, AP, TAU, C, LDC, WORK, + SUBROUTINE CUPMTR( SIDE, UPLO, TRANS, M, N, AP, TAU, C, LDC, + $ WORK, $ INFO ) + IMPLICIT NONE * * -- LAPACK computational routine -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- @@ -162,21 +162,17 @@ SUBROUTINE CUPMTR( SIDE, UPLO, TRANS, M, N, AP, TAU, C, LDC, WORK, * * ===================================================================== * -* .. Parameters .. - COMPLEX ONE - PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ) ) -* .. * .. Local Scalars .. LOGICAL FORWRD, LEFT, NOTRAN, UPPER INTEGER I, I1, I2, I3, IC, II, JC, MI, NI, NQ - COMPLEX AII, TAUI + COMPLEX TAUI * .. * .. External Functions .. LOGICAL LSAME EXTERNAL LSAME * .. * .. External Subroutines .. - EXTERNAL CLARF, XERBLA + EXTERNAL CLARF1F, CLARF1L, XERBLA * .. * .. Intrinsic Functions .. INTRINSIC CONJG, MAX @@ -265,11 +261,8 @@ SUBROUTINE CUPMTR( SIDE, UPLO, TRANS, M, N, AP, TAU, C, LDC, WORK, ELSE TAUI = CONJG( TAU( I ) ) END IF - AII = AP( II ) - AP( II ) = ONE - CALL CLARF( SIDE, MI, NI, AP( II-I+1 ), 1, TAUI, C, LDC, - $ WORK ) - AP( II ) = AII + CALL CLARF1L( SIDE, MI, NI, AP( II-I+1 ), 1, TAUI, C, + $ LDC, WORK ) * IF( FORWRD ) THEN II = II + I + 2 @@ -305,8 +298,6 @@ SUBROUTINE CUPMTR( SIDE, UPLO, TRANS, M, N, AP, TAU, C, LDC, WORK, END IF * DO 20 I = I1, I2, I3 - AII = AP( II ) - AP( II ) = ONE IF( LEFT ) THEN * * H(i) or H(i)**H is applied to C(i+1:m,1:n) @@ -328,9 +319,8 @@ SUBROUTINE CUPMTR( SIDE, UPLO, TRANS, M, N, AP, TAU, C, LDC, WORK, ELSE TAUI = CONJG( TAU( I ) ) END IF - CALL CLARF( SIDE, MI, NI, AP( II ), 1, TAUI, C( IC, JC ), - $ LDC, WORK ) - AP( II ) = AII + CALL CLARF1F( SIDE, MI, NI, AP( II ), 1, TAUI, C( IC, + $ JC ), LDC, WORK ) * IF( FORWRD ) THEN II = II + NQ - I + 1 diff --git a/lapack-netlib/SRC/dgebd2.f b/lapack-netlib/SRC/dgebd2.f index daaa187aff..b94bcc4784 100644 --- a/lapack-netlib/SRC/dgebd2.f +++ b/lapack-netlib/SRC/dgebd2.f @@ -5,7 +5,6 @@ * Online html documentation available at * http://www.netlib.org/lapack/explore-html/ * -*> \htmlonly *> Download DGEBD2 + dependencies *> *> [TGZ] @@ -13,7 +12,6 @@ *> [ZIP] *> *> [TXT] -*> \endhtmlonly * * Definition: * =========== @@ -132,7 +130,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup doubleGEcomputational +*> \ingroup gebd2 * *> \par Further Details: * ===================== @@ -186,6 +184,7 @@ *> * ===================================================================== SUBROUTINE DGEBD2( M, N, A, LDA, D, E, TAUQ, TAUP, WORK, INFO ) + IMPLICIT NONE * * -- LAPACK computational routine -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- @@ -202,14 +201,14 @@ SUBROUTINE DGEBD2( M, N, A, LDA, D, E, TAUQ, TAUP, WORK, INFO ) * ===================================================================== * * .. Parameters .. - DOUBLE PRECISION ZERO, ONE - PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) + DOUBLE PRECISION ZERO + PARAMETER ( ZERO = 0.0D+0 ) * .. * .. Local Scalars .. INTEGER I * .. * .. External Subroutines .. - EXTERNAL DLARF, DLARFG, XERBLA + EXTERNAL DLARF1F, DLARFG, XERBLA * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN @@ -242,14 +241,13 @@ SUBROUTINE DGEBD2( M, N, A, LDA, D, E, TAUQ, TAUP, WORK, INFO ) CALL DLARFG( M-I+1, A( I, I ), A( MIN( I+1, M ), I ), 1, $ TAUQ( I ) ) D( I ) = A( I, I ) - A( I, I ) = ONE * * Apply H(i) to A(i:m,i+1:n) from the left * IF( I.LT.N ) - $ CALL DLARF( 'Left', M-I+1, N-I, A( I, I ), 1, TAUQ( I ), + $ CALL DLARF1F( 'Left', M-I+1, N-I, A( I, I ), 1, + $ TAUQ( I ), $ A( I, I+1 ), LDA, WORK ) - A( I, I ) = D( I ) * IF( I.LT.N ) THEN * @@ -259,13 +257,11 @@ SUBROUTINE DGEBD2( M, N, A, LDA, D, E, TAUQ, TAUP, WORK, INFO ) CALL DLARFG( N-I, A( I, I+1 ), A( I, MIN( I+2, N ) ), $ LDA, TAUP( I ) ) E( I ) = A( I, I+1 ) - A( I, I+1 ) = ONE * * Apply G(i) to A(i+1:m,i+1:n) from the right * - CALL DLARF( 'Right', M-I, N-I, A( I, I+1 ), LDA, + CALL DLARF1F( 'Right', M-I, N-I, A( I, I+1 ), LDA, $ TAUP( I ), A( I+1, I+1 ), LDA, WORK ) - A( I, I+1 ) = E( I ) ELSE TAUP( I ) = ZERO END IF @@ -278,33 +274,32 @@ SUBROUTINE DGEBD2( M, N, A, LDA, D, E, TAUQ, TAUP, WORK, INFO ) * * Generate elementary reflector G(i) to annihilate A(i,i+1:n) * - CALL DLARFG( N-I+1, A( I, I ), A( I, MIN( I+1, N ) ), LDA, + CALL DLARFG( N-I+1, A( I, I ), A( I, MIN( I+1, N ) ), + $ LDA, $ TAUP( I ) ) D( I ) = A( I, I ) - A( I, I ) = ONE * * Apply G(i) to A(i+1:m,i:n) from the right * IF( I.LT.M ) - $ CALL DLARF( 'Right', M-I, N-I+1, A( I, I ), LDA, + $ CALL DLARF1F( 'Right', M-I, N-I+1, A( I, I ), LDA, $ TAUP( I ), A( I+1, I ), LDA, WORK ) - A( I, I ) = D( I ) * IF( I.LT.M ) THEN * * Generate elementary reflector H(i) to annihilate * A(i+2:m,i) * - CALL DLARFG( M-I, A( I+1, I ), A( MIN( I+2, M ), I ), 1, + CALL DLARFG( M-I, A( I+1, I ), A( MIN( I+2, M ), I ), + $ 1, $ TAUQ( I ) ) E( I ) = A( I+1, I ) - A( I+1, I ) = ONE * * Apply H(i) to A(i+1:m,i+1:n) from the left * - CALL DLARF( 'Left', M-I, N-I, A( I+1, I ), 1, TAUQ( I ), + CALL DLARF1F( 'Left', M-I, N-I, A( I+1, I ), 1, + $ TAUQ( I ), $ A( I+1, I+1 ), LDA, WORK ) - A( I+1, I ) = E( I ) ELSE TAUQ( I ) = ZERO END IF diff --git a/lapack-netlib/SRC/dgeev.f b/lapack-netlib/SRC/dgeev.f index 4677b9f520..b33f278d2d 100644 --- a/lapack-netlib/SRC/dgeev.f +++ b/lapack-netlib/SRC/dgeev.f @@ -228,7 +228,7 @@ SUBROUTINE DGEEV( JOBVL, JOBVR, N, A, LDA, WR, WI, VL, LDVL, VR, $ XERBLA * .. * .. External Functions .. - LOGICAL LSAME + LOGICAL LSAME, DISNAN INTEGER IDAMAX, ILAENV DOUBLE PRECISION DLAMCH, DLANGE, DLAPY2, DNRM2 EXTERNAL LSAME, IDAMAX, ILAENV, DLAMCH, DLANGE, DLAPY2, @@ -350,6 +350,10 @@ SUBROUTINE DGEEV( JOBVL, JOBVR, N, A, LDA, WR, WI, VL, LDVL, VR, ELSE IF( ANRM.GT.BIGNUM ) THEN SCALEA = .TRUE. CSCALE = BIGNUM + ELSE IF ( DISNAN (ANRM ) ) THEN + INFO = -4 + CALL XERBLA ( 'DGEEV', -INFO ) + RETURN END IF IF( SCALEA ) $ CALL DLASCL( 'G', 0, 0, ANRM, CSCALE, N, N, A, LDA, IERR ) diff --git a/lapack-netlib/SRC/dgehd2.f b/lapack-netlib/SRC/dgehd2.f index c71e38433f..eaaf091a60 100644 --- a/lapack-netlib/SRC/dgehd2.f +++ b/lapack-netlib/SRC/dgehd2.f @@ -5,7 +5,6 @@ * Online html documentation available at * http://www.netlib.org/lapack/explore-html/ * -*> \htmlonly *> Download DGEHD2 + dependencies *> *> [TGZ] @@ -13,7 +12,6 @@ *> [ZIP] *> *> [TXT] -*> \endhtmlonly * * Definition: * =========== @@ -106,7 +104,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup doubleGEcomputational +*> \ingroup gehd2 * *> \par Further Details: * ===================== @@ -146,6 +144,7 @@ *> * ===================================================================== SUBROUTINE DGEHD2( N, ILO, IHI, A, LDA, TAU, WORK, INFO ) + IMPLICIT NONE * * -- LAPACK computational routine -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- @@ -166,10 +165,9 @@ SUBROUTINE DGEHD2( N, ILO, IHI, A, LDA, TAU, WORK, INFO ) * .. * .. Local Scalars .. INTEGER I - DOUBLE PRECISION AII * .. * .. External Subroutines .. - EXTERNAL DLARF, DLARFG, XERBLA + EXTERNAL DLARF1F, DLARFG, XERBLA * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN @@ -199,20 +197,17 @@ SUBROUTINE DGEHD2( N, ILO, IHI, A, LDA, TAU, WORK, INFO ) * CALL DLARFG( IHI-I, A( I+1, I ), A( MIN( I+2, N ), I ), 1, $ TAU( I ) ) - AII = A( I+1, I ) - A( I+1, I ) = ONE * * Apply H(i) to A(1:ihi,i+1:ihi) from the right * - CALL DLARF( 'Right', IHI, IHI-I, A( I+1, I ), 1, TAU( I ), + CALL DLARF1F( 'Right', IHI, IHI-I, A( I+1, I ), 1, TAU( I ), $ A( 1, I+1 ), LDA, WORK ) * * Apply H(i) to A(i+1:ihi,i+1:n) from the left * - CALL DLARF( 'Left', IHI-I, N-I, A( I+1, I ), 1, TAU( I ), + CALL DLARF1F( 'Left', IHI-I, N-I, A( I+1, I ), 1, TAU( I ), $ A( I+1, I+1 ), LDA, WORK ) * - A( I+1, I ) = AII 10 CONTINUE * RETURN diff --git a/lapack-netlib/SRC/dgelq2.f b/lapack-netlib/SRC/dgelq2.f index 9915c57d47..b7c8c933d8 100644 --- a/lapack-netlib/SRC/dgelq2.f +++ b/lapack-netlib/SRC/dgelq2.f @@ -5,7 +5,6 @@ * Online html documentation available at * http://www.netlib.org/lapack/explore-html/ * -*> \htmlonly *> Download DGELQ2 + dependencies *> *> [TGZ] @@ -13,7 +12,6 @@ *> [ZIP] *> *> [TXT] -*> \endhtmlonly * * Definition: * =========== @@ -104,7 +102,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup doubleGEcomputational +*> \ingroup gelq2 * *> \par Further Details: * ===================== @@ -126,6 +124,7 @@ *> * ===================================================================== SUBROUTINE DGELQ2( M, N, A, LDA, TAU, WORK, INFO ) + IMPLICIT NONE * * -- LAPACK computational routine -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- @@ -146,10 +145,9 @@ SUBROUTINE DGELQ2( M, N, A, LDA, TAU, WORK, INFO ) * .. * .. Local Scalars .. INTEGER I, K - DOUBLE PRECISION AII * .. * .. External Subroutines .. - EXTERNAL DLARF, DLARFG, XERBLA + EXTERNAL DLARF1F, DLARFG, XERBLA * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN @@ -183,11 +181,9 @@ SUBROUTINE DGELQ2( M, N, A, LDA, TAU, WORK, INFO ) * * Apply H(i) to A(i+1:m,i:n) from the right * - AII = A( I, I ) - A( I, I ) = ONE - CALL DLARF( 'Right', M-I, N-I+1, A( I, I ), LDA, TAU( I ), + CALL DLARF1F( 'Right', M-I, N-I+1, A( I, I ), LDA, + $ TAU( I ), $ A( I+1, I ), LDA, WORK ) - A( I, I ) = AII END IF 10 CONTINUE RETURN diff --git a/lapack-netlib/SRC/dgeql2.f b/lapack-netlib/SRC/dgeql2.f index 2d3ce1419f..1b2f55a658 100644 --- a/lapack-netlib/SRC/dgeql2.f +++ b/lapack-netlib/SRC/dgeql2.f @@ -5,7 +5,6 @@ * Online html documentation available at * http://www.netlib.org/lapack/explore-html/ * -*> \htmlonly *> Download DGEQL2 + dependencies *> *> [TGZ] @@ -13,7 +12,6 @@ *> [ZIP] *> *> [TXT] -*> \endhtmlonly * * Definition: * =========== @@ -98,7 +96,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup doubleGEcomputational +*> \ingroup geql2 * *> \par Further Details: * ===================== @@ -120,6 +118,7 @@ *> * ===================================================================== SUBROUTINE DGEQL2( M, N, A, LDA, TAU, WORK, INFO ) + IMPLICIT NONE * * -- LAPACK computational routine -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- @@ -140,10 +139,9 @@ SUBROUTINE DGEQL2( M, N, A, LDA, TAU, WORK, INFO ) * .. * .. Local Scalars .. INTEGER I, K - DOUBLE PRECISION AII * .. * .. External Subroutines .. - EXTERNAL DLARF, DLARFG, XERBLA + EXTERNAL DLARF1L, DLARFG, XERBLA * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN @@ -177,11 +175,9 @@ SUBROUTINE DGEQL2( M, N, A, LDA, TAU, WORK, INFO ) * * Apply H(i) to A(1:m-k+i,1:n-k+i-1) from the left * - AII = A( M-K+I, N-K+I ) - A( M-K+I, N-K+I ) = ONE - CALL DLARF( 'Left', M-K+I, N-K+I-1, A( 1, N-K+I ), 1, TAU( I ), + CALL DLARF1L( 'Left', M-K+I, N-K+I-1, A( 1, N-K+I ), 1, + $ TAU( I ), $ A, LDA, WORK ) - A( M-K+I, N-K+I ) = AII 10 CONTINUE RETURN * diff --git a/lapack-netlib/SRC/dgeqp3rk.f b/lapack-netlib/SRC/dgeqp3rk.f index b8e41b39cd..8645f88ebb 100644 --- a/lapack-netlib/SRC/dgeqp3rk.f +++ b/lapack-netlib/SRC/dgeqp3rk.f @@ -5,7 +5,6 @@ * Online html documentation available at * http://www.netlib.org/lapack/explore-html/ * -*> \htmlonly *> Download DGEQP3RK + dependencies *> *> [TGZ] @@ -13,7 +12,6 @@ *> [ZIP] *> *> [TXT] -*> \endhtmlonly * * Definition: * =========== @@ -546,27 +544,19 @@ *> C. H. Bischof, Math. and Comp. Sci. Div., Argonne National Lab, USA. *> A BLAS-3 version of the QR factorization with column pivoting. *> LAPACK Working Note 114 -*> \htmlonly *> https://www.netlib.org/lapack/lawnspdf/lawn114.pdf -*> \endhtmlonly *> and in *> SIAM J. Sci. Comput., 19(5):1486-1494, Sept. 1998. -*> \htmlonly *> https://doi.org/10.1137/S1064827595296732 -*> \endhtmlonly *> *> [2] A partial column norm updating strategy developed in 2006. *> Z. Drmac and Z. Bujanovic, Dept. of Math., University of Zagreb, Croatia. *> On the failure of rank revealing QR factorization software – a case study. *> LAPACK Working Note 176. -*> \htmlonly *> http://www.netlib.org/lapack/lawnspdf/lawn176.pdf -*> \endhtmlonly *> and in *> ACM Trans. Math. Softw. 35, 2, Article 12 (July 2008), 28 pages. -*> \htmlonly *> https://doi.org/10.1145/1377612.1377616 -*> \endhtmlonly * *> \par Contributors: * ================== @@ -670,7 +660,7 @@ SUBROUTINE DGEQP3RK( M, N, NRHS, KMAX, ABSTOL, RELTOL, A, LDA, * 1) DGEQP3RK and DLAQP2RK: 2*N to store full and partial * column 2-norms. * 2) DLAQP2RK: N+NRHS-1 to use in WORK array that is used -* in DLARF subroutine inside DLAQP2RK to apply an +* in DLARF1F subroutine inside DLAQP2RK to apply an * elementary reflector from the left. * TOTAL_WORK_SIZE = 3*N + NRHS - 1 * @@ -686,7 +676,7 @@ SUBROUTINE DGEQP3RK( M, N, NRHS, KMAX, ABSTOL, RELTOL, A, LDA, * 1) DGEQP3RK, DLAQP2RK, DLAQP3RK: 2*N to store full and * partial column 2-norms. * 2) DLAQP2RK: N+NRHS-1 to use in WORK array that is used -* in DLARF subroutine to apply an elementary reflector +* in DLARF1F subroutine to apply an elementary reflector * from the left. * 3) DLAQP3RK: NB*(N+NRHS) to use in the work array F that * is used to apply a block reflector from @@ -886,7 +876,8 @@ SUBROUTINE DGEQP3RK( M, N, NRHS, KMAX, ABSTOL, RELTOL, A, LDA, * Determine when to cross over from blocked to unblocked code. * (for N less than NX, unblocked code should be used). * - NX = MAX( 0, ILAENV( IXOVER, 'DGEQP3RK', ' ', M, N, -1, -1 )) + NX = MAX( 0, ILAENV( IXOVER, 'DGEQP3RK', ' ', M, N, -1, + $ -1 )) * IF( NX.LT.MINMN ) THEN * diff --git a/lapack-netlib/SRC/dgeqr2.f b/lapack-netlib/SRC/dgeqr2.f index 5791b3a915..94872f54e6 100644 --- a/lapack-netlib/SRC/dgeqr2.f +++ b/lapack-netlib/SRC/dgeqr2.f @@ -5,7 +5,6 @@ * Online html documentation available at * http://www.netlib.org/lapack/explore-html/ * -*> \htmlonly *> Download DGEQR2 + dependencies *> *> [TGZ] @@ -13,7 +12,6 @@ *> [ZIP] *> *> [TXT] -*> \endhtmlonly * * Definition: * =========== @@ -105,7 +103,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup doubleGEcomputational +*> \ingroup geqr2 * *> \par Further Details: * ===================== @@ -127,6 +125,7 @@ *> * ===================================================================== SUBROUTINE DGEQR2( M, N, A, LDA, TAU, WORK, INFO ) + IMPLICIT NONE * * -- LAPACK computational routine -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- @@ -147,10 +146,9 @@ SUBROUTINE DGEQR2( M, N, A, LDA, TAU, WORK, INFO ) * .. * .. Local Scalars .. INTEGER I, K - DOUBLE PRECISION AII * .. * .. External Subroutines .. - EXTERNAL DLARF, DLARFG, XERBLA + EXTERNAL DLARF1F, DLARFG, XERBLA * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN @@ -184,11 +182,8 @@ SUBROUTINE DGEQR2( M, N, A, LDA, TAU, WORK, INFO ) * * Apply H(i) to A(i:m,i+1:n) from the left * - AII = A( I, I ) - A( I, I ) = ONE - CALL DLARF( 'Left', M-I+1, N-I, A( I, I ), 1, TAU( I ), + CALL DLARF1F( 'Left', M-I+1, N-I, A( I, I ), 1, TAU( I ), $ A( I, I+1 ), LDA, WORK ) - A( I, I ) = AII END IF 10 CONTINUE RETURN diff --git a/lapack-netlib/SRC/dgeqr2p.f b/lapack-netlib/SRC/dgeqr2p.f index c7b62d87d7..cce4d346ea 100644 --- a/lapack-netlib/SRC/dgeqr2p.f +++ b/lapack-netlib/SRC/dgeqr2p.f @@ -5,7 +5,6 @@ * Online html documentation available at * http://www.netlib.org/lapack/explore-html/ * -*> \htmlonly *> Download DGEQR2P + dependencies *> *> [TGZ] @@ -13,7 +12,6 @@ *> [ZIP] *> *> [TXT] -*> \endhtmlonly * * Definition: * =========== @@ -107,7 +105,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup doubleGEcomputational +*> \ingroup geqr2p * *> \par Further Details: * ===================== @@ -131,6 +129,7 @@ *> * ===================================================================== SUBROUTINE DGEQR2P( M, N, A, LDA, TAU, WORK, INFO ) + IMPLICIT NONE * * -- LAPACK computational routine -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- @@ -151,10 +150,9 @@ SUBROUTINE DGEQR2P( M, N, A, LDA, TAU, WORK, INFO ) * .. * .. Local Scalars .. INTEGER I, K - DOUBLE PRECISION AII * .. * .. External Subroutines .. - EXTERNAL DLARF, DLARFGP, XERBLA + EXTERNAL DLARF1F, DLARFGP, XERBLA * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN @@ -188,11 +186,8 @@ SUBROUTINE DGEQR2P( M, N, A, LDA, TAU, WORK, INFO ) * * Apply H(i) to A(i:m,i+1:n) from the left * - AII = A( I, I ) - A( I, I ) = ONE - CALL DLARF( 'Left', M-I+1, N-I, A( I, I ), 1, TAU( I ), + CALL DLARF1F( 'Left', M-I+1, N-I, A( I, I ), 1, TAU( I ), $ A( I, I+1 ), LDA, WORK ) - A( I, I ) = AII END IF 10 CONTINUE RETURN diff --git a/lapack-netlib/SRC/dgerq2.f b/lapack-netlib/SRC/dgerq2.f index 97d33761da..23ff2d068c 100644 --- a/lapack-netlib/SRC/dgerq2.f +++ b/lapack-netlib/SRC/dgerq2.f @@ -5,7 +5,6 @@ * Online html documentation available at * http://www.netlib.org/lapack/explore-html/ * -*> \htmlonly *> Download DGERQ2 + dependencies *> *> [TGZ] @@ -13,7 +12,6 @@ *> [ZIP] *> *> [TXT] -*> \endhtmlonly * * Definition: * =========== @@ -98,7 +96,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup doubleGEcomputational +*> \ingroup gerq2 * *> \par Further Details: * ===================== @@ -120,6 +118,7 @@ *> * ===================================================================== SUBROUTINE DGERQ2( M, N, A, LDA, TAU, WORK, INFO ) + IMPLICIT NONE * * -- LAPACK computational routine -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- @@ -140,10 +139,9 @@ SUBROUTINE DGERQ2( M, N, A, LDA, TAU, WORK, INFO ) * .. * .. Local Scalars .. INTEGER I, K - DOUBLE PRECISION AII * .. * .. External Subroutines .. - EXTERNAL DLARF, DLARFG, XERBLA + EXTERNAL DLARF1L, DLARFG, XERBLA * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN @@ -177,11 +175,8 @@ SUBROUTINE DGERQ2( M, N, A, LDA, TAU, WORK, INFO ) * * Apply H(i) to A(1:m-k+i-1,1:n-k+i) from the right * - AII = A( M-K+I, N-K+I ) - A( M-K+I, N-K+I ) = ONE - CALL DLARF( 'Right', M-K+I-1, N-K+I, A( M-K+I, 1 ), LDA, + CALL DLARF1L( 'Right', M-K+I-1, N-K+I, A( M-K+I, 1 ), LDA, $ TAU( I ), A, LDA, WORK ) - A( M-K+I, N-K+I ) = AII 10 CONTINUE RETURN * diff --git a/lapack-netlib/SRC/dgetc2.f b/lapack-netlib/SRC/dgetc2.f index d2f0ede826..4f9a289d96 100644 --- a/lapack-netlib/SRC/dgetc2.f +++ b/lapack-netlib/SRC/dgetc2.f @@ -5,7 +5,6 @@ * Online html documentation available at * http://www.netlib.org/lapack/explore-html/ * -*> \htmlonly *> Download DGETC2 + dependencies *> *> [TGZ] @@ -13,7 +12,6 @@ *> [ZIP] *> *> [TXT] -*> \endhtmlonly * * Definition: * =========== @@ -98,7 +96,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup doubleGEauxiliary +*> \ingroup getc2 * *> \par Contributors: * ================== @@ -108,6 +106,7 @@ * * ===================================================================== SUBROUTINE DGETC2( N, A, LDA, IPIV, JPIV, INFO ) + IMPLICIT NONE * * -- LAPACK auxiliary routine -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- @@ -132,7 +131,7 @@ SUBROUTINE DGETC2( N, A, LDA, IPIV, JPIV, INFO ) DOUBLE PRECISION BIGNUM, EPS, SMIN, SMLNUM, XMAX * .. * .. External Subroutines .. - EXTERNAL DGER, DSWAP, DLABAD + EXTERNAL DGER, DSWAP * .. * .. External Functions .. DOUBLE PRECISION DLAMCH @@ -155,7 +154,6 @@ SUBROUTINE DGETC2( N, A, LDA, IPIV, JPIV, INFO ) EPS = DLAMCH( 'P' ) SMLNUM = DLAMCH( 'S' ) / EPS BIGNUM = ONE / SMLNUM - CALL DLABAD( SMLNUM, BIGNUM ) * * Handle the case N=1 by itself * @@ -177,8 +175,8 @@ SUBROUTINE DGETC2( N, A, LDA, IPIV, JPIV, INFO ) * Find max element in matrix A * XMAX = ZERO - DO 20 IP = I, N - DO 10 JP = I, N + DO 20 JP = I, N + DO 10 IP = I, N IF( ABS( A( IP, JP ) ).GE.XMAX ) THEN XMAX = ABS( A( IP, JP ) ) IPV = IP diff --git a/lapack-netlib/SRC/dlacpy.f b/lapack-netlib/SRC/dlacpy.f index 917aa1e2a2..ae8cc7aa6b 100644 --- a/lapack-netlib/SRC/dlacpy.f +++ b/lapack-netlib/SRC/dlacpy.f @@ -5,7 +5,6 @@ * Online html documentation available at * http://www.netlib.org/lapack/explore-html/ * -*> \htmlonly *> Download DLACPY + dependencies *> *> [TGZ] @@ -13,7 +12,6 @@ *> [ZIP] *> *> [TXT] -*> \endhtmlonly * * Definition: * =========== @@ -96,10 +94,11 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup OTHERauxiliary +*> \ingroup lacpy * * ===================================================================== SUBROUTINE DLACPY( UPLO, M, N, A, LDA, B, LDB ) + IMPLICIT NONE * * -- LAPACK auxiliary routine -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- @@ -134,7 +133,7 @@ SUBROUTINE DLACPY( UPLO, M, N, A, LDA, B, LDB ) 10 CONTINUE 20 CONTINUE ELSE IF( LSAME( UPLO, 'L' ) ) THEN - DO 40 J = 1, N + DO 40 J = 1, MIN( M, N ) DO 30 I = J, M B( I, J ) = A( I, J ) 30 CONTINUE diff --git a/lapack-netlib/SRC/dlantr.f b/lapack-netlib/SRC/dlantr.f index 9b68f19755..69804b52b0 100644 --- a/lapack-netlib/SRC/dlantr.f +++ b/lapack-netlib/SRC/dlantr.f @@ -5,7 +5,6 @@ * Online html documentation available at * http://www.netlib.org/lapack/explore-html/ * -*> \htmlonly *> Download DLANTR + dependencies *> *> [TGZ] @@ -13,7 +12,6 @@ *> [ZIP] *> *> [TXT] -*> \endhtmlonly * * Definition: * =========== @@ -133,11 +131,13 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup doubleOTHERauxiliary +*> \ingroup lantr * * ===================================================================== - DOUBLE PRECISION FUNCTION DLANTR( NORM, UPLO, DIAG, M, N, A, LDA, + DOUBLE PRECISION FUNCTION DLANTR( NORM, UPLO, DIAG, M, N, A, + $ LDA, $ WORK ) + IMPLICIT NONE * * -- LAPACK auxiliary routine -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- @@ -186,14 +186,16 @@ DOUBLE PRECISION FUNCTION DLANTR( NORM, UPLO, DIAG, M, N, A, LDA, DO 20 J = 1, N DO 10 I = 1, MIN( M, J-1 ) SUM = ABS( A( I, J ) ) - IF( VALUE .LT. SUM .OR. DISNAN( SUM ) ) VALUE = SUM + IF( VALUE .LT. SUM .OR. + $ DISNAN( SUM ) ) VALUE = SUM 10 CONTINUE 20 CONTINUE ELSE - DO 40 J = 1, N + DO 40 J = 1, MIN( M, N ) DO 30 I = J + 1, M SUM = ABS( A( I, J ) ) - IF( VALUE .LT. SUM .OR. DISNAN( SUM ) ) VALUE = SUM + IF( VALUE .LT. SUM .OR. + $ DISNAN( SUM ) ) VALUE = SUM 30 CONTINUE 40 CONTINUE END IF @@ -203,14 +205,16 @@ DOUBLE PRECISION FUNCTION DLANTR( NORM, UPLO, DIAG, M, N, A, LDA, DO 60 J = 1, N DO 50 I = 1, MIN( M, J ) SUM = ABS( A( I, J ) ) - IF( VALUE .LT. SUM .OR. DISNAN( SUM ) ) VALUE = SUM + IF( VALUE .LT. SUM .OR. + $ DISNAN( SUM ) ) VALUE = SUM 50 CONTINUE 60 CONTINUE ELSE - DO 80 J = 1, N + DO 80 J = 1, MIN( M, N ) DO 70 I = J, M SUM = ABS( A( I, J ) ) - IF( VALUE .LT. SUM .OR. DISNAN( SUM ) ) VALUE = SUM + IF( VALUE .LT. SUM .OR. + $ DISNAN( SUM ) ) VALUE = SUM 70 CONTINUE 80 CONTINUE END IF @@ -237,7 +241,7 @@ DOUBLE PRECISION FUNCTION DLANTR( NORM, UPLO, DIAG, M, N, A, LDA, IF( VALUE .LT. SUM .OR. DISNAN( SUM ) ) VALUE = SUM 110 CONTINUE ELSE - DO 140 J = 1, N + DO 140 J = 1, MIN( M, N ) IF( UDIAG ) THEN SUM = ONE DO 120 I = J + 1, M @@ -284,7 +288,7 @@ DOUBLE PRECISION FUNCTION DLANTR( NORM, UPLO, DIAG, M, N, A, LDA, DO 220 I = N + 1, M WORK( I ) = ZERO 220 CONTINUE - DO 240 J = 1, N + DO 240 J = 1, MIN (M, N ) DO 230 I = J + 1, M WORK( I ) = WORK( I ) + ABS( A( I, J ) ) 230 CONTINUE @@ -293,7 +297,7 @@ DOUBLE PRECISION FUNCTION DLANTR( NORM, UPLO, DIAG, M, N, A, LDA, DO 250 I = 1, M WORK( I ) = ZERO 250 CONTINUE - DO 270 J = 1, N + DO 270 J = 1, MIN( M, N ) DO 260 I = J, M WORK( I ) = WORK( I ) + ABS( A( I, J ) ) 260 CONTINUE @@ -305,7 +309,8 @@ DOUBLE PRECISION FUNCTION DLANTR( NORM, UPLO, DIAG, M, N, A, LDA, SUM = WORK( I ) IF( VALUE .LT. SUM .OR. DISNAN( SUM ) ) VALUE = SUM 280 CONTINUE - ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN + ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. + $ ( LSAME( NORM, 'E' ) ) ) THEN * * Find normF(A). * @@ -314,27 +319,29 @@ DOUBLE PRECISION FUNCTION DLANTR( NORM, UPLO, DIAG, M, N, A, LDA, SCALE = ONE SUM = MIN( M, N ) DO 290 J = 2, N - CALL DLASSQ( MIN( M, J-1 ), A( 1, J ), 1, SCALE, SUM ) + CALL DLASSQ( MIN( M, J-1 ), A( 1, J ), 1, SCALE, + $ SUM ) 290 CONTINUE ELSE SCALE = ZERO SUM = ONE DO 300 J = 1, N - CALL DLASSQ( MIN( M, J ), A( 1, J ), 1, SCALE, SUM ) + CALL DLASSQ( MIN( M, J ), A( 1, J ), 1, SCALE, + $ SUM ) 300 CONTINUE END IF ELSE IF( LSAME( DIAG, 'U' ) ) THEN SCALE = ONE SUM = MIN( M, N ) - DO 310 J = 1, N + DO 310 J = 1, MIN( M, N ) CALL DLASSQ( M-J, A( MIN( M, J+1 ), J ), 1, SCALE, $ SUM ) 310 CONTINUE ELSE SCALE = ZERO SUM = ONE - DO 320 J = 1, N + DO 320 J = 1, MIN( M, N ) CALL DLASSQ( M-J+1, A( J, J ), 1, SCALE, SUM ) 320 CONTINUE END IF diff --git a/lapack-netlib/SRC/dlaqp2.f b/lapack-netlib/SRC/dlaqp2.f index b99de6d7d5..d32f075484 100644 --- a/lapack-netlib/SRC/dlaqp2.f +++ b/lapack-netlib/SRC/dlaqp2.f @@ -5,7 +5,6 @@ * Online html documentation available at * http://www.netlib.org/lapack/explore-html/ * -*> \htmlonly *> Download DLAQP2 + dependencies *> *> [TGZ] @@ -13,7 +12,6 @@ *> [ZIP] *> *> [TXT] -*> \endhtmlonly * * Definition: * =========== @@ -122,7 +120,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup doubleOTHERauxiliary +*> \ingroup laqp2 * *> \par Contributors: * ================== @@ -139,13 +137,12 @@ *> *> LAPACK Working Note 176 * -*> \htmlonly *> [PDF] -*> \endhtmlonly * * ===================================================================== SUBROUTINE DLAQP2( M, N, OFFSET, A, LDA, JPVT, TAU, VN1, VN2, $ WORK ) + IMPLICIT NONE * * -- LAPACK auxiliary routine -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- @@ -168,7 +165,7 @@ SUBROUTINE DLAQP2( M, N, OFFSET, A, LDA, JPVT, TAU, VN1, VN2, * .. * .. Local Scalars .. INTEGER I, ITEMP, J, MN, OFFPI, PVT - DOUBLE PRECISION AII, TEMP, TEMP2, TOL3Z + DOUBLE PRECISION TEMP, TEMP2, TOL3Z * .. * .. External Subroutines .. EXTERNAL DLARF, DLARFG, DSWAP @@ -208,7 +205,8 @@ SUBROUTINE DLAQP2( M, N, OFFSET, A, LDA, JPVT, TAU, VN1, VN2, * Generate elementary reflector H(i). * IF( OFFPI.LT.M ) THEN - CALL DLARFG( M-OFFPI+1, A( OFFPI, I ), A( OFFPI+1, I ), 1, + CALL DLARFG( M-OFFPI+1, A( OFFPI, I ), A( OFFPI+1, I ), + $ 1, $ TAU( I ) ) ELSE CALL DLARFG( 1, A( M, I ), A( M, I ), 1, TAU( I ) ) @@ -218,11 +216,8 @@ SUBROUTINE DLAQP2( M, N, OFFSET, A, LDA, JPVT, TAU, VN1, VN2, * * Apply H(i)**T to A(offset+i:m,i+1:n) from the left. * - AII = A( OFFPI, I ) - A( OFFPI, I ) = ONE - CALL DLARF( 'Left', M-OFFPI+1, N-I, A( OFFPI, I ), 1, + CALL DLARF1F( 'Left', M-OFFPI+1, N-I, A( OFFPI, I ), 1, $ TAU( I ), A( OFFPI, I+1 ), LDA, WORK( 1 ) ) - A( OFFPI, I ) = AII END IF * * Update partial column norms. diff --git a/lapack-netlib/SRC/dlaqp2rk.f b/lapack-netlib/SRC/dlaqp2rk.f index aecd6bb69c..ae2d62cac5 100644 --- a/lapack-netlib/SRC/dlaqp2rk.f +++ b/lapack-netlib/SRC/dlaqp2rk.f @@ -5,7 +5,6 @@ * Online html documentation available at * http://www.netlib.org/lapack/explore-html/ * -*> \htmlonly *> Download DLAQP2RK + dependencies *> *> [TGZ] @@ -13,7 +12,6 @@ *> [ZIP] *> *> [TXT] -*> \endhtmlonly * * Definition: * =========== @@ -253,7 +251,7 @@ *> \param[out] WORK *> \verbatim *> WORK is DOUBLE PRECISION array, dimension (N-1) -*> Used in DLARF subroutine to apply an elementary +*> Used in DLARF1F subroutine to apply an elementary *> reflector from the left. *> \endverbatim *> @@ -303,27 +301,19 @@ *> C. H. Bischof, Math. and Comp. Sci. Div., Argonne National Lab, USA. *> A BLAS-3 version of the QR factorization with column pivoting. *> LAPACK Working Note 114 -*> \htmlonly *> https://www.netlib.org/lapack/lawnspdf/lawn114.pdf -*> \endhtmlonly *> and in *> SIAM J. Sci. Comput., 19(5):1486-1494, Sept. 1998. -*> \htmlonly *> https://doi.org/10.1137/S1064827595296732 -*> \endhtmlonly *> *> [2] A partial column norm updating strategy developed in 2006. *> Z. Drmac and Z. Bujanovic, Dept. of Math., University of Zagreb, Croatia. *> On the failure of rank revealing QR factorization software – a case study. *> LAPACK Working Note 176. -*> \htmlonly *> http://www.netlib.org/lapack/lawnspdf/lawn176.pdf -*> \endhtmlonly *> and in *> ACM Trans. Math. Softw. 35, 2, Article 12 (July 2008), 28 pages. -*> \htmlonly *> https://doi.org/10.1145/1377612.1377616 -*> \endhtmlonly * *> \par Contributors: * ================== @@ -365,12 +355,12 @@ SUBROUTINE DLAQP2RK( M, N, NRHS, IOFFSET, KMAX, ABSTOL, RELTOL, PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) * .. * .. Local Scalars .. - INTEGER I, ITEMP, J, JMAXC2NRM, KK, KP, MINMNFACT, - $ MINMNUPDT - DOUBLE PRECISION AIKK, HUGEVAL, TEMP, TEMP2, TOL3Z + INTEGER I, ITEMP, J, JMAXC2NRM, KK, KP, + $ KBOUND, MINMNFACT, MINMNUPDT + DOUBLE PRECISION HUGEVAL, TEMP, TEMP2, TOL3Z * .. * .. External Subroutines .. - EXTERNAL DLARF, DLARFG, DSWAP + EXTERNAL DLARF1F, DLARFG, DSWAP * .. * .. Intrinsic Functions .. INTRINSIC ABS, MAX, MIN, SQRT @@ -397,13 +387,13 @@ SUBROUTINE DLAQP2RK( M, N, NRHS, IOFFSET, KMAX, ABSTOL, RELTOL, * MINMNFACT = MIN( M-IOFFSET, N ) MINMNUPDT = MIN( M-IOFFSET, N+NRHS ) - KMAX = MIN( KMAX, MINMNFACT ) + KBOUND = MIN( KMAX, MINMNFACT ) TOL3Z = SQRT( DLAMCH( 'Epsilon' ) ) HUGEVAL = DLAMCH( 'Overflow' ) * * Compute the factorization, KK is the lomn loop index. * - DO KK = 1, KMAX + DO KK = 1, KBOUND * I = IOFFSET + KK * @@ -621,11 +611,8 @@ SUBROUTINE DLAQP2RK( M, N, NRHS, IOFFSET, KMAX, ABSTOL, RELTOL, * condition is satisfied, not only KK < N+NRHS ) * IF( KK.LT.MINMNUPDT ) THEN - AIKK = A( I, KK ) - A( I, KK ) = ONE - CALL DLARF( 'Left', M-I+1, N+NRHS-KK, A( I, KK ), 1, + CALL DLARF1F( 'Left', M-I+1, N+NRHS-KK, A( I, KK ), 1, $ TAU( KK ), A( I, KK+1 ), LDA, WORK( 1 ) ) - A( I, KK ) = AIKK END IF * IF( KK.LT.MINMNFACT ) THEN @@ -676,7 +663,7 @@ SUBROUTINE DLAQP2RK( M, N, NRHS, IOFFSET, KMAX, ABSTOL, RELTOL, * i.e. no condition was triggered to exit the routine. * Set the number of factorized columns. * - K = KMAX + K = KBOUND * * We reached the end of the loop, i.e. all KMAX columns were * factorized, we need to set MAXC2NRMK and RELMAXC2NRMK before diff --git a/lapack-netlib/SRC/dlaqr2.f b/lapack-netlib/SRC/dlaqr2.f index 515c836582..02ae83cb5d 100644 --- a/lapack-netlib/SRC/dlaqr2.f +++ b/lapack-netlib/SRC/dlaqr2.f @@ -5,7 +5,6 @@ * Online html documentation available at * http://www.netlib.org/lapack/explore-html/ * -*> \htmlonly *> Download DLAQR2 + dependencies *> *> [TGZ] @@ -13,7 +12,6 @@ *> [ZIP] *> *> [TXT] -*> \endhtmlonly * * Definition: * =========== @@ -263,7 +261,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup doubleOTHERauxiliary +*> \ingroup laqr2 * *> \par Contributors: * ================== @@ -272,9 +270,11 @@ *> University of Kansas, USA *> * ===================================================================== - SUBROUTINE DLAQR2( WANTT, WANTZ, N, KTOP, KBOT, NW, H, LDH, ILOZ, + SUBROUTINE DLAQR2( WANTT, WANTZ, N, KTOP, KBOT, NW, H, LDH, + $ ILOZ, $ IHIZ, Z, LDZ, NS, ND, SR, SI, V, LDV, NH, T, $ LDT, NV, WV, LDWV, WORK, LWORK ) + IMPLICIT NONE * * -- LAPACK auxiliary routine -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- @@ -309,8 +309,9 @@ SUBROUTINE DLAQR2( WANTT, WANTZ, N, KTOP, KBOT, NW, H, LDH, ILOZ, EXTERNAL DLAMCH * .. * .. External Subroutines .. - EXTERNAL DCOPY, DGEHRD, DGEMM, DLABAD, DLACPY, DLAHQR, - $ DLANV2, DLARF, DLARFG, DLASET, DORMHR, DTREXC + EXTERNAL DCOPY, DGEHRD, DGEMM, DLACPY, + $ DLAHQR, + $ DLANV2, DLARF1F, DLARFG, DLASET, DORMHR, DTREXC * .. * .. Intrinsic Functions .. INTRINSIC ABS, DBLE, INT, MAX, MIN, SQRT @@ -331,7 +332,8 @@ SUBROUTINE DLAQR2( WANTT, WANTZ, N, KTOP, KBOT, NW, H, LDH, ILOZ, * * ==== Workspace query call to DORMHR ==== * - CALL DORMHR( 'R', 'N', JW, JW, 1, JW-1, T, LDT, WORK, V, LDV, + CALL DORMHR( 'R', 'N', JW, JW, 1, JW-1, T, LDT, WORK, V, + $ LDV, $ WORK, -1, INFO ) LWK2 = INT( WORK( 1 ) ) * @@ -362,7 +364,6 @@ SUBROUTINE DLAQR2( WANTT, WANTZ, N, KTOP, KBOT, NW, H, LDH, ILOZ, * SAFMIN = DLAMCH( 'SAFE MINIMUM' ) SAFMAX = ONE / SAFMIN - CALL DLABAD( SAFMIN, SAFMAX ) ULP = DLAMCH( 'PRECISION' ) SMLNUM = SAFMIN*( DBLE( N ) / ULP ) * @@ -402,7 +403,8 @@ SUBROUTINE DLAQR2( WANTT, WANTZ, N, KTOP, KBOT, NW, H, LDH, ILOZ, * . here and there to keep track.) ==== * CALL DLACPY( 'U', JW, JW, H( KWTOP, KWTOP ), LDH, T, LDT ) - CALL DCOPY( JW-1, H( KWTOP+1, KWTOP ), LDH+1, T( 2, 1 ), LDT+1 ) + CALL DCOPY( JW-1, H( KWTOP+1, KWTOP ), LDH+1, T( 2, 1 ), + $ LDT+1 ) * CALL DLASET( 'A', JW, JW, ZERO, ONE, V, LDV ) CALL DLAHQR( .true., .true., JW, 1, JW, T, LDT, SR( KWTOP ), @@ -449,7 +451,8 @@ SUBROUTINE DLAQR2( WANTT, WANTZ, N, KTOP, KBOT, NW, H, LDH, ILOZ, * . (DTREXC can not fail in this case.) ==== * IFST = NS - CALL DTREXC( 'V', JW, T, LDT, V, LDV, IFST, ILST, WORK, + CALL DTREXC( 'V', JW, T, LDT, V, LDV, IFST, ILST, + $ WORK, $ INFO ) ILST = ILST + 1 END IF @@ -474,7 +477,8 @@ SUBROUTINE DLAQR2( WANTT, WANTZ, N, KTOP, KBOT, NW, H, LDH, ILOZ, * . ILST in case of a rare exchange failure. ==== * IFST = NS - CALL DTREXC( 'V', JW, T, LDT, V, LDV, IFST, ILST, WORK, + CALL DTREXC( 'V', JW, T, LDT, V, LDV, IFST, ILST, + $ WORK, $ INFO ) ILST = ILST + 2 END IF @@ -536,7 +540,8 @@ SUBROUTINE DLAQR2( WANTT, WANTZ, N, KTOP, KBOT, NW, H, LDH, ILOZ, SORTED = .false. IFST = I ILST = K - CALL DTREXC( 'V', JW, T, LDT, V, LDV, IFST, ILST, WORK, + CALL DTREXC( 'V', JW, T, LDT, V, LDV, IFST, ILST, + $ WORK, $ INFO ) IF( INFO.EQ.0 ) THEN I = ILST @@ -591,15 +596,15 @@ SUBROUTINE DLAQR2( WANTT, WANTZ, N, KTOP, KBOT, NW, H, LDH, ILOZ, CALL DCOPY( NS, V, LDV, WORK, 1 ) BETA = WORK( 1 ) CALL DLARFG( NS, BETA, WORK( 2 ), 1, TAU ) - WORK( 1 ) = ONE * - CALL DLASET( 'L', JW-2, JW-2, ZERO, ZERO, T( 3, 1 ), LDT ) + CALL DLASET( 'L', JW-2, JW-2, ZERO, ZERO, T( 3, 1 ), + $ LDT ) * - CALL DLARF( 'L', NS, JW, WORK, 1, TAU, T, LDT, + CALL DLARF1F( 'L', NS, JW, WORK, 1, TAU, T, LDT, $ WORK( JW+1 ) ) - CALL DLARF( 'R', NS, NS, WORK, 1, TAU, T, LDT, + CALL DLARF1F( 'R', NS, NS, WORK, 1, TAU, T, LDT, $ WORK( JW+1 ) ) - CALL DLARF( 'R', JW, NS, WORK, 1, TAU, V, LDV, + CALL DLARF1F( 'R', JW, NS, WORK, 1, TAU, V, LDV, $ WORK( JW+1 ) ) * CALL DGEHRD( JW, 1, NS, T, LDT, WORK, WORK( JW+1 ), @@ -618,7 +623,8 @@ SUBROUTINE DLAQR2( WANTT, WANTZ, N, KTOP, KBOT, NW, H, LDH, ILOZ, * . H and Z, if requested. ==== * IF( NS.GT.1 .AND. S.NE.ZERO ) - $ CALL DORMHR( 'R', 'N', JW, NS, 1, NS, T, LDT, WORK, V, LDV, + $ CALL DORMHR( 'R', 'N', JW, NS, 1, NS, T, LDT, WORK, V, + $ LDV, $ WORK( JW+1 ), LWORK-JW, INFO ) * * ==== Update vertical slab in H ==== @@ -632,7 +638,8 @@ SUBROUTINE DLAQR2( WANTT, WANTZ, N, KTOP, KBOT, NW, H, LDH, ILOZ, KLN = MIN( NV, KWTOP-KROW ) CALL DGEMM( 'N', 'N', KLN, JW, JW, ONE, H( KROW, KWTOP ), $ LDH, V, LDV, ZERO, WV, LDWV ) - CALL DLACPY( 'A', KLN, JW, WV, LDWV, H( KROW, KWTOP ), LDH ) + CALL DLACPY( 'A', KLN, JW, WV, LDWV, H( KROW, KWTOP ), + $ LDH ) 70 CONTINUE * * ==== Update horizontal slab in H ==== @@ -652,7 +659,8 @@ SUBROUTINE DLAQR2( WANTT, WANTZ, N, KTOP, KBOT, NW, H, LDH, ILOZ, IF( WANTZ ) THEN DO 90 KROW = ILOZ, IHIZ, NV KLN = MIN( NV, IHIZ-KROW+1 ) - CALL DGEMM( 'N', 'N', KLN, JW, JW, ONE, Z( KROW, KWTOP ), + CALL DGEMM( 'N', 'N', KLN, JW, JW, ONE, Z( KROW, + $ KWTOP ), $ LDZ, V, LDV, ZERO, WV, LDWV ) CALL DLACPY( 'A', KLN, JW, WV, LDWV, Z( KROW, KWTOP ), $ LDZ ) diff --git a/lapack-netlib/SRC/dlaqr3.f b/lapack-netlib/SRC/dlaqr3.f index 36e08f02e8..9ddd8c7a4a 100644 --- a/lapack-netlib/SRC/dlaqr3.f +++ b/lapack-netlib/SRC/dlaqr3.f @@ -5,7 +5,6 @@ * Online html documentation available at * http://www.netlib.org/lapack/explore-html/ * -*> \htmlonly *> Download DLAQR3 + dependencies *> *> [TGZ] @@ -13,7 +12,6 @@ *> [ZIP] *> *> [TXT] -*> \endhtmlonly * * Definition: * =========== @@ -260,7 +258,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup doubleOTHERauxiliary +*> \ingroup laqr3 * *> \par Contributors: * ================== @@ -269,9 +267,11 @@ *> University of Kansas, USA *> * ===================================================================== - SUBROUTINE DLAQR3( WANTT, WANTZ, N, KTOP, KBOT, NW, H, LDH, ILOZ, + SUBROUTINE DLAQR3( WANTT, WANTZ, N, KTOP, KBOT, NW, H, LDH, + $ ILOZ, $ IHIZ, Z, LDZ, NS, ND, SR, SI, V, LDV, NH, T, $ LDT, NV, WV, LDWV, WORK, LWORK ) + IMPLICIT NONE * * -- LAPACK auxiliary routine -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- @@ -307,9 +307,9 @@ SUBROUTINE DLAQR3( WANTT, WANTZ, N, KTOP, KBOT, NW, H, LDH, ILOZ, EXTERNAL DLAMCH, ILAENV * .. * .. External Subroutines .. - EXTERNAL DCOPY, DGEHRD, DGEMM, DLABAD, DLACPY, DLAHQR, - $ DLANV2, DLAQR4, DLARF, DLARFG, DLASET, DORMHR, - $ DTREXC + EXTERNAL DCOPY, DGEHRD, DGEMM, DLACPY, DLAHQR, + $ DLANV2, + $ DLAQR4, DLARF1F, DLARFG, DLASET, DORMHR, DTREXC * .. * .. Intrinsic Functions .. INTRINSIC ABS, DBLE, INT, MAX, MIN, SQRT @@ -330,13 +330,15 @@ SUBROUTINE DLAQR3( WANTT, WANTZ, N, KTOP, KBOT, NW, H, LDH, ILOZ, * * ==== Workspace query call to DORMHR ==== * - CALL DORMHR( 'R', 'N', JW, JW, 1, JW-1, T, LDT, WORK, V, LDV, + CALL DORMHR( 'R', 'N', JW, JW, 1, JW-1, T, LDT, WORK, V, + $ LDV, $ WORK, -1, INFO ) LWK2 = INT( WORK( 1 ) ) * * ==== Workspace query call to DLAQR4 ==== * - CALL DLAQR4( .true., .true., JW, 1, JW, T, LDT, SR, SI, 1, JW, + CALL DLAQR4( .true., .true., JW, 1, JW, T, LDT, SR, SI, 1, + $ JW, $ V, LDV, WORK, -1, INFQR ) LWK3 = INT( WORK( 1 ) ) * @@ -367,7 +369,6 @@ SUBROUTINE DLAQR3( WANTT, WANTZ, N, KTOP, KBOT, NW, H, LDH, ILOZ, * SAFMIN = DLAMCH( 'SAFE MINIMUM' ) SAFMAX = ONE / SAFMIN - CALL DLABAD( SAFMIN, SAFMAX ) ULP = DLAMCH( 'PRECISION' ) SMLNUM = SAFMIN*( DBLE( N ) / ULP ) * @@ -407,7 +408,8 @@ SUBROUTINE DLAQR3( WANTT, WANTZ, N, KTOP, KBOT, NW, H, LDH, ILOZ, * . here and there to keep track.) ==== * CALL DLACPY( 'U', JW, JW, H( KWTOP, KWTOP ), LDH, T, LDT ) - CALL DCOPY( JW-1, H( KWTOP+1, KWTOP ), LDH+1, T( 2, 1 ), LDT+1 ) + CALL DCOPY( JW-1, H( KWTOP+1, KWTOP ), LDH+1, T( 2, 1 ), + $ LDT+1 ) * CALL DLASET( 'A', JW, JW, ZERO, ONE, V, LDV ) NMIN = ILAENV( 12, 'DLAQR3', 'SV', JW, 1, JW, LWORK ) @@ -460,7 +462,8 @@ SUBROUTINE DLAQR3( WANTT, WANTZ, N, KTOP, KBOT, NW, H, LDH, ILOZ, * . (DTREXC can not fail in this case.) ==== * IFST = NS - CALL DTREXC( 'V', JW, T, LDT, V, LDV, IFST, ILST, WORK, + CALL DTREXC( 'V', JW, T, LDT, V, LDV, IFST, ILST, + $ WORK, $ INFO ) ILST = ILST + 1 END IF @@ -485,7 +488,8 @@ SUBROUTINE DLAQR3( WANTT, WANTZ, N, KTOP, KBOT, NW, H, LDH, ILOZ, * . ILST in case of a rare exchange failure. ==== * IFST = NS - CALL DTREXC( 'V', JW, T, LDT, V, LDV, IFST, ILST, WORK, + CALL DTREXC( 'V', JW, T, LDT, V, LDV, IFST, ILST, + $ WORK, $ INFO ) ILST = ILST + 2 END IF @@ -547,7 +551,8 @@ SUBROUTINE DLAQR3( WANTT, WANTZ, N, KTOP, KBOT, NW, H, LDH, ILOZ, SORTED = .false. IFST = I ILST = K - CALL DTREXC( 'V', JW, T, LDT, V, LDV, IFST, ILST, WORK, + CALL DTREXC( 'V', JW, T, LDT, V, LDV, IFST, ILST, + $ WORK, $ INFO ) IF( INFO.EQ.0 ) THEN I = ILST @@ -602,15 +607,15 @@ SUBROUTINE DLAQR3( WANTT, WANTZ, N, KTOP, KBOT, NW, H, LDH, ILOZ, CALL DCOPY( NS, V, LDV, WORK, 1 ) BETA = WORK( 1 ) CALL DLARFG( NS, BETA, WORK( 2 ), 1, TAU ) - WORK( 1 ) = ONE * - CALL DLASET( 'L', JW-2, JW-2, ZERO, ZERO, T( 3, 1 ), LDT ) + CALL DLASET( 'L', JW-2, JW-2, ZERO, ZERO, T( 3, 1 ), + $ LDT ) * - CALL DLARF( 'L', NS, JW, WORK, 1, TAU, T, LDT, + CALL DLARF1F( 'L', NS, JW, WORK, 1, TAU, T, LDT, $ WORK( JW+1 ) ) - CALL DLARF( 'R', NS, NS, WORK, 1, TAU, T, LDT, + CALL DLARF1F( 'R', NS, NS, WORK, 1, TAU, T, LDT, $ WORK( JW+1 ) ) - CALL DLARF( 'R', JW, NS, WORK, 1, TAU, V, LDV, + CALL DLARF1F( 'R', JW, NS, WORK, 1, TAU, V, LDV, $ WORK( JW+1 ) ) * CALL DGEHRD( JW, 1, NS, T, LDT, WORK, WORK( JW+1 ), @@ -629,7 +634,8 @@ SUBROUTINE DLAQR3( WANTT, WANTZ, N, KTOP, KBOT, NW, H, LDH, ILOZ, * . H and Z, if requested. ==== * IF( NS.GT.1 .AND. S.NE.ZERO ) - $ CALL DORMHR( 'R', 'N', JW, NS, 1, NS, T, LDT, WORK, V, LDV, + $ CALL DORMHR( 'R', 'N', JW, NS, 1, NS, T, LDT, WORK, V, + $ LDV, $ WORK( JW+1 ), LWORK-JW, INFO ) * * ==== Update vertical slab in H ==== @@ -643,7 +649,8 @@ SUBROUTINE DLAQR3( WANTT, WANTZ, N, KTOP, KBOT, NW, H, LDH, ILOZ, KLN = MIN( NV, KWTOP-KROW ) CALL DGEMM( 'N', 'N', KLN, JW, JW, ONE, H( KROW, KWTOP ), $ LDH, V, LDV, ZERO, WV, LDWV ) - CALL DLACPY( 'A', KLN, JW, WV, LDWV, H( KROW, KWTOP ), LDH ) + CALL DLACPY( 'A', KLN, JW, WV, LDWV, H( KROW, KWTOP ), + $ LDH ) 70 CONTINUE * * ==== Update horizontal slab in H ==== @@ -663,7 +670,8 @@ SUBROUTINE DLAQR3( WANTT, WANTZ, N, KTOP, KBOT, NW, H, LDH, ILOZ, IF( WANTZ ) THEN DO 90 KROW = ILOZ, IHIZ, NV KLN = MIN( NV, IHIZ-KROW+1 ) - CALL DGEMM( 'N', 'N', KLN, JW, JW, ONE, Z( KROW, KWTOP ), + CALL DGEMM( 'N', 'N', KLN, JW, JW, ONE, Z( KROW, + $ KWTOP ), $ LDZ, V, LDV, ZERO, WV, LDWV ) CALL DLACPY( 'A', KLN, JW, WV, LDWV, Z( KROW, KWTOP ), $ LDZ ) diff --git a/lapack-netlib/SRC/dlarf1f.c b/lapack-netlib/SRC/dlarf1f.c new file mode 100644 index 0000000000..958a362fcb --- /dev/null +++ b/lapack-netlib/SRC/dlarf1f.c @@ -0,0 +1,569 @@ +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +#if defined(_WIN64) +typedef long long BLASLONG; +typedef unsigned long long BLASULONG; +#else +typedef long BLASLONG; +typedef unsigned long BLASULONG; +#endif + +#ifdef LAPACK_ILP64 +typedef BLASLONG blasint; +#if defined(_WIN64) +#define blasabs(x) llabs(x) +#else +#define blasabs(x) labs(x) +#endif +#else +typedef int blasint; +#define blasabs(x) abs(x) +#endif + +typedef blasint integer; + +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +#ifdef _MSC_VER +static inline _Fcomplex Cf(complex *z) {_Fcomplex zz={z->r , z->i}; return zz;} +static inline _Dcomplex Cd(doublecomplex *z) {_Dcomplex zz={z->r , z->i};return zz;} +static inline _Fcomplex * _pCf(complex *z) {return (_Fcomplex*)z;} +static inline _Dcomplex * _pCd(doublecomplex *z) {return (_Dcomplex*)z;} +#else +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#endif +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#ifdef _MSC_VER +#define c_div(c, a, b) {Cf(c)._Val[0] = (Cf(a)._Val[0]/Cf(b)._Val[0]); Cf(c)._Val[1]=(Cf(a)._Val[1]/Cf(b)._Val[1]);} +#define z_div(c, a, b) {Cd(c)._Val[0] = (Cd(a)._Val[0]/Cd(b)._Val[0]); Cd(c)._Val[1]=(Cd(a)._Val[1]/Cd(b)._Val[1]);} +#else +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#endif +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conjf(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimagf(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +/* Table of constant values */ + +static doublereal c_b4 = 1.; +static doublereal c_b5 = 0.; +static integer c__1 = 1; + +/* > \brief \b DLARF1F applies an elementary reflector to a general rectangular */ +/* matrix assuming v(1) = 1. */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > Download DLARF1F + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE DLARF1F( SIDE, M, N, V, INCV, TAU, C, LDC, WORK ) */ + +/* CHARACTER SIDE */ +/* INTEGER INCV, LDC, M, N */ +/* DOUBLE PRECISION TAU */ +/* DOUBLE PRECISION C( LDC, * ), V( * ), WORK( * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > DLARF1F applies a real elementary reflector H to a real m by n matrix */ +/* > C, from either the left or the right. H is represented in the form */ +/* > */ +/* > H = I - tau * v * v**T */ +/* > */ +/* > where tau is a real scalar and v is a real vector. */ +/* > */ +/* > If tau = 0, then H is taken to be the unit matrix. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] SIDE */ +/* > \verbatim */ +/* > SIDE is CHARACTER*1 */ +/* > = 'L': form H * C */ +/* > = 'R': form C * H */ +/* > \endverbatim */ +/* > */ +/* > \param[in] M */ +/* > \verbatim */ +/* > M is INTEGER */ +/* > The number of rows of the matrix C. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The number of columns of the matrix C. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] V */ +/* > \verbatim */ +/* > V is DOUBLE PRECISION array, dimension */ +/* > (1 + (M-1)*abs(INCV)) if SIDE = 'L' */ +/* > or (1 + (N-1)*abs(INCV)) if SIDE = 'R' */ +/* > The vector v in the representation of H. V is not used if */ +/* > TAU = 0. V(1) is not referenced or modified. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] INCV */ +/* > \verbatim */ +/* > INCV is INTEGER */ +/* > The increment between elements of v. INCV <> 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] TAU */ +/* > \verbatim */ +/* > TAU is DOUBLE PRECISION */ +/* > The value tau in the representation of H. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] C */ +/* > \verbatim */ +/* > C is DOUBLE PRECISION array, dimension (LDC,N) */ +/* > On entry, the m by n matrix C. */ +/* > On exit, C is overwritten by the matrix H * C if SIDE = 'L', */ +/* > or C * H if SIDE = 'R'. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDC */ +/* > \verbatim */ +/* > LDC is INTEGER */ +/* > The leading dimension of the array C. LDC >= f2cmax(1,M). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is DOUBLE PRECISION array, dimension */ +/* > (N) if SIDE = 'L' */ +/* > or (M) if SIDE = 'R' */ +/* > \endverbatim */ + +/* To take advantage of the fact that v(1) = 1, we do the following */ +/* v = [ 1 v_2 ]**T */ +/* If SIDE='L' */ +/* |-----| */ +/* | C_1 | */ +/* C =| C_2 | */ +/* |-----| */ +/* C_1\in\mathbb{R}^{1\times n}, C_2\in\mathbb{R}^{m-1\times n} */ +/* So we compute: */ +/* C = HC = (I - \tau vv**T)C */ +/* = C - \tau vv**T C */ +/* w = C**T v = [ C_1**T C_2**T ] [ 1 v_2 ]**T */ +/* = C_1**T + C_2**T v ( DGEMM then DAXPY ) */ +/* C = C - \tau vv**T C */ +/* = C - \tau vw**T */ +/* Giving us C_1 = C_1 - \tau w**T ( DAXPY ) */ +/* and */ +/* C_2 = C_2 - \tau v_2w**T ( DGER ) */ +/* If SIDE='R' */ + +/* C = [ C_1 C_2 ] */ +/* C_1\in\mathbb{R}^{m\times 1}, C_2\in\mathbb{R}^{m\times n-1} */ +/* So we compute: */ +/* C = CH = C(I - \tau vv**T) */ +/* = C - \tau Cvv**T */ + +/* w = Cv = [ C_1 C_2 ] [ 1 v_2 ]**T */ +/* = C_1 + C_2v_2 ( DGEMM then DAXPY ) */ +/* C = C - \tau Cvv**T */ +/* = C - \tau wv**T */ +/* Giving us C_1 = C_1 - \tau w ( DAXPY ) */ +/* and */ +/* C_2 = C_2 - \tau wv_2**T ( DGER ) */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \ingroup larf */ + +/* ===================================================================== */ +/* Subroutine */ int dlarf1f_(char *side, integer *m, integer *n, doublereal * + v, integer *incv, doublereal *tau, doublereal *c__, integer *ldc, + doublereal *work) +{ + /* System generated locals */ + integer c_dim1, c_offset, i__1; + doublereal d__1; + + /* Local variables */ + integer i__; + logical applyleft; + extern /* Subroutine */ int dger_(integer *, integer *, doublereal *, + doublereal *, integer *, doublereal *, integer *, doublereal *, + integer *), dscal_(integer *, doublereal *, doublereal *, integer + *); + extern logical lsame_(char *, char *); + extern /* Subroutine */ int dgemv_(char *, integer *, integer *, + doublereal *, doublereal *, integer *, doublereal *, integer *, + doublereal *, doublereal *, integer *); + integer lastc; + extern /* Subroutine */ int daxpy_(integer *, doublereal *, doublereal *, + integer *, doublereal *, integer *); + integer lastv; + extern integer iladlc_(integer *, integer *, doublereal *, integer *), + iladlr_(integer *, integer *, doublereal *, integer *); + + +/* -- LAPACK auxiliary routine -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + + +/* ===================================================================== */ + + + /* Parameter adjustments */ + --v; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + --work; + + /* Function Body */ + applyleft = lsame_(side, "L"); + lastv = 1; + lastc = 0; + if (*tau != 0.) { +/* Set up variables for scanning V. LASTV begins pointing to the end */ +/* of V. */ + if (applyleft) { + lastv = *m; + } else { + lastv = *n; + } + if (*incv > 0) { + i__ = (lastv - 1) * *incv + 1; + } else { + i__ = 1; + } +/* Look for the last non-zero row in V. */ +/* Since we are assuming that V(1) = 1, and it is not stored, so we */ +/* shouldn't access it. */ + while(lastv > 1 && v[i__] == 0.) { + --lastv; + i__ -= *incv; + } + if (applyleft) { +/* Scan for the last non-zero column in C(1:lastv,:). */ + lastc = iladlc_(&lastv, n, &c__[c_offset], ldc); + } else { +/* Scan for the last non-zero row in C(:,1:lastv). */ + lastc = iladlr_(m, &lastv, &c__[c_offset], ldc); + } + } + if (lastc == 0) { + return 0; + } + if (applyleft) { + +/* Form H * C */ + +/* Check if lastv = 1. This means v = 1, So we just need to comp */ +/* C := HC = (1-\tau)C. */ + if (lastv == 1) { + +/* C(1,1:lastc) := ( 1 - tau ) * C(1,1:lastc) */ + + d__1 = 1. - *tau; + dscal_(&lastc, &d__1, &c__[c_offset], ldc); + } else { + +/* w(1:lastc,1) := C(1:lastv,1:lastc)**T * v(1:lastv,1) */ + +/* w(1:lastc,1) := C(2:lastv,1:lastc)**T * v(2:lastv,1) */ + i__1 = lastv - 1; + dgemv_("Transpose", &i__1, &lastc, &c_b4, &c__[c_dim1 + 2], ldc, & + v[*incv + 1], incv, &c_b5, &work[1], &c__1); +/* w(1:lastc,1) += C(1,1:lastc)**T * v(1,1) = C(1,1:lastc)**T */ + daxpy_(&lastc, &c_b4, &c__[c_offset], ldc, &work[1], &c__1); + +/* C(1:lastv,1:lastc) := C(...) - tau * v(1:lastv,1) * w(1:lastc,1)**T */ + +/* C(1, 1:lastc) := C(...) - tau * v(1,1) * w(1:lastc,1)**T */ +/* = C(...) - tau * w(1:lastc,1)**T */ + d__1 = -(*tau); + daxpy_(&lastc, &d__1, &work[1], &c__1, &c__[c_offset], ldc); +/* C(2:lastv,1:lastc) := C(...) - tau * v(2:lastv,1)*w(1:last */ + i__1 = lastv - 1; + d__1 = -(*tau); + dger_(&i__1, &lastc, &d__1, &v[*incv + 1], incv, &work[1], &c__1, + &c__[c_dim1 + 2], ldc); + } + } else { + +/* Form C * H */ + +/* Check if n = 1. This means v = 1, so we just need to compute */ +/* C := CH = C(1-\tau). */ + if (lastv == 1) { + +/* C(1:lastc,1) := ( 1 - tau ) * C(1:lastc,1) */ + + d__1 = 1. - *tau; + dscal_(&lastc, &d__1, &c__[c_offset], &c__1); + } else { + +/* w(1:lastc,1) := C(1:lastc,1:lastv) * v(1:lastv,1) */ + +/* w(1:lastc,1) := C(1:lastc,2:lastv) * v(2:lastv,1) */ + i__1 = lastv - 1; + dgemv_("No transpose", &lastc, &i__1, &c_b4, &c__[(c_dim1 << 1) + + 1], ldc, &v[*incv + 1], incv, &c_b5, &work[1], &c__1); +/* w(1:lastc,1) += C(1:lastc,1) v(1,1) = C(1:lastc,1) */ + daxpy_(&lastc, &c_b4, &c__[c_offset], &c__1, &work[1], &c__1); + +/* C(1:lastc,1:lastv) := C(...) - tau * w(1:lastc,1) * v(1:lastv,1)**T */ + +/* C(1:lastc,1) := C(...) - tau * w(1:lastc,1) * v(1,1)** */ +/* = C(...) - tau * w(1:lastc,1) */ + d__1 = -(*tau); + daxpy_(&lastc, &d__1, &work[1], &c__1, &c__[c_offset], &c__1); +/* C(1:lastc,2:lastv) := C(...) - tau * w(1:lastc,1) * v(2:la */ + i__1 = lastv - 1; + d__1 = -(*tau); + dger_(&lastc, &i__1, &d__1, &work[1], &c__1, &v[*incv + 1], incv, + &c__[(c_dim1 << 1) + 1], ldc); + } + } + return 0; + +/* End of DLARF1F */ + +} /* dlarf1f_ */ + diff --git a/lapack-netlib/SRC/dlarf1f.f b/lapack-netlib/SRC/dlarf1f.f new file mode 100644 index 0000000000..c65035c61f --- /dev/null +++ b/lapack-netlib/SRC/dlarf1f.f @@ -0,0 +1,291 @@ +*> \brief \b DLARF1F applies an elementary reflector to a general rectangular +* matrix assuming v(1) = 1. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> Download DLARF1F + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +* +* Definition: +* =========== +* +* SUBROUTINE DLARF1F( SIDE, M, N, V, INCV, TAU, C, LDC, WORK ) +* +* .. Scalar Arguments .. +* CHARACTER SIDE +* INTEGER INCV, LDC, M, N +* DOUBLE PRECISION TAU +* .. +* .. Array Arguments .. +* DOUBLE PRECISION C( LDC, * ), V( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DLARF1F applies a real elementary reflector H to a real m by n matrix +*> C, from either the left or the right. H is represented in the form +*> +*> H = I - tau * v * v**T +*> +*> where tau is a real scalar and v is a real vector. +*> +*> If tau = 0, then H is taken to be the unit matrix. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] SIDE +*> \verbatim +*> SIDE is CHARACTER*1 +*> = 'L': form H * C +*> = 'R': form C * H +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix C. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix C. +*> \endverbatim +*> +*> \param[in] V +*> \verbatim +*> V is DOUBLE PRECISION array, dimension +*> (1 + (M-1)*abs(INCV)) if SIDE = 'L' +*> or (1 + (N-1)*abs(INCV)) if SIDE = 'R' +*> The vector v in the representation of H. V is not used if +*> TAU = 0. V(1) is not referenced or modified. +*> \endverbatim +*> +*> \param[in] INCV +*> \verbatim +*> INCV is INTEGER +*> The increment between elements of v. INCV <> 0. +*> \endverbatim +*> +*> \param[in] TAU +*> \verbatim +*> TAU is DOUBLE PRECISION +*> The value tau in the representation of H. +*> \endverbatim +*> +*> \param[in,out] C +*> \verbatim +*> C is DOUBLE PRECISION array, dimension (LDC,N) +*> On entry, the m by n matrix C. +*> On exit, C is overwritten by the matrix H * C if SIDE = 'L', +*> or C * H if SIDE = 'R'. +*> \endverbatim +*> +*> \param[in] LDC +*> \verbatim +*> LDC is INTEGER +*> The leading dimension of the array C. LDC >= max(1,M). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension +*> (N) if SIDE = 'L' +*> or (M) if SIDE = 'R' +*> \endverbatim +* +* To take advantage of the fact that v(1) = 1, we do the following +* v = [ 1 v_2 ]**T +* If SIDE='L' +* |-----| +* | C_1 | +* C =| C_2 | +* |-----| +* C_1\in\mathbb{R}^{1\times n}, C_2\in\mathbb{R}^{m-1\times n} +* So we compute: +* C = HC = (I - \tau vv**T)C +* = C - \tau vv**T C +* w = C**T v = [ C_1**T C_2**T ] [ 1 v_2 ]**T +* = C_1**T + C_2**T v ( DGEMM then DAXPY ) +* C = C - \tau vv**T C +* = C - \tau vw**T +* Giving us C_1 = C_1 - \tau w**T ( DAXPY ) +* and +* C_2 = C_2 - \tau v_2w**T ( DGER ) +* If SIDE='R' +* +* C = [ C_1 C_2 ] +* C_1\in\mathbb{R}^{m\times 1}, C_2\in\mathbb{R}^{m\times n-1} +* So we compute: +* C = CH = C(I - \tau vv**T) +* = C - \tau Cvv**T +* +* w = Cv = [ C_1 C_2 ] [ 1 v_2 ]**T +* = C_1 + C_2v_2 ( DGEMM then DAXPY ) +* C = C - \tau Cvv**T +* = C - \tau wv**T +* Giving us C_1 = C_1 - \tau w ( DAXPY ) +* and +* C_2 = C_2 - \tau wv_2**T ( DGER ) +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \ingroup larf +* +* ===================================================================== + SUBROUTINE DLARF1F( SIDE, M, N, V, INCV, TAU, C, LDC, WORK ) + IMPLICIT NONE +* +* -- LAPACK auxiliary routine -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* +* .. Scalar Arguments .. + CHARACTER SIDE + INTEGER INCV, LDC, M, N + DOUBLE PRECISION TAU +* .. +* .. Array Arguments .. + DOUBLE PRECISION C( LDC, * ), V( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL APPLYLEFT + INTEGER I, LASTV, LASTC +* .. +* .. External Subroutines .. + EXTERNAL DGEMV, DGER, DAXPY, DSCAL +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILADLR, ILADLC + EXTERNAL LSAME, ILADLR, ILADLC +* .. +* .. Executable Statements .. +* + APPLYLEFT = LSAME( SIDE, 'L' ) + LASTV = 1 + LASTC = 0 + IF( TAU.NE.ZERO ) THEN +! Set up variables for scanning V. LASTV begins pointing to the end +! of V. + IF( APPLYLEFT ) THEN + LASTV = M + ELSE + LASTV = N + END IF + IF( INCV.GT.0 ) THEN + I = 1 + (LASTV-1) * INCV + ELSE + I = 1 + END IF +! Look for the last non-zero row in V. +! Since we are assuming that V(1) = 1, and it is not stored, so we +! shouldn't access it. + DO WHILE( LASTV.GT.1 .AND. V( I ).EQ.ZERO ) + LASTV = LASTV - 1 + I = I - INCV + END DO + IF( APPLYLEFT ) THEN +! Scan for the last non-zero column in C(1:lastv,:). + LASTC = ILADLC(LASTV, N, C, LDC) + ELSE +! Scan for the last non-zero row in C(:,1:lastv). + LASTC = ILADLR(M, LASTV, C, LDC) + END IF + END IF + IF( LASTC.EQ.0 ) THEN + RETURN + END IF + IF( APPLYLEFT ) THEN +* +* Form H * C +* + ! Check if lastv = 1. This means v = 1, So we just need to compute + ! C := HC = (1-\tau)C. + IF( LASTV.EQ.1 ) THEN +* +* C(1,1:lastc) := ( 1 - tau ) * C(1,1:lastc) +* + CALL DSCAL(LASTC, ONE - TAU, C, LDC) + ELSE +* +* w(1:lastc,1) := C(1:lastv,1:lastc)**T * v(1:lastv,1) +* + ! w(1:lastc,1) := C(2:lastv,1:lastc)**T * v(2:lastv,1) + CALL DGEMV( 'Transpose', LASTV-1, LASTC, ONE, C(1+1,1), + $ LDC, V(1+INCV), INCV, ZERO, WORK, 1) + ! w(1:lastc,1) += C(1,1:lastc)**T * v(1,1) = C(1,1:lastc)**T + CALL DAXPY(LASTC, ONE, C, LDC, WORK, 1) +* +* C(1:lastv,1:lastc) := C(...) - tau * v(1:lastv,1) * w(1:lastc,1)**T +* + ! C(1, 1:lastc) := C(...) - tau * v(1,1) * w(1:lastc,1)**T + ! = C(...) - tau * w(1:lastc,1)**T + CALL DAXPY(LASTC, -TAU, WORK, 1, C, LDC) + ! C(2:lastv,1:lastc) := C(...) - tau * v(2:lastv,1)*w(1:lastc,1)**T + CALL DGER(LASTV-1, LASTC, -TAU, V(1+INCV), INCV, WORK, 1, + $ C(1+1,1), LDC) + END IF + ELSE +* +* Form C * H +* + ! Check if n = 1. This means v = 1, so we just need to compute + ! C := CH = C(1-\tau). + IF( LASTV.EQ.1 ) THEN +* +* C(1:lastc,1) := ( 1 - tau ) * C(1:lastc,1) +* + CALL DSCAL(LASTC, ONE - TAU, C, 1) + ELSE +* +* w(1:lastc,1) := C(1:lastc,1:lastv) * v(1:lastv,1) +* + ! w(1:lastc,1) := C(1:lastc,2:lastv) * v(2:lastv,1) + CALL DGEMV( 'No transpose', LASTC, LASTV-1, ONE, + $ C(1,1+1), LDC, V(1+INCV), INCV, ZERO, WORK, 1 ) + ! w(1:lastc,1) += C(1:lastc,1) v(1,1) = C(1:lastc,1) + CALL DAXPY(LASTC, ONE, C, 1, WORK, 1) +* +* C(1:lastc,1:lastv) := C(...) - tau * w(1:lastc,1) * v(1:lastv,1)**T +* + ! C(1:lastc,1) := C(...) - tau * w(1:lastc,1) * v(1,1)**T + ! = C(...) - tau * w(1:lastc,1) + CALL DAXPY(LASTC, -TAU, WORK, 1, C, 1) + ! C(1:lastc,2:lastv) := C(...) - tau * w(1:lastc,1) * v(2:lastv)**T + CALL DGER( LASTC, LASTV-1, -TAU, WORK, 1, V(1+INCV), + $ INCV, C(1,1+1), LDC ) + END IF + END IF + RETURN +* +* End of DLARF1F +* + END diff --git a/lapack-netlib/SRC/dlarf1l.c b/lapack-netlib/SRC/dlarf1l.c new file mode 100644 index 0000000000..aa951e935c --- /dev/null +++ b/lapack-netlib/SRC/dlarf1l.c @@ -0,0 +1,534 @@ +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +#if defined(_WIN64) +typedef long long BLASLONG; +typedef unsigned long long BLASULONG; +#else +typedef long BLASLONG; +typedef unsigned long BLASULONG; +#endif + +#ifdef LAPACK_ILP64 +typedef BLASLONG blasint; +#if defined(_WIN64) +#define blasabs(x) llabs(x) +#else +#define blasabs(x) labs(x) +#endif +#else +typedef int blasint; +#define blasabs(x) abs(x) +#endif + +typedef blasint integer; + +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +#ifdef _MSC_VER +static inline _Fcomplex Cf(complex *z) {_Fcomplex zz={z->r , z->i}; return zz;} +static inline _Dcomplex Cd(doublecomplex *z) {_Dcomplex zz={z->r , z->i};return zz;} +static inline _Fcomplex * _pCf(complex *z) {return (_Fcomplex*)z;} +static inline _Dcomplex * _pCd(doublecomplex *z) {return (_Dcomplex*)z;} +#else +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#endif +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#ifdef _MSC_VER +#define c_div(c, a, b) {Cf(c)._Val[0] = (Cf(a)._Val[0]/Cf(b)._Val[0]); Cf(c)._Val[1]=(Cf(a)._Val[1]/Cf(b)._Val[1]);} +#define z_div(c, a, b) {Cd(c)._Val[0] = (Cd(a)._Val[0]/Cd(b)._Val[0]); Cd(c)._Val[1]=(Cd(a)._Val[1]/Cd(b)._Val[1]);} +#else +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#endif +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conjf(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimagf(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +/* Table of constant values */ + +static doublereal c_b4 = 1.; +static doublereal c_b5 = 0.; +static integer c__1 = 1; + +/* > \brief \b DLARF1L applies an elementary reflector to a general rectangular */ +/* matrix assuming v(lastv) = 1 where lastv is the last non-zero */ +/* element */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > Download DLARF1L + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE DLARF1L( SIDE, M, N, V, INCV, TAU, C, LDC, WORK ) */ + +/* CHARACTER SIDE */ +/* INTEGER INCV, LDC, M, N */ +/* DOUBLE PRECISION TAU */ +/* DOUBLE PRECISION C( LDC, * ), V( * ), WORK( * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > DLARF1L applies a real elementary reflector H to a real m by n matrix */ +/* > C, from either the left or the right. H is represented in the form */ +/* > */ +/* > H = I - tau * v * v**T */ +/* > */ +/* > where tau is a real scalar and v is a real vector. */ +/* > */ +/* > If tau = 0, then H is taken to be the unit matrix. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] SIDE */ +/* > \verbatim */ +/* > SIDE is CHARACTER*1 */ +/* > = 'L': form H * C */ +/* > = 'R': form C * H */ +/* > \endverbatim */ +/* > */ +/* > \param[in] M */ +/* > \verbatim */ +/* > M is INTEGER */ +/* > The number of rows of the matrix C. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The number of columns of the matrix C. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] V */ +/* > \verbatim */ +/* > V is DOUBLE PRECISION array, dimension */ +/* > (1 + (M-1)*abs(INCV)) if SIDE = 'L' */ +/* > or (1 + (N-1)*abs(INCV)) if SIDE = 'R' */ +/* > The vector v in the representation of H. V is not used if */ +/* > TAU = 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] INCV */ +/* > \verbatim */ +/* > INCV is INTEGER */ +/* > The increment between elements of v. INCV <> 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] TAU */ +/* > \verbatim */ +/* > TAU is DOUBLE PRECISION */ +/* > The value tau in the representation of H. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] C */ +/* > \verbatim */ +/* > C is DOUBLE PRECISION array, dimension (LDC,N) */ +/* > On entry, the m by n matrix C. */ +/* > On exit, C is overwritten by the matrix H * C if SIDE = 'L', */ +/* > or C * H if SIDE = 'R'. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDC */ +/* > \verbatim */ +/* > LDC is INTEGER */ +/* > The leading dimension of the array C. LDC >= f2cmax(1,M). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is DOUBLE PRECISION array, dimension */ +/* > (N) if SIDE = 'L' */ +/* > or (M) if SIDE = 'R' */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \ingroup larf */ + +/* ===================================================================== */ +/* Subroutine */ int dlarf1l_(char *side, integer *m, integer *n, doublereal * + v, integer *incv, doublereal *tau, doublereal *c__, integer *ldc, + doublereal *work) +{ + /* System generated locals */ + integer c_dim1, c_offset, i__1; + doublereal d__1; + + /* Local variables */ + integer i__; + logical applyleft; + extern /* Subroutine */ int dger_(integer *, integer *, doublereal *, + doublereal *, integer *, doublereal *, integer *, doublereal *, + integer *), dscal_(integer *, doublereal *, doublereal *, integer + *); + extern logical lsame_(char *, char *); + extern /* Subroutine */ int dgemv_(char *, integer *, integer *, + doublereal *, doublereal *, integer *, doublereal *, integer *, + doublereal *, doublereal *, integer *); + integer lastc; + extern /* Subroutine */ int daxpy_(integer *, doublereal *, doublereal *, + integer *, doublereal *, integer *); + integer lastv; + extern integer iladlc_(integer *, integer *, doublereal *, integer *), + iladlr_(integer *, integer *, doublereal *, integer *); + integer firstv; + + +/* -- LAPACK auxiliary routine -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + + +/* ===================================================================== */ + + + /* Parameter adjustments */ + --v; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + --work; + + /* Function Body */ + applyleft = lsame_(side, "L"); + firstv = 1; + lastc = 0; + if (*tau != 0.) { +/* Set up variables for scanning V. LASTV begins pointing to the end */ +/* of V. */ + if (applyleft) { + lastv = *m; + } else { + lastv = *n; + } + i__ = 1; +/* Look for the last non-zero row in V. */ + while(lastv > firstv && v[i__] == 0.) { + ++firstv; + i__ += *incv; + } + if (applyleft) { +/* Scan for the last non-zero column in C(1:lastv,:). */ + lastc = iladlc_(&lastv, n, &c__[c_offset], ldc); + } else { +/* Scan for the last non-zero row in C(:,1:lastv). */ + lastc = iladlr_(m, &lastv, &c__[c_offset], ldc); + } + } + if (lastc == 0) { + return 0; + } + if (applyleft) { + +/* Form H * C */ + + if (lastv > 0) { +/* Check if m = 1. This means v = 1, So we just need to compu */ +/* C := HC = (1-\tau)C. */ + if (lastv == firstv) { + d__1 = 1. - *tau; + dscal_(&lastc, &d__1, &c__[firstv + c_dim1], ldc); + } else { + +/* w(1:lastc,1) := C(1:lastv,1:lastc)**T * v(1:lastv,1) */ + +/* w(1:lastc,1) := C(1:lastv-1,1:lastc)**T * v(1:lastv-1,1 */ + i__1 = lastv - firstv; + dgemv_("Transpose", &i__1, &lastc, &c_b4, &c__[firstv + + c_dim1], ldc, &v[i__], incv, &c_b5, &work[1], &c__1); +/* w(1:lastc,1) += C(lastv,1:lastc)**T * v(lastv,1) = C(la */ + daxpy_(&lastc, &c_b4, &c__[lastv + c_dim1], ldc, &work[1], & + c__1); + +/* C(1:lastv,1:lastc) := C(...) - tau * v(1:lastv,1) * w(1:lastc,1)**T */ + +/* C(lastv, 1:lastc) := C(...) - tau * v(lastv,1) * w(1: */ +/* = C(...) - tau * w(1:lastc,1)**T */ + d__1 = -(*tau); + daxpy_(&lastc, &d__1, &work[1], &c__1, &c__[lastv + c_dim1], + ldc); +/* C(1:lastv-1,1:lastc) := C(...) - tau * v(1:lastv-1,1)*w */ + i__1 = lastv - firstv; + d__1 = -(*tau); + dger_(&i__1, &lastc, &d__1, &v[i__], incv, &work[1], &c__1, & + c__[firstv + c_dim1], ldc); + } + } + } else { + +/* Form C * H */ + + if (lastv > 0) { +/* Check if n = 1. This means v = 1, so we just need to compu */ +/* C := CH = C(1-\tau). */ + if (lastv == firstv) { + d__1 = 1. - *tau; + dscal_(&lastc, &d__1, &c__[c_offset], &c__1); + } else { + +/* w(1:lastc,1) := C(1:lastc,1:lastv) * v(1:lastv,1) */ + +/* w(1:lastc,1) := C(1:lastc,1:lastv-1) * v(1:lastv-1,1) */ + i__1 = lastv - firstv; + dgemv_("No transpose", &lastc, &i__1, &c_b4, &c__[firstv * + c_dim1 + 1], ldc, &v[i__], incv, &c_b5, &work[1], & + c__1); +/* w(1:lastc,1) += C(1:lastc,lastv) * v(lastv,1) = C(1:las */ + daxpy_(&lastc, &c_b4, &c__[lastv * c_dim1 + 1], &c__1, &work[ + 1], &c__1); + +/* C(1:lastc,1:lastv) := C(...) - tau * w(1:lastc,1) * v(1:lastv,1)**T */ + +/* C(1:lastc,lastv) := C(...) - tau * w(1:lastc,1) * v */ +/* = C(...) - tau * w(1:lastc,1) */ + d__1 = -(*tau); + daxpy_(&lastc, &d__1, &work[1], &c__1, &c__[lastv * c_dim1 + + 1], &c__1); +/* C(1:lastc,1:lastv-1) := C(...) - tau * w(1:lastc,1) * v */ + i__1 = lastv - firstv; + d__1 = -(*tau); + dger_(&lastc, &i__1, &d__1, &work[1], &c__1, &v[i__], incv, & + c__[firstv * c_dim1 + 1], ldc); + } + } + } + return 0; + +/* End of DLARF1L */ + +} /* dlarf1l_ */ + diff --git a/lapack-netlib/SRC/dlarf1l.f b/lapack-netlib/SRC/dlarf1l.f new file mode 100644 index 0000000000..d225701fcd --- /dev/null +++ b/lapack-netlib/SRC/dlarf1l.f @@ -0,0 +1,251 @@ +*> \brief \b DLARF1L applies an elementary reflector to a general rectangular +* matrix assuming v(lastv) = 1 where lastv is the last non-zero +* element +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> Download DLARF1L + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +* +* Definition: +* =========== +* +* SUBROUTINE DLARF1L( SIDE, M, N, V, INCV, TAU, C, LDC, WORK ) +* +* .. Scalar Arguments .. +* CHARACTER SIDE +* INTEGER INCV, LDC, M, N +* DOUBLE PRECISION TAU +* .. +* .. Array Arguments .. +* DOUBLE PRECISION C( LDC, * ), V( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DLARF1L applies a real elementary reflector H to a real m by n matrix +*> C, from either the left or the right. H is represented in the form +*> +*> H = I - tau * v * v**T +*> +*> where tau is a real scalar and v is a real vector. +*> +*> If tau = 0, then H is taken to be the unit matrix. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] SIDE +*> \verbatim +*> SIDE is CHARACTER*1 +*> = 'L': form H * C +*> = 'R': form C * H +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix C. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix C. +*> \endverbatim +*> +*> \param[in] V +*> \verbatim +*> V is DOUBLE PRECISION array, dimension +*> (1 + (M-1)*abs(INCV)) if SIDE = 'L' +*> or (1 + (N-1)*abs(INCV)) if SIDE = 'R' +*> The vector v in the representation of H. V is not used if +*> TAU = 0. +*> \endverbatim +*> +*> \param[in] INCV +*> \verbatim +*> INCV is INTEGER +*> The increment between elements of v. INCV <> 0. +*> \endverbatim +*> +*> \param[in] TAU +*> \verbatim +*> TAU is DOUBLE PRECISION +*> The value tau in the representation of H. +*> \endverbatim +*> +*> \param[in,out] C +*> \verbatim +*> C is DOUBLE PRECISION array, dimension (LDC,N) +*> On entry, the m by n matrix C. +*> On exit, C is overwritten by the matrix H * C if SIDE = 'L', +*> or C * H if SIDE = 'R'. +*> \endverbatim +*> +*> \param[in] LDC +*> \verbatim +*> LDC is INTEGER +*> The leading dimension of the array C. LDC >= max(1,M). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension +*> (N) if SIDE = 'L' +*> or (M) if SIDE = 'R' +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \ingroup larf +* +* ===================================================================== + SUBROUTINE DLARF1L( SIDE, M, N, V, INCV, TAU, C, LDC, WORK ) + IMPLICIT NONE +* +* -- LAPACK auxiliary routine -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* +* .. Scalar Arguments .. + CHARACTER SIDE + INTEGER INCV, LDC, M, N + DOUBLE PRECISION TAU +* .. +* .. Array Arguments .. + DOUBLE PRECISION C( LDC, * ), V( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL APPLYLEFT + INTEGER I, FIRSTV, LASTV, LASTC +* .. +* .. External Subroutines .. + EXTERNAL DAXPY, DGEMV, DGER, DSCAL +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILADLR, ILADLC + EXTERNAL LSAME, ILADLR, ILADLC +* .. +* .. Executable Statements .. +* + APPLYLEFT = LSAME( SIDE, 'L' ) + FIRSTV = 1 + LASTC = 0 + IF( TAU.NE.ZERO ) THEN +! Set up variables for scanning V. LASTV begins pointing to the end +! of V. + IF( APPLYLEFT ) THEN + LASTV = M + ELSE + LASTV = N + END IF + I = 1 +! Look for the last non-zero row in V. + DO WHILE( LASTV.GT.FIRSTV .AND. V( I ).EQ.ZERO ) + FIRSTV = FIRSTV + 1 + I = I + INCV + END DO + IF( APPLYLEFT ) THEN +! Scan for the last non-zero column in C(1:lastv,:). + LASTC = ILADLC(LASTV, N, C, LDC) + ELSE +! Scan for the last non-zero row in C(:,1:lastv). + LASTC = ILADLR(M, LASTV, C, LDC) + END IF + END IF + IF( LASTC.EQ.0 ) THEN + RETURN + END IF + IF( APPLYLEFT ) THEN +* +* Form H * C +* + IF( LASTV.GT.0 ) THEN + ! Check if m = 1. This means v = 1, So we just need to compute + ! C := HC = (1-\tau)C. + IF( LASTV.EQ.FIRSTV ) THEN + CALL DSCAL(LASTC, ONE - TAU, C( FIRSTV, 1), LDC) + ELSE +* +* w(1:lastc,1) := C(1:lastv,1:lastc)**T * v(1:lastv,1) +* + ! w(1:lastc,1) := C(1:lastv-1,1:lastc)**T * v(1:lastv-1,1) + CALL DGEMV( 'Transpose', LASTV-FIRSTV, LASTC, ONE, + $ C(FIRSTV,1), LDC, V(I), INCV, ZERO, + $ WORK, 1) + ! w(1:lastc,1) += C(lastv,1:lastc)**T * v(lastv,1) = C(lastv,1:lastc)**T + CALL DAXPY(LASTC, ONE, C(LASTV,1), LDC, WORK, 1) +* +* C(1:lastv,1:lastc) := C(...) - tau * v(1:lastv,1) * w(1:lastc,1)**T +* + ! C(lastv, 1:lastc) := C(...) - tau * v(lastv,1) * w(1:lastc,1)**T + ! = C(...) - tau * w(1:lastc,1)**T + CALL DAXPY(LASTC, -TAU, WORK, 1, C(LASTV,1), LDC) + ! C(1:lastv-1,1:lastc) := C(...) - tau * v(1:lastv-1,1)*w(1:lastc,1)**T + CALL DGER(LASTV-FIRSTV, LASTC, -TAU, V(I), INCV, + $ WORK, 1, C(FIRSTV,1), LDC) + END IF + END IF + ELSE +* +* Form C * H +* + IF( LASTV.GT.0 ) THEN + ! Check if n = 1. This means v = 1, so we just need to compute + ! C := CH = C(1-\tau). + IF( LASTV.EQ.FIRSTV ) THEN + CALL DSCAL(LASTC, ONE - TAU, C, 1) + ELSE +* +* w(1:lastc,1) := C(1:lastc,1:lastv) * v(1:lastv,1) +* + ! w(1:lastc,1) := C(1:lastc,1:lastv-1) * v(1:lastv-1,1) + CALL DGEMV( 'No transpose', LASTC, LASTV-FIRSTV, + $ ONE, C(1,FIRSTV), LDC, V(I), INCV, ZERO, WORK, 1 ) + ! w(1:lastc,1) += C(1:lastc,lastv) * v(lastv,1) = C(1:lastc,lastv) + CALL DAXPY(LASTC, ONE, C(1,LASTV), 1, WORK, 1) +* +* C(1:lastc,1:lastv) := C(...) - tau * w(1:lastc,1) * v(1:lastv,1)**T +* + ! C(1:lastc,lastv) := C(...) - tau * w(1:lastc,1) * v(lastv,1)**T + ! = C(...) - tau * w(1:lastc,1) + CALL DAXPY(LASTC, -TAU, WORK, 1, C(1,LASTV), 1) + ! C(1:lastc,1:lastv-1) := C(...) - tau * w(1:lastc,1) * v(1:lastv-1)**T + CALL DGER( LASTC, LASTV-FIRSTV, -TAU, WORK, 1, V(I), + $ INCV, C(1,FIRSTV), LDC ) + END IF + END IF + END IF + RETURN +* +* End of DLARF1L +* + END diff --git a/lapack-netlib/SRC/dlascl.f b/lapack-netlib/SRC/dlascl.f index 0a4bf21ce1..5db6fc40b8 100644 --- a/lapack-netlib/SRC/dlascl.f +++ b/lapack-netlib/SRC/dlascl.f @@ -5,7 +5,6 @@ * Online html documentation available at * http://www.netlib.org/lapack/explore-html/ * -*> \htmlonly *> Download DLASCL + dependencies *> *> [TGZ] @@ -13,7 +12,6 @@ *> [ZIP] *> *> [TXT] -*> \endhtmlonly * * Definition: * =========== @@ -136,10 +134,12 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup OTHERauxiliary +*> \ingroup lascl * * ===================================================================== - SUBROUTINE DLASCL( TYPE, KL, KU, CFROM, CTO, M, N, A, LDA, INFO ) + SUBROUTINE DLASCL( TYPE, KL, KU, CFROM, CTO, M, N, A, LDA, + $ INFO ) + IMPLICIT NONE * * -- LAPACK auxiliary routine -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- @@ -291,7 +291,7 @@ SUBROUTINE DLASCL( TYPE, KL, KU, CFROM, CTO, M, N, A, LDA, INFO ) * * Lower triangular matrix * - DO 50 J = 1, N + DO 50 J = 1, MIN( M, N ) DO 40 I = J, M A( I, J ) = A( I, J )*MUL 40 CONTINUE diff --git a/lapack-netlib/SRC/dopmtr.f b/lapack-netlib/SRC/dopmtr.f index c18074deca..5646b8eacc 100644 --- a/lapack-netlib/SRC/dopmtr.f +++ b/lapack-netlib/SRC/dopmtr.f @@ -5,7 +5,6 @@ * Online html documentation available at * http://www.netlib.org/lapack/explore-html/ * -*> \htmlonly *> Download DOPMTR + dependencies *> *> [TGZ] @@ -13,7 +12,6 @@ *> [ZIP] *> *> [TXT] -*> \endhtmlonly * * Definition: * =========== @@ -142,11 +140,13 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup doubleOTHERcomputational +*> \ingroup upmtr * * ===================================================================== - SUBROUTINE DOPMTR( SIDE, UPLO, TRANS, M, N, AP, TAU, C, LDC, WORK, + SUBROUTINE DOPMTR( SIDE, UPLO, TRANS, M, N, AP, TAU, C, LDC, + $ WORK, $ INFO ) + IMPLICIT NONE * * -- LAPACK computational routine -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- @@ -260,11 +260,9 @@ SUBROUTINE DOPMTR( SIDE, UPLO, TRANS, M, N, AP, TAU, C, LDC, WORK, * * Apply H(i) * - AII = AP( II ) - AP( II ) = ONE - CALL DLARF( SIDE, MI, NI, AP( II-I+1 ), 1, TAU( I ), C, LDC, + CALL DLARF1L( SIDE, MI, NI, AP( II-I+1 ), 1, TAU( I ), C, + $ LDC, $ WORK ) - AP( II ) = AII * IF( FORWRD ) THEN II = II + I + 2 diff --git a/lapack-netlib/SRC/dorbdb.f b/lapack-netlib/SRC/dorbdb.f index 3edfda6b84..08be1794a6 100644 --- a/lapack-netlib/SRC/dorbdb.f +++ b/lapack-netlib/SRC/dorbdb.f @@ -5,7 +5,6 @@ * Online html documentation available at * http://www.netlib.org/lapack/explore-html/ * -*> \htmlonly *> Download DORBDB + dependencies *> *> [TGZ] @@ -13,7 +12,6 @@ *> [ZIP] *> *> [TXT] -*> \endhtmlonly * * Definition: * =========== @@ -255,7 +253,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup doubleOTHERcomputational +*> \ingroup unbdb * *> \par Further Details: * ===================== @@ -281,9 +279,11 @@ *> Algorithms, 50(1):33-65, 2009. *> * ===================================================================== - SUBROUTINE DORBDB( TRANS, SIGNS, M, P, Q, X11, LDX11, X12, LDX12, + SUBROUTINE DORBDB( TRANS, SIGNS, M, P, Q, X11, LDX11, X12, + $ LDX12, $ X21, LDX21, X22, LDX22, THETA, PHI, TAUP1, $ TAUP2, TAUQ1, TAUQ2, WORK, LWORK, INFO ) + IMPLICIT NONE * * -- LAPACK computational routine -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- @@ -315,7 +315,8 @@ SUBROUTINE DORBDB( TRANS, SIGNS, M, P, Q, X11, LDX11, X12, LDX12, DOUBLE PRECISION Z1, Z2, Z3, Z4 * .. * .. External Subroutines .. - EXTERNAL DAXPY, DLARF, DLARFGP, DSCAL, XERBLA + EXTERNAL DAXPY, DLARF1F, DLARFGP, DSCAL, + $ XERBLA * .. * .. External Functions .. DOUBLE PRECISION DNRM2 @@ -398,14 +399,16 @@ SUBROUTINE DORBDB( TRANS, SIGNS, M, P, Q, X11, LDX11, X12, LDX12, CALL DSCAL( P-I+1, Z1, X11(I,I), 1 ) ELSE CALL DSCAL( P-I+1, Z1*COS(PHI(I-1)), X11(I,I), 1 ) - CALL DAXPY( P-I+1, -Z1*Z3*Z4*SIN(PHI(I-1)), X12(I,I-1), + CALL DAXPY( P-I+1, -Z1*Z3*Z4*SIN(PHI(I-1)), X12(I, + $ I-1), $ 1, X11(I,I), 1 ) END IF IF( I .EQ. 1 ) THEN CALL DSCAL( M-P-I+1, Z2, X21(I,I), 1 ) ELSE CALL DSCAL( M-P-I+1, Z2*COS(PHI(I-1)), X21(I,I), 1 ) - CALL DAXPY( M-P-I+1, -Z2*Z3*Z4*SIN(PHI(I-1)), X22(I,I-1), + CALL DAXPY( M-P-I+1, -Z2*Z3*Z4*SIN(PHI(I-1)), X22(I, + $ I-1), $ 1, X21(I,I), 1 ) END IF * @@ -413,44 +416,48 @@ SUBROUTINE DORBDB( TRANS, SIGNS, M, P, Q, X11, LDX11, X12, LDX12, $ DNRM2( P-I+1, X11(I,I), 1 ) ) * IF( P .GT. I ) THEN - CALL DLARFGP( P-I+1, X11(I,I), X11(I+1,I), 1, TAUP1(I) ) + CALL DLARFGP( P-I+1, X11(I,I), X11(I+1,I), 1, + $ TAUP1(I) ) ELSE IF( P .EQ. I ) THEN CALL DLARFGP( P-I+1, X11(I,I), X11(I,I), 1, TAUP1(I) ) END IF - X11(I,I) = ONE IF ( M-P .GT. I ) THEN CALL DLARFGP( M-P-I+1, X21(I,I), X21(I+1,I), 1, $ TAUP2(I) ) ELSE IF ( M-P .EQ. I ) THEN - CALL DLARFGP( M-P-I+1, X21(I,I), X21(I,I), 1, TAUP2(I) ) + CALL DLARFGP( M-P-I+1, X21(I,I), X21(I,I), 1, + $ TAUP2(I) ) END IF - X21(I,I) = ONE * IF ( Q .GT. I ) THEN - CALL DLARF( 'L', P-I+1, Q-I, X11(I,I), 1, TAUP1(I), + CALL DLARF1F( 'L', P-I+1, Q-I, X11(I,I), 1, TAUP1(I), $ X11(I,I+1), LDX11, WORK ) END IF IF ( M-Q+1 .GT. I ) THEN - CALL DLARF( 'L', P-I+1, M-Q-I+1, X11(I,I), 1, TAUP1(I), + CALL DLARF1F( 'L', P-I+1, M-Q-I+1, X11(I,I), 1, + $ TAUP1(I), $ X12(I,I), LDX12, WORK ) END IF IF ( Q .GT. I ) THEN - CALL DLARF( 'L', M-P-I+1, Q-I, X21(I,I), 1, TAUP2(I), - $ X21(I,I+1), LDX21, WORK ) + CALL DLARF1F( 'L', M-P-I+1, Q-I, X21(I,I), 1, + $ TAUP2(I), X21(I,I+1), LDX21, WORK ) END IF IF ( M-Q+1 .GT. I ) THEN - CALL DLARF( 'L', M-P-I+1, M-Q-I+1, X21(I,I), 1, TAUP2(I), - $ X22(I,I), LDX22, WORK ) + CALL DLARF1F( 'L', M-P-I+1, M-Q-I+1, X21(I,I), 1, + $ TAUP2(I), X22(I,I), LDX22, WORK ) END IF * IF( I .LT. Q ) THEN CALL DSCAL( Q-I, -Z1*Z3*SIN(THETA(I)), X11(I,I+1), $ LDX11 ) - CALL DAXPY( Q-I, Z2*Z3*COS(THETA(I)), X21(I,I+1), LDX21, + CALL DAXPY( Q-I, Z2*Z3*COS(THETA(I)), X21(I,I+1), + $ LDX21, $ X11(I,I+1), LDX11 ) END IF - CALL DSCAL( M-Q-I+1, -Z1*Z4*SIN(THETA(I)), X12(I,I), LDX12 ) - CALL DAXPY( M-Q-I+1, Z2*Z4*COS(THETA(I)), X22(I,I), LDX22, + CALL DSCAL( M-Q-I+1, -Z1*Z4*SIN(THETA(I)), X12(I,I), + $ LDX12 ) + CALL DAXPY( M-Q-I+1, Z2*Z4*COS(THETA(I)), X22(I,I), + $ LDX22, $ X12(I,I), LDX12 ) * IF( I .LT. Q ) @@ -465,7 +472,6 @@ SUBROUTINE DORBDB( TRANS, SIGNS, M, P, Q, X11, LDX11, X12, LDX12, CALL DLARFGP( Q-I, X11(I,I+1), X11(I,I+2), LDX11, $ TAUQ1(I) ) END IF - X11(I,I+1) = ONE END IF IF ( Q+I-1 .LT. M ) THEN IF ( M-Q .EQ. I ) THEN @@ -476,20 +482,22 @@ SUBROUTINE DORBDB( TRANS, SIGNS, M, P, Q, X11, LDX11, X12, LDX12, $ TAUQ2(I) ) END IF END IF - X12(I,I) = ONE * IF( I .LT. Q ) THEN - CALL DLARF( 'R', P-I, Q-I, X11(I,I+1), LDX11, TAUQ1(I), + CALL DLARF1F( 'R', P-I, Q-I, X11(I,I+1), LDX11, + $ TAUQ1(I), $ X11(I+1,I+1), LDX11, WORK ) - CALL DLARF( 'R', M-P-I, Q-I, X11(I,I+1), LDX11, TAUQ1(I), + CALL DLARF1F( 'R', M-P-I, Q-I, X11(I,I+1), LDX11, + $ TAUQ1(I), $ X21(I+1,I+1), LDX21, WORK ) END IF IF ( P .GT. I ) THEN - CALL DLARF( 'R', P-I, M-Q-I+1, X12(I,I), LDX12, TAUQ2(I), + CALL DLARF1F( 'R', P-I, M-Q-I+1, X12(I,I), LDX12, + $ TAUQ2(I), $ X12(I+1,I), LDX12, WORK ) END IF IF ( M-P .GT. I ) THEN - CALL DLARF( 'R', M-P-I, M-Q-I+1, X12(I,I), LDX12, + CALL DLARF1F( 'R', M-P-I, M-Q-I+1, X12(I,I), LDX12, $ TAUQ2(I), X22(I+1,I), LDX22, WORK ) END IF * @@ -507,14 +515,14 @@ SUBROUTINE DORBDB( TRANS, SIGNS, M, P, Q, X11, LDX11, X12, LDX12, CALL DLARFGP( M-Q-I+1, X12(I,I), X12(I,I+1), LDX12, $ TAUQ2(I) ) END IF - X12(I,I) = ONE * IF ( P .GT. I ) THEN - CALL DLARF( 'R', P-I, M-Q-I+1, X12(I,I), LDX12, TAUQ2(I), + CALL DLARF1F( 'R', P-I, M-Q-I+1, X12(I,I), LDX12, + $ TAUQ2(I), $ X12(I+1,I), LDX12, WORK ) END IF IF( M-P-Q .GE. 1 ) - $ CALL DLARF( 'R', M-P-Q, M-Q-I+1, X12(I,I), LDX12, + $ CALL DLARF1F( 'R', M-P-Q, M-Q-I+1, X12(I,I), LDX12, $ TAUQ2(I), X22(Q+1,I), LDX22, WORK ) * END DO @@ -531,9 +539,9 @@ SUBROUTINE DORBDB( TRANS, SIGNS, M, P, Q, X11, LDX11, X12, LDX12, CALL DLARFGP( M-P-Q-I+1, X22(Q+I,P+I), X22(Q+I,P+I+1), $ LDX22, TAUQ2(P+I) ) END IF - X22(Q+I,P+I) = ONE IF ( I .LT. M-P-Q ) THEN - CALL DLARF( 'R', M-P-Q-I, M-P-Q-I+1, X22(Q+I,P+I), LDX22, + CALL DLARF1F( 'R', M-P-Q-I, M-P-Q-I+1, X22(Q+I,P+I), + $ LDX22, $ TAUQ2(P+I), X22(Q+I+1,P+I), LDX22, WORK ) END IF * @@ -549,22 +557,25 @@ SUBROUTINE DORBDB( TRANS, SIGNS, M, P, Q, X11, LDX11, X12, LDX12, CALL DSCAL( P-I+1, Z1, X11(I,I), LDX11 ) ELSE CALL DSCAL( P-I+1, Z1*COS(PHI(I-1)), X11(I,I), LDX11 ) - CALL DAXPY( P-I+1, -Z1*Z3*Z4*SIN(PHI(I-1)), X12(I-1,I), + CALL DAXPY( P-I+1, -Z1*Z3*Z4*SIN(PHI(I-1)), X12(I-1, + $ I), $ LDX12, X11(I,I), LDX11 ) END IF IF( I .EQ. 1 ) THEN CALL DSCAL( M-P-I+1, Z2, X21(I,I), LDX21 ) ELSE - CALL DSCAL( M-P-I+1, Z2*COS(PHI(I-1)), X21(I,I), LDX21 ) - CALL DAXPY( M-P-I+1, -Z2*Z3*Z4*SIN(PHI(I-1)), X22(I-1,I), + CALL DSCAL( M-P-I+1, Z2*COS(PHI(I-1)), X21(I,I), + $ LDX21 ) + CALL DAXPY( M-P-I+1, -Z2*Z3*Z4*SIN(PHI(I-1)), X22(I-1, + $ I), $ LDX22, X21(I,I), LDX21 ) END IF * THETA(I) = ATAN2( DNRM2( M-P-I+1, X21(I,I), LDX21 ), $ DNRM2( P-I+1, X11(I,I), LDX11 ) ) * - CALL DLARFGP( P-I+1, X11(I,I), X11(I,I+1), LDX11, TAUP1(I) ) - X11(I,I) = ONE + CALL DLARFGP( P-I+1, X11(I,I), X11(I,I+1), LDX11, + $ TAUP1(I) ) IF ( I .EQ. M-P ) THEN CALL DLARFGP( M-P-I+1, X21(I,I), X21(I,I), LDX21, $ TAUP2(I) ) @@ -572,22 +583,23 @@ SUBROUTINE DORBDB( TRANS, SIGNS, M, P, Q, X11, LDX11, X12, LDX12, CALL DLARFGP( M-P-I+1, X21(I,I), X21(I,I+1), LDX21, $ TAUP2(I) ) END IF - X21(I,I) = ONE * IF ( Q .GT. I ) THEN - CALL DLARF( 'R', Q-I, P-I+1, X11(I,I), LDX11, TAUP1(I), + CALL DLARF1F( 'R', Q-I, P-I+1, X11(I,I), LDX11, + $ TAUP1(I), $ X11(I+1,I), LDX11, WORK ) END IF IF ( M-Q+1 .GT. I ) THEN - CALL DLARF( 'R', M-Q-I+1, P-I+1, X11(I,I), LDX11, + CALL DLARF1F( 'R', M-Q-I+1, P-I+1, X11(I,I), LDX11, $ TAUP1(I), X12(I,I), LDX12, WORK ) END IF IF ( Q .GT. I ) THEN - CALL DLARF( 'R', Q-I, M-P-I+1, X21(I,I), LDX21, TAUP2(I), + CALL DLARF1F( 'R', Q-I, M-P-I+1, X21(I,I), LDX21, + $ TAUP2(I), $ X21(I+1,I), LDX21, WORK ) END IF IF ( M-Q+1 .GT. I ) THEN - CALL DLARF( 'R', M-Q-I+1, M-P-I+1, X21(I,I), LDX21, + CALL DLARF1F( 'R', M-Q-I+1, M-P-I+1, X21(I,I), LDX21, $ TAUP2(I), X22(I,I), LDX22, WORK ) END IF * @@ -612,7 +624,6 @@ SUBROUTINE DORBDB( TRANS, SIGNS, M, P, Q, X11, LDX11, X12, LDX12, CALL DLARFGP( Q-I, X11(I+1,I), X11(I+2,I), 1, $ TAUQ1(I) ) END IF - X11(I+1,I) = ONE END IF IF ( M-Q .GT. I ) THEN CALL DLARFGP( M-Q-I+1, X12(I,I), X12(I+1,I), 1, @@ -621,19 +632,18 @@ SUBROUTINE DORBDB( TRANS, SIGNS, M, P, Q, X11, LDX11, X12, LDX12, CALL DLARFGP( M-Q-I+1, X12(I,I), X12(I,I), 1, $ TAUQ2(I) ) END IF - X12(I,I) = ONE * IF( I .LT. Q ) THEN - CALL DLARF( 'L', Q-I, P-I, X11(I+1,I), 1, TAUQ1(I), + CALL DLARF1F( 'L', Q-I, P-I, X11(I+1,I), 1, TAUQ1(I), $ X11(I+1,I+1), LDX11, WORK ) - CALL DLARF( 'L', Q-I, M-P-I, X11(I+1,I), 1, TAUQ1(I), - $ X21(I+1,I+1), LDX21, WORK ) + CALL DLARF1F( 'L', Q-I, M-P-I, X11(I+1,I), 1, + $ TAUQ1(I), X21(I+1,I+1), LDX21, WORK ) END IF - CALL DLARF( 'L', M-Q-I+1, P-I, X12(I,I), 1, TAUQ2(I), - $ X12(I,I+1), LDX12, WORK ) + CALL DLARF1F( 'L', M-Q-I+1, P-I, X12(I,I), 1, TAUQ2(I), + $ X12(I,I+1), LDX12, WORK ) IF ( M-P-I .GT. 0 ) THEN - CALL DLARF( 'L', M-Q-I+1, M-P-I, X12(I,I), 1, TAUQ2(I), - $ X22(I,I+1), LDX22, WORK ) + CALL DLARF1F( 'L', M-Q-I+1, M-P-I, X12(I,I), 1, + $ TAUQ2(I), X22(I,I+1), LDX22, WORK ) END IF * END DO @@ -643,16 +653,16 @@ SUBROUTINE DORBDB( TRANS, SIGNS, M, P, Q, X11, LDX11, X12, LDX12, DO I = Q + 1, P * CALL DSCAL( M-Q-I+1, -Z1*Z4, X12(I,I), 1 ) - CALL DLARFGP( M-Q-I+1, X12(I,I), X12(I+1,I), 1, TAUQ2(I) ) - X12(I,I) = ONE + CALL DLARFGP( M-Q-I+1, X12(I,I), X12(I+1,I), 1, + $ TAUQ2(I) ) * IF ( P .GT. I ) THEN - CALL DLARF( 'L', M-Q-I+1, P-I, X12(I,I), 1, TAUQ2(I), - $ X12(I,I+1), LDX12, WORK ) + CALL DLARF1F( 'L', M-Q-I+1, P-I, X12(I,I), 1, + $ TAUQ2(I), X12(I,I+1), LDX12, WORK ) END IF IF( M-P-Q .GE. 1 ) - $ CALL DLARF( 'L', M-Q-I+1, M-P-Q, X12(I,I), 1, TAUQ2(I), - $ X22(I,Q+1), LDX22, WORK ) + $ CALL DLARF1F( 'L', M-Q-I+1, M-P-Q, X12(I,I), 1, + $ TAUQ2(I), X22(I,Q+1), LDX22, WORK ) * END DO * @@ -662,15 +672,17 @@ SUBROUTINE DORBDB( TRANS, SIGNS, M, P, Q, X11, LDX11, X12, LDX12, * CALL DSCAL( M-P-Q-I+1, Z2*Z4, X22(P+I,Q+I), 1 ) IF ( M-P-Q .EQ. I ) THEN - CALL DLARFGP( M-P-Q-I+1, X22(P+I,Q+I), X22(P+I,Q+I), 1, + CALL DLARFGP( M-P-Q-I+1, X22(P+I,Q+I), X22(P+I,Q+I), + $ 1, $ TAUQ2(P+I) ) ELSE - CALL DLARFGP( M-P-Q-I+1, X22(P+I,Q+I), X22(P+I+1,Q+I), 1, + CALL DLARFGP( M-P-Q-I+1, X22(P+I,Q+I), X22(P+I+1,Q+I), + $ 1, $ TAUQ2(P+I) ) - CALL DLARF( 'L', M-P-Q-I+1, M-P-Q-I, X22(P+I,Q+I), 1, - $ TAUQ2(P+I), X22(P+I,Q+I+1), LDX22, WORK ) + CALL DLARF1F( 'L', M-P-Q-I+1, M-P-Q-I, X22(P+I,Q+I), + $ 1, TAUQ2(P+I), X22(P+I,Q+I+1), LDX22, + $ WORK ) END IF - X22(P+I,Q+I) = ONE * END DO * diff --git a/lapack-netlib/SRC/dorbdb1.f b/lapack-netlib/SRC/dorbdb1.f index b5b2d13623..c52293bb68 100644 --- a/lapack-netlib/SRC/dorbdb1.f +++ b/lapack-netlib/SRC/dorbdb1.f @@ -5,7 +5,6 @@ * Online html documentation available at * http://www.netlib.org/lapack/explore-html/ * -*> \htmlonly *> Download DORBDB1 + dependencies *> *> [TGZ] @@ -13,7 +12,6 @@ *> [ZIP] *> *> [TXT] -*> \endhtmlonly * * Definition: * =========== @@ -174,7 +172,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup doubleOTHERcomputational +*> \ingroup unbdb1 * *> \par Further Details: * ===================== @@ -198,8 +196,10 @@ *> Algorithms, 50(1):33-65, 2009. *> * ===================================================================== - SUBROUTINE DORBDB1( M, P, Q, X11, LDX11, X21, LDX21, THETA, PHI, + SUBROUTINE DORBDB1( M, P, Q, X11, LDX11, X21, LDX21, THETA, + $ PHI, $ TAUP1, TAUP2, TAUQ1, WORK, LWORK, INFO ) + IMPLICIT NONE * * -- LAPACK computational routine -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- @@ -227,7 +227,8 @@ SUBROUTINE DORBDB1( M, P, Q, X11, LDX11, X21, LDX21, THETA, PHI, LOGICAL LQUERY * .. * .. External Subroutines .. - EXTERNAL DLARF, DLARFGP, DORBDB5, DROT, XERBLA + EXTERNAL DLARF1F, DLARFGP, DORBDB5, DROT, + $ XERBLA * .. * .. External Functions .. DOUBLE PRECISION DNRM2 @@ -285,22 +286,23 @@ SUBROUTINE DORBDB1( M, P, Q, X11, LDX11, X21, LDX21, THETA, PHI, THETA(I) = ATAN2( X21(I,I), X11(I,I) ) C = COS( THETA(I) ) S = SIN( THETA(I) ) - X11(I,I) = ONE - X21(I,I) = ONE - CALL DLARF( 'L', P-I+1, Q-I, X11(I,I), 1, TAUP1(I), X11(I,I+1), + CALL DLARF1F( 'L', P-I+1, Q-I, X11(I,I), 1, TAUP1(I), X11(I, + $ I+1), $ LDX11, WORK(ILARF) ) - CALL DLARF( 'L', M-P-I+1, Q-I, X21(I,I), 1, TAUP2(I), + CALL DLARF1F( 'L', M-P-I+1, Q-I, X21(I,I), 1, TAUP2(I), $ X21(I,I+1), LDX21, WORK(ILARF) ) * IF( I .LT. Q ) THEN - CALL DROT( Q-I, X11(I,I+1), LDX11, X21(I,I+1), LDX21, C, S ) - CALL DLARFGP( Q-I, X21(I,I+1), X21(I,I+2), LDX21, TAUQ1(I) ) + CALL DROT( Q-I, X11(I,I+1), LDX11, X21(I,I+1), LDX21, C, + $ S ) + CALL DLARFGP( Q-I, X21(I,I+1), X21(I,I+2), LDX21, + $ TAUQ1(I) ) S = X21(I,I+1) - X21(I,I+1) = ONE - CALL DLARF( 'R', P-I, Q-I, X21(I,I+1), LDX21, TAUQ1(I), + CALL DLARF1F( 'R', P-I, Q-I, X21(I,I+1), LDX21, TAUQ1(I), $ X11(I+1,I+1), LDX11, WORK(ILARF) ) - CALL DLARF( 'R', M-P-I, Q-I, X21(I,I+1), LDX21, TAUQ1(I), - $ X21(I+1,I+1), LDX21, WORK(ILARF) ) + CALL DLARF1F( 'R', M-P-I, Q-I, X21(I,I+1), LDX21, + $ TAUQ1(I), X21(I+1,I+1), LDX21, + $ WORK(ILARF) ) C = SQRT( DNRM2( P-I, X11(I+1,I+1), 1 )**2 $ + DNRM2( M-P-I, X21(I+1,I+1), 1 )**2 ) PHI(I) = ATAN2( S, C ) diff --git a/lapack-netlib/SRC/dorbdb2.f b/lapack-netlib/SRC/dorbdb2.f index 0b4ad732c1..8a5b8d9bae 100644 --- a/lapack-netlib/SRC/dorbdb2.f +++ b/lapack-netlib/SRC/dorbdb2.f @@ -5,7 +5,6 @@ * Online html documentation available at * http://www.netlib.org/lapack/explore-html/ * -*> \htmlonly *> Download DORBDB2 + dependencies *> *> [TGZ] @@ -13,7 +12,6 @@ *> [ZIP] *> *> [TXT] -*> \endhtmlonly * * Definition: * =========== @@ -173,7 +171,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup doubleOTHERcomputational +*> \ingroup unbdb2 * *> \par Further Details: * ===================== @@ -197,8 +195,10 @@ *> Algorithms, 50(1):33-65, 2009. *> * ===================================================================== - SUBROUTINE DORBDB2( M, P, Q, X11, LDX11, X21, LDX21, THETA, PHI, + SUBROUTINE DORBDB2( M, P, Q, X11, LDX11, X21, LDX21, THETA, + $ PHI, $ TAUP1, TAUP2, TAUQ1, WORK, LWORK, INFO ) + IMPLICIT NONE * * -- LAPACK computational routine -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- @@ -226,7 +226,8 @@ SUBROUTINE DORBDB2( M, P, Q, X11, LDX11, X21, LDX21, THETA, PHI, LOGICAL LQUERY * .. * .. External Subroutines .. - EXTERNAL DLARF, DLARFGP, DORBDB5, DROT, DSCAL, XERBLA + EXTERNAL DLARF1F, DLARFGP, DORBDB5, DROT, DSCAL, + $ XERBLA * .. * .. External Functions .. DOUBLE PRECISION DNRM2 @@ -280,15 +281,15 @@ SUBROUTINE DORBDB2( M, P, Q, X11, LDX11, X21, LDX21, THETA, PHI, DO I = 1, P * IF( I .GT. 1 ) THEN - CALL DROT( Q-I+1, X11(I,I), LDX11, X21(I-1,I), LDX21, C, S ) + CALL DROT( Q-I+1, X11(I,I), LDX11, X21(I-1,I), LDX21, C, + $ S ) END IF CALL DLARFGP( Q-I+1, X11(I,I), X11(I,I+1), LDX11, TAUQ1(I) ) C = X11(I,I) - X11(I,I) = ONE - CALL DLARF( 'R', P-I, Q-I+1, X11(I,I), LDX11, TAUQ1(I), + CALL DLARF1F( 'R', P-I, Q-I+1, X11(I,I), LDX11, TAUQ1(I), $ X11(I+1,I), LDX11, WORK(ILARF) ) - CALL DLARF( 'R', M-P-I+1, Q-I+1, X11(I,I), LDX11, TAUQ1(I), - $ X21(I,I), LDX21, WORK(ILARF) ) + CALL DLARF1F( 'R', M-P-I+1, Q-I+1, X11(I,I), LDX11, + $ TAUQ1(I), X21(I,I), LDX21, WORK(ILARF) ) S = SQRT( DNRM2( P-I, X11(I+1,I), 1 )**2 $ + DNRM2( M-P-I+1, X21(I,I), 1 )**2 ) THETA(I) = ATAN2( S, C ) @@ -303,12 +304,10 @@ SUBROUTINE DORBDB2( M, P, Q, X11, LDX11, X21, LDX21, THETA, PHI, PHI(I) = ATAN2( X11(I+1,I), X21(I,I) ) C = COS( PHI(I) ) S = SIN( PHI(I) ) - X11(I+1,I) = ONE - CALL DLARF( 'L', P-I, Q-I, X11(I+1,I), 1, TAUP1(I), + CALL DLARF1F( 'L', P-I, Q-I, X11(I+1,I), 1, TAUP1(I), $ X11(I+1,I+1), LDX11, WORK(ILARF) ) END IF - X21(I,I) = ONE - CALL DLARF( 'L', M-P-I+1, Q-I, X21(I,I), 1, TAUP2(I), + CALL DLARF1F( 'L', M-P-I+1, Q-I, X21(I,I), 1, TAUP2(I), $ X21(I,I+1), LDX21, WORK(ILARF) ) * END DO @@ -317,8 +316,7 @@ SUBROUTINE DORBDB2( M, P, Q, X11, LDX11, X21, LDX21, THETA, PHI, * DO I = P + 1, Q CALL DLARFGP( M-P-I+1, X21(I,I), X21(I+1,I), 1, TAUP2(I) ) - X21(I,I) = ONE - CALL DLARF( 'L', M-P-I+1, Q-I, X21(I,I), 1, TAUP2(I), + CALL DLARF1F( 'L', M-P-I+1, Q-I, X21(I,I), 1, TAUP2(I), $ X21(I,I+1), LDX21, WORK(ILARF) ) END DO * diff --git a/lapack-netlib/SRC/dorbdb3.f b/lapack-netlib/SRC/dorbdb3.f index 79b10a5d36..2bf96d21a2 100644 --- a/lapack-netlib/SRC/dorbdb3.f +++ b/lapack-netlib/SRC/dorbdb3.f @@ -5,7 +5,6 @@ * Online html documentation available at * http://www.netlib.org/lapack/explore-html/ * -*> \htmlonly *> Download DORBDB3 + dependencies *> *> [TGZ] @@ -13,7 +12,6 @@ *> [ZIP] *> *> [TXT] -*> \endhtmlonly * * Definition: * =========== @@ -172,7 +170,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup doubleOTHERcomputational +*> \ingroup unbdb3 * *> \par Further Details: * ===================== @@ -196,8 +194,10 @@ *> Algorithms, 50(1):33-65, 2009. *> * ===================================================================== - SUBROUTINE DORBDB3( M, P, Q, X11, LDX11, X21, LDX21, THETA, PHI, + SUBROUTINE DORBDB3( M, P, Q, X11, LDX11, X21, LDX21, THETA, + $ PHI, $ TAUP1, TAUP2, TAUQ1, WORK, LWORK, INFO ) + IMPLICIT NONE * * -- LAPACK computational routine -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- @@ -214,10 +214,6 @@ SUBROUTINE DORBDB3( M, P, Q, X11, LDX11, X21, LDX21, THETA, PHI, * * ==================================================================== * -* .. Parameters .. - DOUBLE PRECISION ONE - PARAMETER ( ONE = 1.0D0 ) -* .. * .. Local Scalars .. DOUBLE PRECISION C, S INTEGER CHILDINFO, I, ILARF, IORBDB5, LLARF, LORBDB5, @@ -225,7 +221,8 @@ SUBROUTINE DORBDB3( M, P, Q, X11, LDX11, X21, LDX21, THETA, PHI, LOGICAL LQUERY * .. * .. External Subroutines .. - EXTERNAL DLARF, DLARFGP, DORBDB5, DROT, XERBLA + EXTERNAL DLARF1F, DLARFGP, DORBDB5, DROT, + $ XERBLA * .. * .. External Functions .. DOUBLE PRECISION DNRM2 @@ -279,15 +276,15 @@ SUBROUTINE DORBDB3( M, P, Q, X11, LDX11, X21, LDX21, THETA, PHI, DO I = 1, M-P * IF( I .GT. 1 ) THEN - CALL DROT( Q-I+1, X11(I-1,I), LDX11, X21(I,I), LDX11, C, S ) + CALL DROT( Q-I+1, X11(I-1,I), LDX11, X21(I,I), LDX11, C, + $ S ) END IF * CALL DLARFGP( Q-I+1, X21(I,I), X21(I,I+1), LDX21, TAUQ1(I) ) S = X21(I,I) - X21(I,I) = ONE - CALL DLARF( 'R', P-I+1, Q-I+1, X21(I,I), LDX21, TAUQ1(I), + CALL DLARF1F( 'R', P-I+1, Q-I+1, X21(I,I), LDX21, TAUQ1(I), $ X11(I,I), LDX11, WORK(ILARF) ) - CALL DLARF( 'R', M-P-I, Q-I+1, X21(I,I), LDX21, TAUQ1(I), + CALL DLARF1F( 'R', M-P-I, Q-I+1, X21(I,I), LDX21, TAUQ1(I), $ X21(I+1,I), LDX21, WORK(ILARF) ) C = SQRT( DNRM2( P-I+1, X11(I,I), 1 )**2 $ + DNRM2( M-P-I, X21(I+1,I), 1 )**2 ) @@ -298,16 +295,16 @@ SUBROUTINE DORBDB3( M, P, Q, X11, LDX11, X21, LDX21, THETA, PHI, $ WORK(IORBDB5), LORBDB5, CHILDINFO ) CALL DLARFGP( P-I+1, X11(I,I), X11(I+1,I), 1, TAUP1(I) ) IF( I .LT. M-P ) THEN - CALL DLARFGP( M-P-I, X21(I+1,I), X21(I+2,I), 1, TAUP2(I) ) + CALL DLARFGP( M-P-I, X21(I+1,I), X21(I+2,I), 1, + $ TAUP2(I) ) PHI(I) = ATAN2( X21(I+1,I), X11(I,I) ) C = COS( PHI(I) ) S = SIN( PHI(I) ) - X21(I+1,I) = ONE - CALL DLARF( 'L', M-P-I, Q-I, X21(I+1,I), 1, TAUP2(I), + CALL DLARF1F( 'L', M-P-I, Q-I, X21(I+1,I), 1, TAUP2(I), $ X21(I+1,I+1), LDX21, WORK(ILARF) ) END IF - X11(I,I) = ONE - CALL DLARF( 'L', P-I+1, Q-I, X11(I,I), 1, TAUP1(I), X11(I,I+1), + CALL DLARF1F( 'L', P-I+1, Q-I, X11(I,I), 1, TAUP1(I), X11(I, + $ I+1), $ LDX11, WORK(ILARF) ) * END DO @@ -316,8 +313,8 @@ SUBROUTINE DORBDB3( M, P, Q, X11, LDX11, X21, LDX21, THETA, PHI, * DO I = M-P + 1, Q CALL DLARFGP( P-I+1, X11(I,I), X11(I+1,I), 1, TAUP1(I) ) - X11(I,I) = ONE - CALL DLARF( 'L', P-I+1, Q-I, X11(I,I), 1, TAUP1(I), X11(I,I+1), + CALL DLARF1F( 'L', P-I+1, Q-I, X11(I,I), 1, TAUP1(I), X11(I, + $ I+1), $ LDX11, WORK(ILARF) ) END DO * diff --git a/lapack-netlib/SRC/dorbdb4.f b/lapack-netlib/SRC/dorbdb4.f index 985be3277e..6a218fa8ad 100644 --- a/lapack-netlib/SRC/dorbdb4.f +++ b/lapack-netlib/SRC/dorbdb4.f @@ -5,7 +5,6 @@ * Online html documentation available at * http://www.netlib.org/lapack/explore-html/ * -*> \htmlonly *> Download DORBDB4 + dependencies *> *> [TGZ] @@ -13,7 +12,6 @@ *> [ZIP] *> *> [TXT] -*> \endhtmlonly * * Definition: * =========== @@ -183,7 +181,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup doubleOTHERcomputational +*> \ingroup unbdb4 * *> \par Further Details: * ===================== @@ -207,9 +205,11 @@ *> Algorithms, 50(1):33-65, 2009. *> * ===================================================================== - SUBROUTINE DORBDB4( M, P, Q, X11, LDX11, X21, LDX21, THETA, PHI, + SUBROUTINE DORBDB4( M, P, Q, X11, LDX11, X21, LDX21, THETA, + $ PHI, $ TAUP1, TAUP2, TAUQ1, PHANTOM, WORK, LWORK, $ INFO ) + IMPLICIT NONE * * -- LAPACK computational routine -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- @@ -237,7 +237,8 @@ SUBROUTINE DORBDB4( M, P, Q, X11, LDX11, X21, LDX21, THETA, PHI, LOGICAL LQUERY * .. * .. External Subroutines .. - EXTERNAL DLARF, DLARFGP, DORBDB5, DROT, DSCAL, XERBLA + EXTERNAL DLARF, DLARFGP, DORBDB5, DROT, DSCAL, + $ XERBLA * .. * .. External Functions .. DOUBLE PRECISION DNRM2 @@ -300,42 +301,39 @@ SUBROUTINE DORBDB4( M, P, Q, X11, LDX11, X21, LDX21, THETA, PHI, $ LORBDB5, CHILDINFO ) CALL DSCAL( P, NEGONE, PHANTOM(1), 1 ) CALL DLARFGP( P, PHANTOM(1), PHANTOM(2), 1, TAUP1(1) ) - CALL DLARFGP( M-P, PHANTOM(P+1), PHANTOM(P+2), 1, TAUP2(1) ) + CALL DLARFGP( M-P, PHANTOM(P+1), PHANTOM(P+2), 1, + $ TAUP2(1) ) THETA(I) = ATAN2( PHANTOM(1), PHANTOM(P+1) ) C = COS( THETA(I) ) S = SIN( THETA(I) ) - PHANTOM(1) = ONE - PHANTOM(P+1) = ONE - CALL DLARF( 'L', P, Q, PHANTOM(1), 1, TAUP1(1), X11, LDX11, - $ WORK(ILARF) ) - CALL DLARF( 'L', M-P, Q, PHANTOM(P+1), 1, TAUP2(1), X21, - $ LDX21, WORK(ILARF) ) + CALL DLARF1F( 'L', P, Q, PHANTOM(1), 1, TAUP1(1), X11, + $ LDX11, WORK(ILARF) ) + CALL DLARF1F( 'L', M-P, Q, PHANTOM(P+1), 1, TAUP2(1), + $ X21, LDX21, WORK(ILARF) ) ELSE CALL DORBDB5( P-I+1, M-P-I+1, Q-I+1, X11(I,I-1), 1, $ X21(I,I-1), 1, X11(I,I), LDX11, X21(I,I), $ LDX21, WORK(IORBDB5), LORBDB5, CHILDINFO ) CALL DSCAL( P-I+1, NEGONE, X11(I,I-1), 1 ) - CALL DLARFGP( P-I+1, X11(I,I-1), X11(I+1,I-1), 1, TAUP1(I) ) + CALL DLARFGP( P-I+1, X11(I,I-1), X11(I+1,I-1), 1, + $ TAUP1(I) ) CALL DLARFGP( M-P-I+1, X21(I,I-1), X21(I+1,I-1), 1, $ TAUP2(I) ) THETA(I) = ATAN2( X11(I,I-1), X21(I,I-1) ) C = COS( THETA(I) ) S = SIN( THETA(I) ) - X11(I,I-1) = ONE - X21(I,I-1) = ONE - CALL DLARF( 'L', P-I+1, Q-I+1, X11(I,I-1), 1, TAUP1(I), - $ X11(I,I), LDX11, WORK(ILARF) ) - CALL DLARF( 'L', M-P-I+1, Q-I+1, X21(I,I-1), 1, TAUP2(I), - $ X21(I,I), LDX21, WORK(ILARF) ) + CALL DLARF1F( 'L', P-I+1, Q-I+1, X11(I,I-1), 1, TAUP1(I), + $ X11(I,I), LDX11, WORK(ILARF) ) + CALL DLARF1F( 'L', M-P-I+1, Q-I+1, X21(I,I-1), 1, + $ TAUP2(I), X21(I,I), LDX21, WORK(ILARF) ) END IF * CALL DROT( Q-I+1, X11(I,I), LDX11, X21(I,I), LDX21, S, -C ) CALL DLARFGP( Q-I+1, X21(I,I), X21(I,I+1), LDX21, TAUQ1(I) ) C = X21(I,I) - X21(I,I) = ONE - CALL DLARF( 'R', P-I, Q-I+1, X21(I,I), LDX21, TAUQ1(I), + CALL DLARF1F( 'R', P-I, Q-I+1, X21(I,I), LDX21, TAUQ1(I), $ X11(I+1,I), LDX11, WORK(ILARF) ) - CALL DLARF( 'R', M-P-I, Q-I+1, X21(I,I), LDX21, TAUQ1(I), + CALL DLARF1F( 'R', M-P-I, Q-I+1, X21(I,I), LDX21, TAUQ1(I), $ X21(I+1,I), LDX21, WORK(ILARF) ) IF( I .LT. M-Q ) THEN S = SQRT( DNRM2( P-I, X11(I+1,I), 1 )**2 @@ -349,20 +347,20 @@ SUBROUTINE DORBDB4( M, P, Q, X11, LDX11, X21, LDX21, THETA, PHI, * DO I = M - Q + 1, P CALL DLARFGP( Q-I+1, X11(I,I), X11(I,I+1), LDX11, TAUQ1(I) ) - X11(I,I) = ONE - CALL DLARF( 'R', P-I, Q-I+1, X11(I,I), LDX11, TAUQ1(I), + CALL DLARF1F( 'R', P-I, Q-I+1, X11(I,I), LDX11, TAUQ1(I), $ X11(I+1,I), LDX11, WORK(ILARF) ) - CALL DLARF( 'R', Q-P, Q-I+1, X11(I,I), LDX11, TAUQ1(I), + CALL DLARF1F( 'R', Q-P, Q-I+1, X11(I,I), LDX11, TAUQ1(I), $ X21(M-Q+1,I), LDX21, WORK(ILARF) ) END DO * * Reduce the bottom-right portion of X21 to [ 0 I ] * DO I = P + 1, Q - CALL DLARFGP( Q-I+1, X21(M-Q+I-P,I), X21(M-Q+I-P,I+1), LDX21, + CALL DLARFGP( Q-I+1, X21(M-Q+I-P,I), X21(M-Q+I-P,I+1), + $ LDX21, $ TAUQ1(I) ) - X21(M-Q+I-P,I) = ONE - CALL DLARF( 'R', Q-I, Q-I+1, X21(M-Q+I-P,I), LDX21, TAUQ1(I), + CALL DLARF1F( 'R', Q-I, Q-I+1, X21(M-Q+I-P,I), LDX21, + $ TAUQ1(I), $ X21(M-Q+I-P+1,I), LDX21, WORK(ILARF) ) END DO * diff --git a/lapack-netlib/SRC/dorg2l.f b/lapack-netlib/SRC/dorg2l.f index 0a42d4cf5a..5111fa19ff 100644 --- a/lapack-netlib/SRC/dorg2l.f +++ b/lapack-netlib/SRC/dorg2l.f @@ -5,7 +5,6 @@ * Online html documentation available at * http://www.netlib.org/lapack/explore-html/ * -*> \htmlonly *> Download DORG2L + dependencies *> *> [TGZ] @@ -13,7 +12,6 @@ *> [ZIP] *> *> [TXT] -*> \endhtmlonly * * Definition: * =========== @@ -107,10 +105,11 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup doubleOTHERcomputational +*> \ingroup ung2l * * ===================================================================== SUBROUTINE DORG2L( M, N, K, A, LDA, TAU, WORK, INFO ) + IMPLICIT NONE * * -- LAPACK computational routine -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- @@ -133,7 +132,7 @@ SUBROUTINE DORG2L( M, N, K, A, LDA, TAU, WORK, INFO ) INTEGER I, II, J, L * .. * .. External Subroutines .. - EXTERNAL DLARF, DSCAL, XERBLA + EXTERNAL DLARF1L, DSCAL, XERBLA * .. * .. Intrinsic Functions .. INTRINSIC MAX @@ -176,8 +175,9 @@ SUBROUTINE DORG2L( M, N, K, A, LDA, TAU, WORK, INFO ) * * Apply H(i) to A(1:m-k+i,1:n-k+i) from the left * - A( M-N+II, II ) = ONE - CALL DLARF( 'Left', M-N+II, II-1, A( 1, II ), 1, TAU( I ), A, + !A(M-N+II, II) = ONE + CALL DLARF1L( 'Left', M-N+II, II-1, A( 1, II ), 1, TAU( I ), + $ A, $ LDA, WORK ) CALL DSCAL( M-N+II-1, -TAU( I ), A( 1, II ), 1 ) A( M-N+II, II ) = ONE - TAU( I ) diff --git a/lapack-netlib/SRC/dorg2r.f b/lapack-netlib/SRC/dorg2r.f index c64ad4b0ac..213a2d54c5 100644 --- a/lapack-netlib/SRC/dorg2r.f +++ b/lapack-netlib/SRC/dorg2r.f @@ -5,7 +5,6 @@ * Online html documentation available at * http://www.netlib.org/lapack/explore-html/ * -*> \htmlonly *> Download DORG2R + dependencies *> *> [TGZ] @@ -13,7 +12,6 @@ *> [ZIP] *> *> [TXT] -*> \endhtmlonly * * Definition: * =========== @@ -107,10 +105,11 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup doubleOTHERcomputational +*> \ingroup ung2r * * ===================================================================== SUBROUTINE DORG2R( M, N, K, A, LDA, TAU, WORK, INFO ) + IMPLICIT NONE * * -- LAPACK computational routine -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- @@ -133,7 +132,7 @@ SUBROUTINE DORG2R( M, N, K, A, LDA, TAU, WORK, INFO ) INTEGER I, J, L * .. * .. External Subroutines .. - EXTERNAL DLARF, DSCAL, XERBLA + EXTERNAL DLARF1F, DSCAL, XERBLA * .. * .. Intrinsic Functions .. INTRINSIC MAX @@ -176,8 +175,7 @@ SUBROUTINE DORG2R( M, N, K, A, LDA, TAU, WORK, INFO ) * Apply H(i) to A(i:m,i:n) from the left * IF( I.LT.N ) THEN - A( I, I ) = ONE - CALL DLARF( 'Left', M-I+1, N-I, A( I, I ), 1, TAU( I ), + CALL DLARF1F( 'Left', M-I+1, N-I, A( I, I ), 1, TAU( I ), $ A( I, I+1 ), LDA, WORK ) END IF IF( I.LT.M ) diff --git a/lapack-netlib/SRC/dorgl2.f b/lapack-netlib/SRC/dorgl2.f index ce1d2c6750..d8f10ebbe5 100644 --- a/lapack-netlib/SRC/dorgl2.f +++ b/lapack-netlib/SRC/dorgl2.f @@ -5,7 +5,6 @@ * Online html documentation available at * http://www.netlib.org/lapack/explore-html/ * -*> \htmlonly *> Download DORGL2 + dependencies *> *> [TGZ] @@ -13,7 +12,6 @@ *> [ZIP] *> *> [TXT] -*> \endhtmlonly * * Definition: * =========== @@ -106,10 +104,11 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup doubleOTHERcomputational +*> \ingroup ungl2 * * ===================================================================== SUBROUTINE DORGL2( M, N, K, A, LDA, TAU, WORK, INFO ) + IMPLICIT NONE * * -- LAPACK computational routine -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- @@ -132,7 +131,7 @@ SUBROUTINE DORGL2( M, N, K, A, LDA, TAU, WORK, INFO ) INTEGER I, J, L * .. * .. External Subroutines .. - EXTERNAL DLARF, DSCAL, XERBLA + EXTERNAL DLARF1F, DSCAL, XERBLA * .. * .. Intrinsic Functions .. INTRINSIC MAX @@ -180,8 +179,7 @@ SUBROUTINE DORGL2( M, N, K, A, LDA, TAU, WORK, INFO ) * IF( I.LT.N ) THEN IF( I.LT.M ) THEN - A( I, I ) = ONE - CALL DLARF( 'Right', M-I, N-I+1, A( I, I ), LDA, + CALL DLARF1F( 'Right', M-I, N-I+1, A( I, I ), LDA, $ TAU( I ), A( I+1, I ), LDA, WORK ) END IF CALL DSCAL( N-I, -TAU( I ), A( I, I+1 ), LDA ) diff --git a/lapack-netlib/SRC/dorm2l.f b/lapack-netlib/SRC/dorm2l.f index c99039c541..f86a12f3a6 100644 --- a/lapack-netlib/SRC/dorm2l.f +++ b/lapack-netlib/SRC/dorm2l.f @@ -5,7 +5,6 @@ * Online html documentation available at * http://www.netlib.org/lapack/explore-html/ * -*> \htmlonly *> Download DORM2L + dependencies *> *> [TGZ] @@ -13,7 +12,6 @@ *> [ZIP] *> *> [TXT] -*> \endhtmlonly * * Definition: * =========== @@ -98,7 +96,6 @@ *> The i-th column must contain the vector which defines the *> elementary reflector H(i), for i = 1,2,...,k, as returned by *> DGEQLF in the last k columns of its array argument A. -*> A is modified by the routine but restored on exit. *> \endverbatim *> *> \param[in] LDA @@ -151,11 +148,12 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup doubleOTHERcomputational +*> \ingroup unm2l * * ===================================================================== SUBROUTINE DORM2L( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, $ WORK, INFO ) + IMPLICIT NONE * * -- LAPACK computational routine -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- @@ -178,14 +176,13 @@ SUBROUTINE DORM2L( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, * .. Local Scalars .. LOGICAL LEFT, NOTRAN INTEGER I, I1, I2, I3, MI, NI, NQ - DOUBLE PRECISION AII * .. * .. External Functions .. LOGICAL LSAME EXTERNAL LSAME * .. * .. External Subroutines .. - EXTERNAL DLARF, XERBLA + EXTERNAL DLARF1L, XERBLA * .. * .. Intrinsic Functions .. INTRINSIC MAX @@ -262,11 +259,8 @@ SUBROUTINE DORM2L( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, * * Apply H(i) * - AII = A( NQ-K+I, I ) - A( NQ-K+I, I ) = ONE - CALL DLARF( SIDE, MI, NI, A( 1, I ), 1, TAU( I ), C, LDC, + CALL DLARF1L( SIDE, MI, NI, A( 1, I ), 1, TAU( I ), C, LDC, $ WORK ) - A( NQ-K+I, I ) = AII 10 CONTINUE RETURN * diff --git a/lapack-netlib/SRC/dorm2r.f b/lapack-netlib/SRC/dorm2r.f index ac88eec8dc..0bda2b1497 100644 --- a/lapack-netlib/SRC/dorm2r.f +++ b/lapack-netlib/SRC/dorm2r.f @@ -5,7 +5,6 @@ * Online html documentation available at * http://www.netlib.org/lapack/explore-html/ * -*> \htmlonly *> Download DORM2R + dependencies *> *> [TGZ] @@ -13,7 +12,6 @@ *> [ZIP] *> *> [TXT] -*> \endhtmlonly * * Definition: * =========== @@ -98,7 +96,6 @@ *> The i-th column must contain the vector which defines the *> elementary reflector H(i), for i = 1,2,...,k, as returned by *> DGEQRF in the first k columns of its array argument A. -*> A is modified by the routine but restored on exit. *> \endverbatim *> *> \param[in] LDA @@ -151,11 +148,12 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup doubleOTHERcomputational +*> \ingroup unm2r * * ===================================================================== SUBROUTINE DORM2R( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, $ WORK, INFO ) + IMPLICIT NONE * * -- LAPACK computational routine -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- @@ -178,14 +176,13 @@ SUBROUTINE DORM2R( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, * .. Local Scalars .. LOGICAL LEFT, NOTRAN INTEGER I, I1, I2, I3, IC, JC, MI, NI, NQ - DOUBLE PRECISION AII * .. * .. External Functions .. LOGICAL LSAME EXTERNAL LSAME * .. * .. External Subroutines .. - EXTERNAL DLARF, XERBLA + EXTERNAL XERBLA, DLARF1F * .. * .. Intrinsic Functions .. INTRINSIC MAX @@ -266,11 +263,9 @@ SUBROUTINE DORM2R( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, * * Apply H(i) * - AII = A( I, I ) - A( I, I ) = ONE - CALL DLARF( SIDE, MI, NI, A( I, I ), 1, TAU( I ), C( IC, JC ), + CALL DLARF1F( SIDE, MI, NI, A( I, I ), 1, TAU( I ), C( IC, + $ JC ), $ LDC, WORK ) - A( I, I ) = AII 10 CONTINUE RETURN * diff --git a/lapack-netlib/SRC/dorml2.f b/lapack-netlib/SRC/dorml2.f index a9ddd460d8..f5f8957b5b 100644 --- a/lapack-netlib/SRC/dorml2.f +++ b/lapack-netlib/SRC/dorml2.f @@ -5,7 +5,6 @@ * Online html documentation available at * http://www.netlib.org/lapack/explore-html/ * -*> \htmlonly *> Download DORML2 + dependencies *> *> [TGZ] @@ -13,7 +12,6 @@ *> [ZIP] *> *> [TXT] -*> \endhtmlonly * * Definition: * =========== @@ -100,7 +98,6 @@ *> The i-th row must contain the vector which defines the *> elementary reflector H(i), for i = 1,2,...,k, as returned by *> DGELQF in the first k rows of its array argument A. -*> A is modified by the routine but restored on exit. *> \endverbatim *> *> \param[in] LDA @@ -151,11 +148,12 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup doubleOTHERcomputational +*> \ingroup unml2 * * ===================================================================== SUBROUTINE DORML2( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, $ WORK, INFO ) + IMPLICIT NONE * * -- LAPACK computational routine -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- @@ -178,14 +176,13 @@ SUBROUTINE DORML2( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, * .. Local Scalars .. LOGICAL LEFT, NOTRAN INTEGER I, I1, I2, I3, IC, JC, MI, NI, NQ - DOUBLE PRECISION AII * .. * .. External Functions .. LOGICAL LSAME EXTERNAL LSAME * .. * .. External Subroutines .. - EXTERNAL DLARF, XERBLA + EXTERNAL DLARF1F, XERBLA * .. * .. Intrinsic Functions .. INTRINSIC MAX @@ -266,11 +263,8 @@ SUBROUTINE DORML2( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, * * Apply H(i) * - AII = A( I, I ) - A( I, I ) = ONE - CALL DLARF( SIDE, MI, NI, A( I, I ), LDA, TAU( I ), + CALL DLARF1F( SIDE, MI, NI, A( I, I ), LDA, TAU( I ), $ C( IC, JC ), LDC, WORK ) - A( I, I ) = AII 10 CONTINUE RETURN * diff --git a/lapack-netlib/SRC/dspgvd.f b/lapack-netlib/SRC/dspgvd.f index 3d5afbf8ee..d9a3936a3e 100644 --- a/lapack-netlib/SRC/dspgvd.f +++ b/lapack-netlib/SRC/dspgvd.f @@ -225,7 +225,8 @@ SUBROUTINE DSPGVD( ITYPE, JOBZ, UPLO, N, AP, BP, W, Z, LDZ, WORK, * .. * .. External Functions .. LOGICAL LSAME - EXTERNAL LSAME + DOUBLE PRECISION DROUNDUP_LWORK + EXTERNAL LSAME, DROUNDUP_LWORK * .. * .. External Subroutines .. EXTERNAL DPPTRF, DSPEVD, DSPGST, DTPMV, DTPSV, XERBLA diff --git a/lapack-netlib/SRC/dsygvd.f b/lapack-netlib/SRC/dsygvd.f index 6b585bae34..d0f9633f5c 100644 --- a/lapack-netlib/SRC/dsygvd.f +++ b/lapack-netlib/SRC/dsygvd.f @@ -245,7 +245,8 @@ SUBROUTINE DSYGVD( ITYPE, JOBZ, UPLO, N, A, LDA, B, LDB, W, WORK, * .. * .. External Functions .. LOGICAL LSAME - EXTERNAL LSAME + DOUBLE PRECISION DROUNDUP_LWORK + EXTERNAL LSAME, DROUNDUP_LWORK * .. * .. External Subroutines .. EXTERNAL DPOTRF, DSYEVD, DSYGST, DTRMM, DTRSM, XERBLA diff --git a/lapack-netlib/SRC/dtgsen.f b/lapack-netlib/SRC/dtgsen.f index 8de66b6a17..d793df7129 100644 --- a/lapack-netlib/SRC/dtgsen.f +++ b/lapack-netlib/SRC/dtgsen.f @@ -5,7 +5,6 @@ * Online html documentation available at * http://www.netlib.org/lapack/explore-html/ * -*> \htmlonly *> Download DTGSEN + dependencies *> *> [TGZ] @@ -13,7 +12,6 @@ *> [ZIP] *> *> [TXT] -*> \endhtmlonly * * Definition: * =========== @@ -256,7 +254,7 @@ *> \verbatim *> LWORK is INTEGER *> The dimension of the array WORK. LWORK >= 4*N+16. -*> If IJOB = 1, 2 or 4, LWORK >= MAX(4*N+16, 2*M*(N-M)). +*> If IJOB = 1, 2 or 4, LWORK >= MAX(4*N+16, 2*M*(N-M) + 1). *> If IJOB = 3 or 5, LWORK >= MAX(4*N+16, 4*M*(N-M)). *> *> If LWORK = -1, then a workspace query is assumed; the routine @@ -304,7 +302,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup doubleOTHERcomputational +*> \ingroup tgsen * *> \par Further Details: * ===================== @@ -445,9 +443,11 @@ *> \endverbatim *> * ===================================================================== - SUBROUTINE DTGSEN( IJOB, WANTQ, WANTZ, SELECT, N, A, LDA, B, LDB, + SUBROUTINE DTGSEN( IJOB, WANTQ, WANTZ, SELECT, N, A, LDA, B, + $ LDB, $ ALPHAR, ALPHAI, BETA, Q, LDQ, Z, LDZ, M, PL, $ PR, DIF, WORK, LWORK, IWORK, LIWORK, INFO ) + IMPLICIT NONE * * -- LAPACK computational routine -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- @@ -486,7 +486,8 @@ SUBROUTINE DTGSEN( IJOB, WANTQ, WANTZ, SELECT, N, A, LDA, B, LDB, INTEGER ISAVE( 3 ) * .. * .. External Subroutines .. - EXTERNAL DLACN2, DLACPY, DLAG2, DLASSQ, DTGEXC, DTGSYL, + EXTERNAL DLACN2, DLACPY, DLAG2, DLASSQ, DTGEXC, + $ DTGSYL, $ XERBLA * .. * .. External Functions .. @@ -561,7 +562,7 @@ SUBROUTINE DTGSEN( IJOB, WANTQ, WANTZ, SELECT, N, A, LDA, B, LDB, END IF * IF( IJOB.EQ.1 .OR. IJOB.EQ.2 .OR. IJOB.EQ.4 ) THEN - LWMIN = MAX( 1, 4*N+16, 2*M*( N-M ) ) + LWMIN = MAX( 1, 4*N+16, 2*M*( N-M ) + 1 ) LIWMIN = MAX( 1, N+6 ) ELSE IF( IJOB.EQ.3 .OR. IJOB.EQ.5 ) THEN LWMIN = MAX( 1, 4*N+16, 4*M*( N-M ) ) @@ -634,7 +635,8 @@ SUBROUTINE DTGSEN( IJOB, WANTQ, WANTZ, SELECT, N, A, LDA, B, LDB, * KK = K IF( K.NE.KS ) - $ CALL DTGEXC( WANTQ, WANTZ, N, A, LDA, B, LDB, Q, LDQ, + $ CALL DTGEXC( WANTQ, WANTZ, N, A, LDA, B, LDB, Q, + $ LDQ, $ Z, LDZ, KK, KS, WORK, LWORK, IERR ) * IF( IERR.GT.0 ) THEN @@ -668,7 +670,8 @@ SUBROUTINE DTGSEN( IJOB, WANTQ, WANTZ, SELECT, N, A, LDA, B, LDB, I = N1 + 1 IJB = 0 CALL DLACPY( 'Full', N1, N2, A( 1, I ), LDA, WORK, N1 ) - CALL DLACPY( 'Full', N1, N2, B( 1, I ), LDB, WORK( N1*N2+1 ), + CALL DLACPY( 'Full', N1, N2, B( 1, I ), LDB, + $ WORK( N1*N2+1 ), $ N1 ) CALL DTGSYL( 'N', IJB, N1, N2, A, LDA, A( I, I ), LDA, WORK, $ N1, B, LDB, B( I, I ), LDB, WORK( N1*N2+1 ), N1, @@ -710,14 +713,16 @@ SUBROUTINE DTGSEN( IJOB, WANTQ, WANTZ, SELECT, N, A, LDA, B, LDB, * * Frobenius norm-based Difu-estimate. * - CALL DTGSYL( 'N', IJB, N1, N2, A, LDA, A( I, I ), LDA, WORK, + CALL DTGSYL( 'N', IJB, N1, N2, A, LDA, A( I, I ), LDA, + $ WORK, $ N1, B, LDB, B( I, I ), LDB, WORK( N1*N2+1 ), $ N1, DSCALE, DIF( 1 ), WORK( 2*N1*N2+1 ), $ LWORK-2*N1*N2, IWORK, IERR ) * * Frobenius norm-based Difl-estimate. * - CALL DTGSYL( 'N', IJB, N2, N1, A( I, I ), LDA, A, LDA, WORK, + CALL DTGSYL( 'N', IJB, N2, N1, A( I, I ), LDA, A, LDA, + $ WORK, $ N2, B( I, I ), LDB, B, LDB, WORK( N1*N2+1 ), $ N2, DSCALE, DIF( 2 ), WORK( 2*N1*N2+1 ), $ LWORK-2*N1*N2, IWORK, IERR ) @@ -746,7 +751,8 @@ SUBROUTINE DTGSEN( IJOB, WANTQ, WANTZ, SELECT, N, A, LDA, B, LDB, * * Solve generalized Sylvester equation. * - CALL DTGSYL( 'N', IJB, N1, N2, A, LDA, A( I, I ), LDA, + CALL DTGSYL( 'N', IJB, N1, N2, A, LDA, A( I, I ), + $ LDA, $ WORK, N1, B, LDB, B( I, I ), LDB, $ WORK( N1*N2+1 ), N1, DSCALE, DIF( 1 ), $ WORK( 2*N1*N2+1 ), LWORK-2*N1*N2, IWORK, @@ -755,7 +761,8 @@ SUBROUTINE DTGSEN( IJOB, WANTQ, WANTZ, SELECT, N, A, LDA, B, LDB, * * Solve the transposed variant. * - CALL DTGSYL( 'T', IJB, N1, N2, A, LDA, A( I, I ), LDA, + CALL DTGSYL( 'T', IJB, N1, N2, A, LDA, A( I, I ), + $ LDA, $ WORK, N1, B, LDB, B( I, I ), LDB, $ WORK( N1*N2+1 ), N1, DSCALE, DIF( 1 ), $ WORK( 2*N1*N2+1 ), LWORK-2*N1*N2, IWORK, @@ -775,7 +782,8 @@ SUBROUTINE DTGSEN( IJOB, WANTQ, WANTZ, SELECT, N, A, LDA, B, LDB, * * Solve generalized Sylvester equation. * - CALL DTGSYL( 'N', IJB, N2, N1, A( I, I ), LDA, A, LDA, + CALL DTGSYL( 'N', IJB, N2, N1, A( I, I ), LDA, A, + $ LDA, $ WORK, N2, B( I, I ), LDB, B, LDB, $ WORK( N1*N2+1 ), N2, DSCALE, DIF( 2 ), $ WORK( 2*N1*N2+1 ), LWORK-2*N1*N2, IWORK, @@ -784,7 +792,8 @@ SUBROUTINE DTGSEN( IJOB, WANTQ, WANTZ, SELECT, N, A, LDA, B, LDB, * * Solve the transposed variant. * - CALL DTGSYL( 'T', IJB, N2, N1, A( I, I ), LDA, A, LDA, + CALL DTGSYL( 'T', IJB, N2, N1, A( I, I ), LDA, A, + $ LDA, $ WORK, N2, B( I, I ), LDB, B, LDB, $ WORK( N1*N2+1 ), N2, DSCALE, DIF( 2 ), $ WORK( 2*N1*N2+1 ), LWORK-2*N1*N2, IWORK, @@ -826,7 +835,8 @@ SUBROUTINE DTGSEN( IJOB, WANTQ, WANTZ, SELECT, N, A, LDA, B, LDB, WORK( 6 ) = B( K+1, K ) WORK( 7 ) = B( K, K+1 ) WORK( 8 ) = B( K+1, K+1 ) - CALL DLAG2( WORK, 2, WORK( 5 ), 2, SMLNUM*EPS, BETA( K ), + CALL DLAG2( WORK, 2, WORK( 5 ), 2, SMLNUM*EPS, + $ BETA( K ), $ BETA( K+1 ), ALPHAR( K ), ALPHAR( K+1 ), $ ALPHAI( K ) ) ALPHAI( K+1 ) = -ALPHAI( K ) diff --git a/lapack-netlib/SRC/dtrsyl3.f b/lapack-netlib/SRC/dtrsyl3.f index 31a5230ba5..f0805e584f 100644 --- a/lapack-netlib/SRC/dtrsyl3.f +++ b/lapack-netlib/SRC/dtrsyl3.f @@ -1,10 +1,27 @@ *> \brief \b DTRSYL3 * -* Definition: -* =========== -* -* -*> \par Purpose +* Definition: +* =========== +* +* SUBROUTINE DTRSYL3( TRANA, TRANB, ISGN, M, N, A, LDA, B, LDB, +* C, +* LDC, SCALE, IWORK, LIWORK, SWORK, LDSWORK, +* INFO ) +* +* .. Scalar Arguments .. +* CHARACTER TRANA, TRANB +* INTEGER INFO, ISGN, LDA, LDB, LDC, M, N, +* LIWORK, LDSWORK +* DOUBLE PRECISION SCALE +* .. +* .. Array Arguments .. +* INTEGER IWORK( * ) +* DOUBLE PRECISION A( LDA, * ), B( LDB, * ), C( LDC, * ), +* SWORK( LDSWORK, * ) +* .. +* +* +*> \par Purpose: * ============= *> *> \verbatim @@ -27,8 +44,8 @@ *> This is the block version of the algorithm. *> \endverbatim * -* Arguments -* ========= +* Arguments: +* ========== * *> \param[in] TRANA *> \verbatim @@ -161,6 +178,8 @@ *> A and B are unchanged). *> \endverbatim * +*> \ingroup trsyl3 +* * ===================================================================== * References: * E. S. Quintana-Orti and R. A. Van De Geijn (2003). Formal derivation of @@ -175,9 +194,9 @@ * Angelika Schwarz, Umea University, Sweden. * * ===================================================================== - SUBROUTINE DTRSYL3( TRANA, TRANB, ISGN, M, N, A, LDA, B, LDB, C, - $ LDC, SCALE, IWORK, LIWORK, SWORK, LDSWORK, - $ INFO ) + SUBROUTINE DTRSYL3( TRANA, TRANB, ISGN, M, N, A, LDA, B, LDB, + $ C, LDC, SCALE, IWORK, LIWORK, SWORK, + $ LDSWORK, INFO ) IMPLICIT NONE * * .. Scalar Arguments .. @@ -209,10 +228,12 @@ SUBROUTINE DTRSYL3( TRANA, TRANB, ISGN, M, N, A, LDA, B, LDB, C, LOGICAL LSAME INTEGER ILAENV DOUBLE PRECISION DLANGE, DLAMCH, DLARMM - EXTERNAL DLANGE, DLAMCH, DLARMM, ILAENV, LSAME + EXTERNAL DLANGE, DLAMCH, DLARMM, ILAENV, + $ LSAME * .. * .. External Subroutines .. - EXTERNAL DGEMM, DLASCL, DSCAL, DTRSYL, XERBLA + EXTERNAL DGEMM, DLASCL, DSCAL, DTRSYL, + $ XERBLA * .. * .. Intrinsic Functions .. INTRINSIC ABS, DBLE, EXPONENT, MAX, MIN @@ -239,7 +260,6 @@ SUBROUTINE DTRSYL3( TRANA, TRANB, ISGN, M, N, A, LDA, B, LDB, C, LQUERY = ( LIWORK.EQ.-1 .OR. LDSWORK.EQ.-1 ) IWORK( 1 ) = NBA + NBB + 2 IF( LQUERY ) THEN - LDSWORK = 2 SWORK( 1, 1 ) = MAX( NBA, NBB ) SWORK( 2, 1 ) = 2 * NBB + NBA END IF @@ -1220,7 +1240,8 @@ SUBROUTINE DTRSYL3( TRANA, TRANB, ISGN, M, N, A, LDA, B, LDB, C, * SCALOC = MIN( BIGNUM / SCAL, ONE / BUF ) BUF = BUF * SCALOC - CALL DLASCL( 'G', -1, -1, ONE, SCALOC, M, N, C, LDC, IWORK(1) ) + CALL DLASCL( 'G', -1, -1, ONE, SCALOC, M, N, C, LDC, + $ IWORK(1) ) END IF * * Combine with buffer scaling factor. SCALE will be flushed if diff --git a/lapack-netlib/SRC/lapacke_cgesvdq_work.c b/lapack-netlib/SRC/lapacke_cgesvdq_work.c new file mode 100644 index 0000000000..6601c8a248 --- /dev/null +++ b/lapack-netlib/SRC/lapacke_cgesvdq_work.c @@ -0,0 +1,165 @@ +/***************************************************************************** + Copyright (c) 2014, Intel Corp. + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of Intel Corporation nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + THE POSSIBILITY OF SUCH DAMAGE. +***************************************************************************** +* Contents: Native middle-level C interface to LAPACK function cgesvdq +* Author: Intel Corporation +*****************************************************************************/ + +#include "lapacke_utils.h" + +lapack_int LAPACKE_cgesvdq_work( int matrix_layout, char joba, char jobp, + char jobr, char jobu, char jobv, + lapack_int m, lapack_int n, lapack_complex_float* a, + lapack_int lda, float* s, lapack_complex_float* u, lapack_int ldu, + lapack_complex_float* v, lapack_int ldv, lapack_int* numrank, + lapack_int* iwork, lapack_int liwork, + lapack_complex_float* cwork, lapack_int lcwork, + float* rwork, lapack_int lrwork ) +{ + lapack_int info = 0; + if( matrix_layout == LAPACK_COL_MAJOR ) { + /* Call LAPACK function and adjust info */ + LAPACK_cgesvdq( &joba, &jobp, &jobr, &jobu, &jobv, &m, &n, a, &lda, s, u, &ldu, v, &ldv, + numrank, iwork, &liwork, cwork, &lcwork, rwork, &lrwork, &info ); + if( info < 0 ) { + info = info - 1; + } + } else if( matrix_layout == LAPACK_ROW_MAJOR ) { + lapack_int nrows_u = LAPACKE_lsame( jobu, 'a' ) || + LAPACKE_lsame( jobu, 'u' ) || + LAPACKE_lsame( jobu, 'r' ) || + LAPACKE_lsame( jobu, 'f' ) || + LAPACKE_lsame( jobu, 's' ) ? m : 1; + lapack_int ncols_u = LAPACKE_lsame( jobu, 'a' ) ? m : + ( LAPACKE_lsame( jobu, 's' ) || + (LAPACKE_lsame( jobu, 'u' ) ) ? MIN(m,n) : 1); + lapack_int nrows_v = ( LAPACKE_lsame( jobv, 'a' ) || + LAPACKE_lsame( jobv, 'v' ) || + LAPACKE_lsame( jobv, 'r' )) ? n : 1; + lapack_int lda_t = MAX(1,m); + lapack_int ldu_t = MAX(1,nrows_u); + lapack_int ldv_t = MAX(1,nrows_v); + lapack_complex_float* a_t = NULL; + lapack_complex_float* u_t = NULL; + lapack_complex_float* v_t = NULL; + /* Check leading dimension(s) */ + if( lda < n ) { + info = -9; + LAPACKE_xerbla( "LAPACKE_cgesvdq_work", info ); + return info; + } + if( ldu < ncols_u ) { + info = -12; + LAPACKE_xerbla( "LAPACKE_cgesvdq_work", info ); + return info; + } + if( ldv < n ) { + info = -14; + LAPACKE_xerbla( "LAPACKE_cgesvdq_work", info ); + return info; + } + /* Query optimal working array(s) size if requested */ + if( lcwork == -1 ) { + LAPACK_cgesvdq( &joba, &jobp, &jobr, &jobu, &jobv, &m, &n, a, &lda_t, + s, u, &ldu_t, v, &ldv_t, numrank, iwork, &liwork, + cwork, &lcwork, rwork, &lrwork, &info ); + return (info < 0) ? (info - 1) : info; + } + /* Allocate memory for temporary array(s) */ + a_t = (lapack_complex_float*)LAPACKE_malloc( sizeof(lapack_complex_float) * lda_t * MAX(1,n) ); + if( a_t == NULL ) { + info = LAPACK_TRANSPOSE_MEMORY_ERROR; + goto exit_level_0; + } + if( LAPACKE_lsame( jobu, 'a' ) || LAPACKE_lsame( jobu, 's' ) || + LAPACKE_lsame( jobu, 'u' ) || + LAPACKE_lsame( jobu, 'r' ) || + LAPACKE_lsame( jobu, 'f' ) ) { + u_t = (lapack_complex_float*) + LAPACKE_malloc( sizeof(lapack_complex_float) * ldu_t * MAX(1,ncols_u) ); + if( u_t == NULL ) { + info = LAPACK_TRANSPOSE_MEMORY_ERROR; + goto exit_level_1; + } + } + if( LAPACKE_lsame( jobv, 'a' ) || LAPACKE_lsame( jobv, 'v' ) || + LAPACKE_lsame( jobv, 'r' ) ) { + v_t = (lapack_complex_float*) + LAPACKE_malloc( sizeof(lapack_complex_float) * ldv_t * MAX(1,n) ); + if( v_t == NULL ) { + info = LAPACK_TRANSPOSE_MEMORY_ERROR; + goto exit_level_2; + } + } + /* Transpose input matrices */ + LAPACKE_cge_trans( matrix_layout, m, n, a, lda, a_t, lda_t ); + /* Call LAPACK function and adjust info */ + LAPACK_cgesvdq( &joba, &jobp, &jobr, &jobu, &jobv, &m, &n, a, &lda_t, + s, u, &ldu_t, v, &ldv_t, numrank, iwork, &liwork, + cwork, &lcwork, rwork, &lrwork, &info ); + if( info < 0 ) { + info = info - 1; + } + /* Transpose output matrices */ + LAPACKE_cge_trans( LAPACK_COL_MAJOR, m, n, a_t, lda_t, a, lda ); + if( LAPACKE_lsame( jobu, 'a' ) ||LAPACKE_lsame( jobu, 's' ) || + LAPACKE_lsame( jobu, 'u' ) || + LAPACKE_lsame( jobu, 'r' ) || + LAPACKE_lsame( jobu, 'f' ) ) { + LAPACKE_cge_trans( LAPACK_COL_MAJOR, nrows_u, ncols_u, u_t, ldu_t, + u, ldu ); + } + if( LAPACKE_lsame( jobv, 'a' ) || LAPACKE_lsame( jobv, 'v' ) || + LAPACKE_lsame( jobv, 'r' )) { + LAPACKE_cge_trans( LAPACK_COL_MAJOR, nrows_v, n, v_t, ldv_t, v, + ldv ); + } + /* Release memory and exit */ + if( LAPACKE_lsame( jobv, 'a' ) || LAPACKE_lsame( jobv, 'v' ) || + LAPACKE_lsame( jobv, 'r' ) ) { + LAPACKE_free( v_t ); + } +exit_level_2: + if( LAPACKE_lsame( jobu, 'a' ) || LAPACKE_lsame( jobu, 's' ) || + LAPACKE_lsame( jobu, 'u' ) || + LAPACKE_lsame( jobu, 'r' ) || + LAPACKE_lsame( jobu, 'f' ) ) { + LAPACKE_free( u_t ); + } +exit_level_1: + LAPACKE_free( a_t ); +exit_level_0: + if( info == LAPACK_TRANSPOSE_MEMORY_ERROR ) { + LAPACKE_xerbla( "LAPACKE_cgesvdq_work", info ); + } + } else { + info = -1; + LAPACKE_xerbla( "LAPACKE_cgesvdq_work", info ); + } + return info; +} diff --git a/lapack-netlib/SRC/lapacke_dgesvdq_work.c b/lapack-netlib/SRC/lapacke_dgesvdq_work.c new file mode 100644 index 0000000000..3a3e764508 --- /dev/null +++ b/lapack-netlib/SRC/lapacke_dgesvdq_work.c @@ -0,0 +1,165 @@ +/***************************************************************************** + Copyright (c) 2014, Intel Corp. + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of Intel Corporation nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + THE POSSIBILITY OF SUCH DAMAGE. +***************************************************************************** +* Contents: Native middle-level C interface to LAPACK function dgesvdq +* Author: Intel Corporation +*****************************************************************************/ + +#include "lapacke_utils.h" + +lapack_int LAPACKE_dgesvdq_work( int matrix_layout, char joba, char jobp, + char jobr, char jobu, char jobv, + lapack_int m, lapack_int n, double* a, + lapack_int lda, double* s, double* u, lapack_int ldu, + double* v, lapack_int ldv, lapack_int* numrank, + lapack_int* iwork, lapack_int liwork, + double* work, lapack_int lwork, + double* rwork, lapack_int lrwork ) +{ + lapack_int info = 0; + if( matrix_layout == LAPACK_COL_MAJOR ) { + /* Call LAPACK function and adjust info */ + LAPACK_dgesvdq( &joba, &jobp, &jobr, &jobu, &jobv, &m, &n, a, &lda, s, u, &ldu, v, &ldv, + numrank, iwork, &liwork, work, &lwork, rwork, &lrwork, &info ); + if( info < 0 ) { + info = info - 1; + } + } else if( matrix_layout == LAPACK_ROW_MAJOR ) { + lapack_int nrows_u = ( LAPACKE_lsame( jobu, 'a' ) || + LAPACKE_lsame( jobu, 'u' ) || + LAPACKE_lsame( jobu, 'r' ) || + LAPACKE_lsame( jobu, 'f' ) || + LAPACKE_lsame( jobu, 's' ) ) ? m : 1; + lapack_int ncols_u = LAPACKE_lsame( jobu, 'a' ) ? m : + ( (LAPACKE_lsame( jobu, 's' ) || + LAPACKE_lsame( jobu, 'u' ) ) ? MIN(m,n) : 1); + lapack_int nrows_v = ( LAPACKE_lsame( jobv, 'a' ) || + LAPACKE_lsame( jobu, 'v' ) || + LAPACKE_lsame( jobu, 'r' )) ? n : 1; + lapack_int lda_t = MAX(1,m); + lapack_int ldu_t = MAX(1,nrows_u); + lapack_int ldv_t = MAX(1,nrows_v); + double* a_t = NULL; + double* u_t = NULL; + double* v_t = NULL; + /* Check leading dimension(s) */ + if( lda < n ) { + info = -9; + LAPACKE_xerbla( "LAPACKE_dgesvdq_work", info ); + return info; + } + if( ldu < ncols_u ) { + info = -12; + LAPACKE_xerbla( "LAPACKE_dgesvdq_work", info ); + return info; + } + if( ldv < n ) { + info = -14; + LAPACKE_xerbla( "LAPACKE_dgesvdq_work", info ); + return info; + } + /* Query optimal working array(s) size if requested */ + if( lwork == -1 ) { + LAPACK_dgesvdq( &joba, &jobp, &jobr, &jobu, &jobv, &m, &n, a, &lda_t, + s, u, &ldu_t, v, &ldv_t, numrank, iwork, &liwork, + work, &lwork, rwork, &lrwork, &info ); + return (info < 0) ? (info - 1) : info; + } + /* Allocate memory for temporary array(s) */ + a_t = (double*)LAPACKE_malloc( sizeof(double) * lda_t * MAX(1,n) ); + if( a_t == NULL ) { + info = LAPACK_TRANSPOSE_MEMORY_ERROR; + goto exit_level_0; + } + if( LAPACKE_lsame( jobu, 'a' ) || LAPACKE_lsame( jobu, 's' ) || + LAPACKE_lsame( jobu, 'u' ) || + LAPACKE_lsame( jobu, 'r' ) || + LAPACKE_lsame( jobu, 'f' ) ) { + u_t = (double*) + LAPACKE_malloc( sizeof(double) * ldu_t * MAX(1,ncols_u) ); + if( u_t == NULL ) { + info = LAPACK_TRANSPOSE_MEMORY_ERROR; + goto exit_level_1; + } + } + if( LAPACKE_lsame( jobv, 'a' ) || LAPACKE_lsame( jobv, 'v' ) || + LAPACKE_lsame( jobu, 'r' ) ) { + v_t = (double*) + LAPACKE_malloc( sizeof(double) * ldv_t * MAX(1,n) ); + if( v_t == NULL ) { + info = LAPACK_TRANSPOSE_MEMORY_ERROR; + goto exit_level_2; + } + } + /* Transpose input matrices */ + LAPACKE_dge_trans( matrix_layout, m, n, a, lda, a_t, lda_t ); + /* Call LAPACK function and adjust info */ + LAPACK_dgesvdq( &joba, &jobp, &jobr, &jobu, &jobv, &m, &n, a, &lda_t, + s, u, &ldu_t, v, &ldv_t, numrank, iwork, &liwork, + work, &lwork, rwork, &lrwork, &info ); + if( info < 0 ) { + info = info - 1; + } + /* Transpose output matrices */ + LAPACKE_dge_trans( LAPACK_COL_MAJOR, m, n, a_t, lda_t, a, lda ); + if( LAPACKE_lsame( jobu, 'a' ) || LAPACKE_lsame( jobu, 's' ) || + LAPACKE_lsame( jobu, 'u' ) || + LAPACKE_lsame( jobu, 'r' ) || + LAPACKE_lsame( jobu, 'f' ) ) { + LAPACKE_dge_trans)( LAPACK_COL_MAJOR, nrows_u, ncols_u, u_t, ldu_t, + u, ldu ); + } + if( LAPACKE_lsame( jobv, 'a' ) || LAPACKE_lsame( jobv, 'v' ) || + LAPACKE_lsame( jobu, 'r' )) { + LAPACKE_dge_trans( LAPACK_COL_MAJOR, nrows_v, n, v_t, ldv_t, v, + ldv ); + } + /* Release memory and exit */ + if( LAPACKE_lsame( jobv, 'a' ) || LAPACKE_lsame( jobv, 'v' ) || + LAPACKE_lsame( jobu, 'r' ) ) { + LAPACKE_free( v_t ); + } +exit_level_2: + if( LAPACKE_lsame( jobu, 'a' ) || LAPACKE_lsame( jobu, 's' ) || + LAPACKE_lsame( jobu, 'u' ) || + LAPACKE_lsame( jobu, 'r' ) || + LAPACKE_lsame( jobu, 'f' ) ) { + LAPACKE_free( u_t ); + } +exit_level_1: + LAPACKE_free( a_t ); +exit_level_0: + if( info == LAPACK_TRANSPOSE_MEMORY_ERROR ) { + LAPACKE_xerbla( "LAPACKE_dgesvdq_work", info ); + } + } else { + info = -1; + LAPACKE_xerbla( "LAPACKE_dgesvdq_work", info ); + } + return info; +} diff --git a/lapack-netlib/SRC/lapacke_sgesvdq_work.c b/lapack-netlib/SRC/lapacke_sgesvdq_work.c new file mode 100644 index 0000000000..f786d75f76 --- /dev/null +++ b/lapack-netlib/SRC/lapacke_sgesvdq_work.c @@ -0,0 +1,165 @@ +/***************************************************************************** + Copyright (c) 2014, Intel Corp. + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of Intel Corporation nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + THE POSSIBILITY OF SUCH DAMAGE. +***************************************************************************** +* Contents: Native middle-level C interface to LAPACK function sgesvdq +* Author: Intel Corporation +*****************************************************************************/ + +#include "lapacke_utils.h" + +lapack_int LAPACKE_sgesvdq_work( int matrix_layout, char joba, char jobp, + char jobr, char jobu, char jobv, + lapack_int m, lapack_int n, float* a, + lapack_int lda, float* s, float* u, lapack_int ldu, + float* v, lapack_int ldv, lapack_int* numrank, + lapack_int* iwork, lapack_int liwork, + float* work, lapack_int lwork, + float* rwork, lapack_int lrwork ) +{ + lapack_int info = 0; + if( matrix_layout == LAPACK_COL_MAJOR ) { + /* Call LAPACK function and adjust info */ + LAPACK_sgesvdq( &joba, &jobp, &jobr, &jobu, &jobv, &m, &n, a, &lda, s, u, &ldu, v, &ldv, + numrank, iwork, &liwork, work, &lwork, rwork, &lrwork, &info ); + if( info < 0 ) { + info = info - 1; + } + } else if( matrix_layout == LAPACK_ROW_MAJOR ) { + lapack_int nrows_u = ( LAPACKE_lsame( jobu, 'a' ) || + LAPACKE_lsame( jobu, 'u' ) || + LAPACKE_lsame( jobu, 'r' ) || + LAPACKE_lsame( jobu, 'f' ) || + LAPACKE_lsame( jobu, 's' ) ) ? m : 1; + lapack_int ncols_u = LAPACKE_lsame( jobu, 'a' ) ? m : + ( (LAPACKE_lsame( jobu, 's' ) || + LAPACKE_lsame( jobu, 'u' ) ) ? MIN(m,n) : 1); + lapack_int nrows_v = ( LAPACKE_lsame( jobv, 'a' ) || + LAPACKE_lsame( jobv, 'v' ) || + LAPACKE_lsame( jobv, 'r' )) ? n : 1; + lapack_int lda_t = MAX(1,m); + lapack_int ldu_t = MAX(1,nrows_u); + lapack_int ldv_t = MAX(1,nrows_v); + float* a_t = NULL; + float* u_t = NULL; + float* v_t = NULL; + /* Check leading dimension(s) */ + if( lda < n ) { + info = -9; + LAPACKE_xerbla( "LAPACKE_sgesvdq_work", info ); + return info; + } + if( ldu < ncols_u ) { + info = -12; + LAPACKE_xerbla( "LAPACKE_sgesvdq_work", info ); + return info; + } + if( ldv < n ) { + info = -14; + LAPACKE_xerbla( "LAPACKE_sgesvdq_work", info ); + return info; + } + /* Query optimal working array(s) size if requested */ + if( lwork == -1 ) { + LAPACK_sgesvdq( &joba, &jobp, &jobr, &jobu, &jobv, &m, &n, a, &lda_t, + s, u, &ldu_t, v, &ldv_t, numrank, iwork, &liwork, + work, &lwork, rwork, &lrwork, &info ); + return (info < 0) ? (info - 1) : info; + } + /* Allocate memory for temporary array(s) */ + a_t = (float*)LAPACKE_malloc( sizeof(float) * lda_t * MAX(1,n) ); + if( a_t == NULL ) { + info = LAPACK_TRANSPOSE_MEMORY_ERROR; + goto exit_level_0; + } + if( LAPACKE_lsame( jobu, 'a' ) || LAPACKE_lsame( jobu, 's' ) || + LAPACKE_lsame( jobu, 'u' ) || + LAPACKE_lsame( jobu, 'r' ) || + LAPACKE_lsame( jobu, 'f' ) ) { + u_t = (float*) + LAPACKE_malloc( sizeof(float) * ldu_t * MAX(1,ncols_u) ); + if( u_t == NULL ) { + info = LAPACK_TRANSPOSE_MEMORY_ERROR; + goto exit_level_1; + } + } + if( LAPACKE_lsame( jobv, 'a' ) || LAPACKE_lsame( jobv, 'v' ) || + LAPACKE_lsame( jobv, 'r' ) ) { + v_t = (float*) + LAPACKE_malloc( sizeof(float) * ldv_t * MAX(1,n) ); + if( v_t == NULL ) { + info = LAPACK_TRANSPOSE_MEMORY_ERROR; + goto exit_level_2; + } + } + /* Transpose input matrices */ + LAPACKE_sge_trans( matrix_layout, m, n, a, lda, a_t, lda_t ); + /* Call LAPACK function and adjust info */ + LAPACK_sgesvdq( &joba, &jobp, &jobr, &jobu, &jobv, &m, &n, a, &lda_t, + s, u, &ldu_t, v, &ldv_t, numrank, iwork, &liwork, + work, &lwork, rwork, &lrwork, &info ); + if( info < 0 ) { + info = info - 1; + } + /* Transpose output matrices */ + LAPACKE_sge_trans( LAPACK_COL_MAJOR, m, n, a_t, lda_t, a, lda ); + if( LAPACKE_lsame( jobu, 'a' ) || LAPACKE_lsame( jobu, 's' ) || + LAPACKE_lsame( jobu, 'u' ) || + LAPACKE_lsame( jobu, 'r' ) || + LAPACKE_lsame( jobu, 'f' ) ) { + LAPACKE_sge_trans( LAPACK_COL_MAJOR, nrows_u, ncols_u, u_t, ldu_t, + u, ldu ); + } + if( LAPACKE_lsame( jobv, 'a' ) || LAPACKE_lsame( jobv, 'v' ) || + LAPACKE_lsame( jobv, 'r' )) { + LAPACKE_sge_trans( LAPACK_COL_MAJOR, nrows_v, n, v_t, ldv_t, v, + ldv ); + } + /* Release memory and exit */ + if( LAPACKE_lsame( jobv, 'a' ) || LAPACKE_lsame( jobv, 'v' ) || + LAPACKE_lsame( jobv, 'r' ) ) { + LAPACKE_free( v_t ); + } +exit_level_2: + if( LAPACKE_lsame( jobu, 'a' ) || LAPACKE_lsame( jobu, 's' ) || + LAPACKE_lsame( jobu, 'u' ) || + LAPACKE_lsame( jobu, 'r' ) || + LAPACKE_lsame( jobu, 'f' ) ) { + LAPACKE_free( u_t ); + } +exit_level_1: + LAPACKE_free( a_t ); +exit_level_0: + if( info == LAPACK_TRANSPOSE_MEMORY_ERROR ) { + LAPACKE_xerbla( "LAPACKE_sgesvdq_work", info ); + } + } else { + info = -1; + LAPACKE_xerbla( "LAPACKE_sgesvdq_work", info ); + } + return info; +} diff --git a/lapack-netlib/SRC/lapacke_zgesvdq_work.c b/lapack-netlib/SRC/lapacke_zgesvdq_work.c new file mode 100644 index 0000000000..32f6c02968 --- /dev/null +++ b/lapack-netlib/SRC/lapacke_zgesvdq_work.c @@ -0,0 +1,165 @@ +/***************************************************************************** + Copyright (c) 2014, Intel Corp. + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of Intel Corporation nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + THE POSSIBILITY OF SUCH DAMAGE. +***************************************************************************** +* Contents: Native middle-level C interface to LAPACK function zgesvdq +* Author: Intel Corporation +*****************************************************************************/ + +#include "lapacke_utils.h" + +lapack_int LAPACKE_zgesvdq_work( int matrix_layout, char joba, char jobp, + char jobr, char jobu, char jobv, + lapack_int m, lapack_int n, lapack_complex_double* a, + lapack_int lda, double* s, lapack_complex_double* u, lapack_int ldu, + lapack_complex_double* v, lapack_int ldv, lapack_int* numrank, + lapack_int* iwork, lapack_int liwork, + lapack_complex_double* cwork, lapack_int lcwork, + double* rwork, lapack_int lrwork ) +{ + lapack_int info = 0; + if( matrix_layout == LAPACK_COL_MAJOR ) { + /* Call LAPACK function and adjust info */ + LAPACK_zgesvdq( &joba, &jobp, &jobr, &jobu, &jobv, &m, &n, a, &lda, s, u, &ldu, v, &ldv, + numrank, iwork, &liwork, cwork, &lcwork, rwork, &lrwork, &info ); + if( info < 0 ) { + info = info - 1; + } + } else if( matrix_layout == LAPACK_ROW_MAJOR ) { + lapack_int nrows_u = ( LAPACKE_lsame( jobu, 'a' ) || + LAPACKE_lsame( jobu, 'u' ) || + LAPACKE_lsame( jobu, 'r' ) || + LAPACKE_lsame( jobu, 'f' ) || + LAPACKE_lsame( jobu, 's' ) ) ? m : 1; + lapack_int ncols_u = LAPACKE_lsame( jobu, 'a' ) ? m : + ( (LAPACKE_lsame( jobu, 's' ) || + LAPACKE_lsame( jobu, 'u' ) ) ? MIN(m,n) : 1); + lapack_int nrows_v = ( LAPACKE_lsame( jobv, 'a' ) || + LAPACKE_lsame( jobv, 'v' ) || + LAPACKE_lsame( jobv, 'r' )) ? n : 1; + lapack_int lda_t = MAX(1,m); + lapack_int ldu_t = MAX(1,nrows_u); + lapack_int ldv_t = MAX(1,nrows_v); + lapack_complex_double* a_t = NULL; + lapack_complex_double* u_t = NULL; + lapack_complex_double* v_t = NULL; + /* Check leading dimension(s) */ + if( lda < n ) { + info = -9; + LAPACKE_xerbla( "LAPACKE_zgesvdq_work", info ); + return info; + } + if( ldu < ncols_u ) { + info = -12; + LAPACKE_xerbla( "LAPACKE_zgesvdq_work", info ); + return info; + } + if( ldv < n ) { + info = -14; + LAPACKE_xerbla( "LAPACKE_zgesvdq_work", info ); + return info; + } + /* Query optimal working array(s) size if requested */ + if( lcwork == -1 ) { + LAPACK_zgesvdq( &joba, &jobp, &jobr, &jobu, &jobv, &m, &n, a, &lda_t, + s, u, &ldu_t, v, &ldv_t, numrank, iwork, &liwork, + cwork, &lcwork, rwork, &lrwork, &info ); + return (info < 0) ? (info - 1) : info; + } + /* Allocate memory for temporary array(s) */ + a_t = (lapack_complex_double*)LAPACKE_malloc( sizeof(lapack_complex_double) * lda_t * MAX(1,n) ); + if( a_t == NULL ) { + info = LAPACK_TRANSPOSE_MEMORY_ERROR; + goto exit_level_0; + } + if( LAPACKE_lsame( jobu, 'a' ) || LAPACKE_lsame( jobu, 's' ) || + LAPACKE_lsame( jobu, 'u' ) || + LAPACKE_lsame( jobu, 'r' ) || + LAPACKE_lsame( jobu, 'f' ) ) { + u_t = (lapack_complex_double*) + LAPACKE_malloc( sizeof(lapack_complex_double) * ldu_t * MAX(1,ncols_u) ); + if( u_t == NULL ) { + info = LAPACK_TRANSPOSE_MEMORY_ERROR; + goto exit_level_1; + } + } + if( LAPACKE_lsame( jobv, 'a' ) || LAPACKE_lsame( jobv, 'v' ) || + LAPACKE_lsame( jobv, 'r' ) ) { + v_t = (lapack_complex_double*) + LAPACKE_malloc( sizeof(lapack_complex_double) * ldv_t * MAX(1,n) ); + if( v_t == NULL ) { + info = LAPACK_TRANSPOSE_MEMORY_ERROR; + goto exit_level_2; + } + } + /* Transpose input matrices */ + LAPACKE_zge_trans( matrix_layout, m, n, a, lda, a_t, lda_t ); + /* Call LAPACK function and adjust info */ + LAPACK_zgesvdq( &joba, &jobp, &jobr, &jobu, &jobv, &m, &n, a, &lda_t, + s, u, &ldu_t, v, &ldv_t, numrank, iwork, &liwork, + cwork, &lcwork, rwork, &lrwork, &info ); + if( info < 0 ) { + info = info - 1; + } + /* Transpose output matrices */ + LAPACKE_zge_trans( LAPACK_COL_MAJOR, m, n, a_t, lda_t, a, lda ); + if( LAPACKE_lsame( jobu, 'a' ) || LAPACKE_lsame( jobu, 's' ) || + LAPACKE_lsame( jobu, 'u' ) || + LAPACKE_lsame( jobu, 'r' ) || + LAPACKE_lsame( jobu, 'f' ) ) { + LAPACKE_zge_trans( LAPACK_COL_MAJOR, nrows_u, ncols_u, u_t, ldu_t, + u, ldu ); + } + if( LAPACKE_lsame( jobv, 'a' ) || LAPACKE_lsame( jobv, 'v' ) || + LAPACKE_lsame( jobv, 'r' )) { + LAPACKE_zge_trans( LAPACK_COL_MAJOR, nrows_v, n, v_t, ldv_t, v, + ldv ); + } + /* Release memory and exit */ + if( LAPACKE_lsame( jobv, 'a' ) || LAPACKE_lsame( jobv, 'v' ) || + LAPACKE_lsame( jobv, 'r' ) ) { + LAPACKE_free( v_t ); + } +exit_level_2: + if( LAPACKE_lsame( jobu, 'a' ) || LAPACKE_lsame( jobu, 's' ) || + LAPACKE_lsame( jobu, 'u' ) || + LAPACKE_lsame( jobu, 'r' ) || + LAPACKE_lsame( jobu, 'f' ) ) { + LAPACKE_free( u_t ); + } +exit_level_1: + LAPACKE_free( a_t ); +exit_level_0: + if( info == LAPACK_TRANSPOSE_MEMORY_ERROR ) { + LAPACKE_xerbla( "LAPACKE_zgesvdq_work", info ); + } + } else { + info = -1; + LAPACKE_xerbla( "LAPACKE_zgesvdq_work", info ); + } + return info; +} diff --git a/lapack-netlib/SRC/sgebd2.f b/lapack-netlib/SRC/sgebd2.f index cb57ed6780..b49421b261 100644 --- a/lapack-netlib/SRC/sgebd2.f +++ b/lapack-netlib/SRC/sgebd2.f @@ -5,7 +5,6 @@ * Online html documentation available at * http://www.netlib.org/lapack/explore-html/ * -*> \htmlonly *> Download SGEBD2 + dependencies *> *> [TGZ] @@ -13,7 +12,6 @@ *> [ZIP] *> *> [TXT] -*> \endhtmlonly * * Definition: * =========== @@ -132,7 +130,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup realGEcomputational +*> \ingroup gebd2 * *> \par Further Details: * ===================== @@ -186,6 +184,7 @@ *> * ===================================================================== SUBROUTINE SGEBD2( M, N, A, LDA, D, E, TAUQ, TAUP, WORK, INFO ) + IMPLICIT NONE * * -- LAPACK computational routine -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- @@ -209,7 +208,7 @@ SUBROUTINE SGEBD2( M, N, A, LDA, D, E, TAUQ, TAUP, WORK, INFO ) INTEGER I * .. * .. External Subroutines .. - EXTERNAL SLARF, SLARFG, XERBLA + EXTERNAL SLARF1F, SLARFG, XERBLA * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN @@ -242,14 +241,12 @@ SUBROUTINE SGEBD2( M, N, A, LDA, D, E, TAUQ, TAUP, WORK, INFO ) CALL SLARFG( M-I+1, A( I, I ), A( MIN( I+1, M ), I ), 1, $ TAUQ( I ) ) D( I ) = A( I, I ) - A( I, I ) = ONE * * Apply H(i) to A(i:m,i+1:n) from the left * IF( I.LT.N ) - $ CALL SLARF( 'Left', M-I+1, N-I, A( I, I ), 1, TAUQ( I ), - $ A( I, I+1 ), LDA, WORK ) - A( I, I ) = D( I ) + $ CALL SLARF1F( 'Left', M-I+1, N-I, A( I, I ), 1, + $ TAUQ( I ), A( I, I+1 ), LDA, WORK ) * IF( I.LT.N ) THEN * @@ -259,13 +256,11 @@ SUBROUTINE SGEBD2( M, N, A, LDA, D, E, TAUQ, TAUP, WORK, INFO ) CALL SLARFG( N-I, A( I, I+1 ), A( I, MIN( I+2, N ) ), $ LDA, TAUP( I ) ) E( I ) = A( I, I+1 ) - A( I, I+1 ) = ONE * * Apply G(i) to A(i+1:m,i+1:n) from the right * - CALL SLARF( 'Right', M-I, N-I, A( I, I+1 ), LDA, - $ TAUP( I ), A( I+1, I+1 ), LDA, WORK ) - A( I, I+1 ) = E( I ) + CALL SLARF1F( 'Right', M-I, N-I, A( I, I+1 ), LDA, + $ TAUP( I ), A( I+1, I+1 ), LDA, WORK ) ELSE TAUP( I ) = ZERO END IF @@ -278,33 +273,31 @@ SUBROUTINE SGEBD2( M, N, A, LDA, D, E, TAUQ, TAUP, WORK, INFO ) * * Generate elementary reflector G(i) to annihilate A(i,i+1:n) * - CALL SLARFG( N-I+1, A( I, I ), A( I, MIN( I+1, N ) ), LDA, + CALL SLARFG( N-I+1, A( I, I ), A( I, MIN( I+1, N ) ), + $ LDA, $ TAUP( I ) ) D( I ) = A( I, I ) - A( I, I ) = ONE * * Apply G(i) to A(i+1:m,i:n) from the right * IF( I.LT.M ) - $ CALL SLARF( 'Right', M-I, N-I+1, A( I, I ), LDA, - $ TAUP( I ), A( I+1, I ), LDA, WORK ) - A( I, I ) = D( I ) + $ CALL SLARF1F( 'Right', M-I, N-I+1, A( I, I ), LDA, + $ TAUP( I ), A( I+1, I ), LDA, WORK ) * IF( I.LT.M ) THEN * * Generate elementary reflector H(i) to annihilate * A(i+2:m,i) * - CALL SLARFG( M-I, A( I+1, I ), A( MIN( I+2, M ), I ), 1, + CALL SLARFG( M-I, A( I+1, I ), A( MIN( I+2, M ), I ), + $ 1, $ TAUQ( I ) ) E( I ) = A( I+1, I ) - A( I+1, I ) = ONE * * Apply H(i) to A(i+1:m,i+1:n) from the left * - CALL SLARF( 'Left', M-I, N-I, A( I+1, I ), 1, TAUQ( I ), - $ A( I+1, I+1 ), LDA, WORK ) - A( I+1, I ) = E( I ) + CALL SLARF1F( 'Left', M-I, N-I, A( I+1, I ), 1, + $ TAUQ( I ), A( I+1, I+1 ), LDA, WORK ) ELSE TAUQ( I ) = ZERO END IF diff --git a/lapack-netlib/SRC/sgeev.f b/lapack-netlib/SRC/sgeev.f index 93f9932651..52ce3b442f 100644 --- a/lapack-netlib/SRC/sgeev.f +++ b/lapack-netlib/SRC/sgeev.f @@ -227,7 +227,7 @@ SUBROUTINE SGEEV( JOBVL, JOBVR, N, A, LDA, WR, WI, VL, LDVL, VR, $ SLASCL, SORGHR, SROT, SSCAL, STREVC3, XERBLA * .. * .. External Functions .. - LOGICAL LSAME + LOGICAL LSAME, SISNAN INTEGER ISAMAX, ILAENV REAL SLAMCH, SLANGE, SLAPY2, SNRM2, SROUNDUP_LWORK EXTERNAL LSAME, ISAMAX, ILAENV, SLAMCH, SLANGE, SLAPY2, @@ -348,6 +348,10 @@ SUBROUTINE SGEEV( JOBVL, JOBVR, N, A, LDA, WR, WI, VL, LDVL, VR, ELSE IF( ANRM.GT.BIGNUM ) THEN SCALEA = .TRUE. CSCALE = BIGNUM + ELSE IF ( SISNAN ( ANRM ) ) THEN + INFO = -4 + CALL XERBLA ( 'SGEEV', -INFO ) + RETURN END IF IF( SCALEA ) $ CALL SLASCL( 'G', 0, 0, ANRM, CSCALE, N, N, A, LDA, IERR ) diff --git a/lapack-netlib/SRC/sgehd2.f b/lapack-netlib/SRC/sgehd2.f index c7d8db19ec..bd3ff718b2 100644 --- a/lapack-netlib/SRC/sgehd2.f +++ b/lapack-netlib/SRC/sgehd2.f @@ -5,7 +5,6 @@ * Online html documentation available at * http://www.netlib.org/lapack/explore-html/ * -*> \htmlonly *> Download SGEHD2 + dependencies *> *> [TGZ] @@ -13,7 +12,6 @@ *> [ZIP] *> *> [TXT] -*> \endhtmlonly * * Definition: * =========== @@ -106,7 +104,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup realGEcomputational +*> \ingroup gehd2 * *> \par Further Details: * ===================== @@ -146,6 +144,7 @@ *> * ===================================================================== SUBROUTINE SGEHD2( N, ILO, IHI, A, LDA, TAU, WORK, INFO ) + IMPLICIT NONE * * -- LAPACK computational routine -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- @@ -160,16 +159,11 @@ SUBROUTINE SGEHD2( N, ILO, IHI, A, LDA, TAU, WORK, INFO ) * * ===================================================================== * -* .. Parameters .. - REAL ONE - PARAMETER ( ONE = 1.0E+0 ) -* .. * .. Local Scalars .. INTEGER I - REAL AII * .. * .. External Subroutines .. - EXTERNAL SLARF, SLARFG, XERBLA + EXTERNAL SLARF1F, SLARFG, XERBLA * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN @@ -199,20 +193,17 @@ SUBROUTINE SGEHD2( N, ILO, IHI, A, LDA, TAU, WORK, INFO ) * CALL SLARFG( IHI-I, A( I+1, I ), A( MIN( I+2, N ), I ), 1, $ TAU( I ) ) - AII = A( I+1, I ) - A( I+1, I ) = ONE * * Apply H(i) to A(1:ihi,i+1:ihi) from the right * - CALL SLARF( 'Right', IHI, IHI-I, A( I+1, I ), 1, TAU( I ), - $ A( 1, I+1 ), LDA, WORK ) + CALL SLARF1F( 'Right', IHI, IHI-I, A( I+1, I ), 1, TAU( I ), + $ A( 1, I+1 ), LDA, WORK ) * * Apply H(i) to A(i+1:ihi,i+1:n) from the left * - CALL SLARF( 'Left', IHI-I, N-I, A( I+1, I ), 1, TAU( I ), - $ A( I+1, I+1 ), LDA, WORK ) + CALL SLARF1F( 'Left', IHI-I, N-I, A( I+1, I ), 1, TAU( I ), + $ A( I+1, I+1 ), LDA, WORK ) * - A( I+1, I ) = AII 10 CONTINUE * RETURN diff --git a/lapack-netlib/SRC/sgelq2.f b/lapack-netlib/SRC/sgelq2.f index 3e50beb13e..f0562432bf 100644 --- a/lapack-netlib/SRC/sgelq2.f +++ b/lapack-netlib/SRC/sgelq2.f @@ -5,7 +5,6 @@ * Online html documentation available at * http://www.netlib.org/lapack/explore-html/ * -*> \htmlonly *> Download SGELQ2 + dependencies *> *> [TGZ] @@ -13,7 +12,6 @@ *> [ZIP] *> *> [TXT] -*> \endhtmlonly * * Definition: * =========== @@ -104,7 +102,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup realGEcomputational +*> \ingroup gelq2 * *> \par Further Details: * ===================== @@ -126,6 +124,7 @@ *> * ===================================================================== SUBROUTINE SGELQ2( M, N, A, LDA, TAU, WORK, INFO ) + IMPLICIT NONE * * -- LAPACK computational routine -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- @@ -140,16 +139,11 @@ SUBROUTINE SGELQ2( M, N, A, LDA, TAU, WORK, INFO ) * * ===================================================================== * -* .. Parameters .. - REAL ONE - PARAMETER ( ONE = 1.0E+0 ) -* .. * .. Local Scalars .. INTEGER I, K - REAL AII * .. * .. External Subroutines .. - EXTERNAL SLARF, SLARFG, XERBLA + EXTERNAL SLARF1F, SLARFG, XERBLA * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN @@ -183,11 +177,8 @@ SUBROUTINE SGELQ2( M, N, A, LDA, TAU, WORK, INFO ) * * Apply H(i) to A(i+1:m,i:n) from the right * - AII = A( I, I ) - A( I, I ) = ONE - CALL SLARF( 'Right', M-I, N-I+1, A( I, I ), LDA, TAU( I ), - $ A( I+1, I ), LDA, WORK ) - A( I, I ) = AII + CALL SLARF1F( 'Right', M-I, N-I+1, A( I, I ), LDA, + $ TAU( I ), A( I+1, I ), LDA, WORK ) END IF 10 CONTINUE RETURN diff --git a/lapack-netlib/SRC/sgeql2.f b/lapack-netlib/SRC/sgeql2.f index ea5ad6b82d..99d9f49ed4 100644 --- a/lapack-netlib/SRC/sgeql2.f +++ b/lapack-netlib/SRC/sgeql2.f @@ -5,7 +5,6 @@ * Online html documentation available at * http://www.netlib.org/lapack/explore-html/ * -*> \htmlonly *> Download SGEQL2 + dependencies *> *> [TGZ] @@ -13,7 +12,6 @@ *> [ZIP] *> *> [TXT] -*> \endhtmlonly * * Definition: * =========== @@ -98,7 +96,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup realGEcomputational +*> \ingroup geql2 * *> \par Further Details: * ===================== @@ -120,6 +118,7 @@ *> * ===================================================================== SUBROUTINE SGEQL2( M, N, A, LDA, TAU, WORK, INFO ) + IMPLICIT NONE * * -- LAPACK computational routine -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- @@ -134,16 +133,11 @@ SUBROUTINE SGEQL2( M, N, A, LDA, TAU, WORK, INFO ) * * ===================================================================== * -* .. Parameters .. - REAL ONE - PARAMETER ( ONE = 1.0E+0 ) -* .. * .. Local Scalars .. INTEGER I, K - REAL AII * .. * .. External Subroutines .. - EXTERNAL SLARF, SLARFG, XERBLA + EXTERNAL SLARF1L, SLARFG, XERBLA * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN @@ -177,11 +171,8 @@ SUBROUTINE SGEQL2( M, N, A, LDA, TAU, WORK, INFO ) * * Apply H(i) to A(1:m-k+i,1:n-k+i-1) from the left * - AII = A( M-K+I, N-K+I ) - A( M-K+I, N-K+I ) = ONE - CALL SLARF( 'Left', M-K+I, N-K+I-1, A( 1, N-K+I ), 1, TAU( I ), - $ A, LDA, WORK ) - A( M-K+I, N-K+I ) = AII + CALL SLARF1L( 'Left', M-K+I, N-K+I-1, A( 1, N-K+I ), 1, + $ TAU( I ), A, LDA, WORK ) 10 CONTINUE RETURN * diff --git a/lapack-netlib/SRC/sgeqp3rk.f b/lapack-netlib/SRC/sgeqp3rk.f index d3a335b88e..9f0b76328d 100644 --- a/lapack-netlib/SRC/sgeqp3rk.f +++ b/lapack-netlib/SRC/sgeqp3rk.f @@ -5,7 +5,6 @@ * Online html documentation available at * http://www.netlib.org/lapack/explore-html/ * -*> \htmlonly *> Download SGEQP3RK + dependencies *> *> [TGZ] @@ -13,7 +12,6 @@ *> [ZIP] *> *> [TXT] -*> \endhtmlonly * * Definition: * =========== @@ -546,27 +544,19 @@ *> C. H. Bischof, Math. and Comp. Sci. Div., Argonne National Lab, USA. *> A BLAS-3 version of the QR factorization with column pivoting. *> LAPACK Working Note 114 -*> \htmlonly *> https://www.netlib.org/lapack/lawnspdf/lawn114.pdf -*> \endhtmlonly *> and in *> SIAM J. Sci. Comput., 19(5):1486-1494, Sept. 1998. -*> \htmlonly *> https://doi.org/10.1137/S1064827595296732 -*> \endhtmlonly *> *> [2] A partial column norm updating strategy developed in 2006. *> Z. Drmac and Z. Bujanovic, Dept. of Math., University of Zagreb, Croatia. *> On the failure of rank revealing QR factorization software – a case study. *> LAPACK Working Note 176. -*> \htmlonly *> http://www.netlib.org/lapack/lawnspdf/lawn176.pdf -*> \endhtmlonly *> and in *> ACM Trans. Math. Softw. 35, 2, Article 12 (July 2008), 28 pages. -*> \htmlonly *> https://doi.org/10.1145/1377612.1377616 -*> \endhtmlonly * *> \par Contributors: * ================== @@ -671,7 +661,7 @@ SUBROUTINE SGEQP3RK( M, N, NRHS, KMAX, ABSTOL, RELTOL, A, LDA, * 1) SGEQP3RK and SLAQP2RK: 2*N to store full and partial * column 2-norms. * 2) SLAQP2RK: N+NRHS-1 to use in WORK array that is used -* in SLARF subroutine inside SLAQP2RK to apply an +* in SLARF1F subroutine inside SLAQP2RK to apply an * elementary reflector from the left. * TOTAL_WORK_SIZE = 3*N + NRHS - 1 * @@ -687,7 +677,7 @@ SUBROUTINE SGEQP3RK( M, N, NRHS, KMAX, ABSTOL, RELTOL, A, LDA, * 1) SGEQP3RK, SLAQP2RK, SLAQP3RK: 2*N to store full and * partial column 2-norms. * 2) SLAQP2RK: N+NRHS-1 to use in WORK array that is used -* in SLARF subroutine to apply an elementary reflector +* in SLARF1F subroutine to apply an elementary reflector * from the left. * 3) SLAQP3RK: NB*(N+NRHS) to use in the work array F that * is used to apply a block reflector from @@ -887,7 +877,8 @@ SUBROUTINE SGEQP3RK( M, N, NRHS, KMAX, ABSTOL, RELTOL, A, LDA, * Determine when to cross over from blocked to unblocked code. * (for N less than NX, unblocked code should be used). * - NX = MAX( 0, ILAENV( IXOVER, 'SGEQP3RK', ' ', M, N, -1, -1 )) + NX = MAX( 0, ILAENV( IXOVER, 'SGEQP3RK', ' ', M, N, -1, + $ -1 )) * IF( NX.LT.MINMN ) THEN * diff --git a/lapack-netlib/SRC/sgeqr2.f b/lapack-netlib/SRC/sgeqr2.f index 5eef521f27..0a9c3936df 100644 --- a/lapack-netlib/SRC/sgeqr2.f +++ b/lapack-netlib/SRC/sgeqr2.f @@ -5,7 +5,6 @@ * Online html documentation available at * http://www.netlib.org/lapack/explore-html/ * -*> \htmlonly *> Download SGEQR2 + dependencies *> *> [TGZ] @@ -13,7 +12,6 @@ *> [ZIP] *> *> [TXT] -*> \endhtmlonly * * Definition: * =========== @@ -105,7 +103,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup realGEcomputational +*> \ingroup geqr2 * *> \par Further Details: * ===================== @@ -127,6 +125,7 @@ *> * ===================================================================== SUBROUTINE SGEQR2( M, N, A, LDA, TAU, WORK, INFO ) + IMPLICIT NONE * * -- LAPACK computational routine -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- @@ -141,16 +140,11 @@ SUBROUTINE SGEQR2( M, N, A, LDA, TAU, WORK, INFO ) * * ===================================================================== * -* .. Parameters .. - REAL ONE - PARAMETER ( ONE = 1.0E+0 ) -* .. * .. Local Scalars .. INTEGER I, K - REAL AII * .. * .. External Subroutines .. - EXTERNAL SLARF, SLARFG, XERBLA + EXTERNAL SLARF1F, SLARFG, XERBLA * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN @@ -184,11 +178,8 @@ SUBROUTINE SGEQR2( M, N, A, LDA, TAU, WORK, INFO ) * * Apply H(i) to A(i:m,i+1:n) from the left * - AII = A( I, I ) - A( I, I ) = ONE - CALL SLARF( 'Left', M-I+1, N-I, A( I, I ), 1, TAU( I ), - $ A( I, I+1 ), LDA, WORK ) - A( I, I ) = AII + CALL SLARF1F( 'Left', M-I+1, N-I, A( I, I ), 1, TAU( I ), + $ A( I, I+1 ), LDA, WORK ) END IF 10 CONTINUE RETURN diff --git a/lapack-netlib/SRC/sgeqr2p.f b/lapack-netlib/SRC/sgeqr2p.f index 0d270e9aa8..1e8c1f3e43 100644 --- a/lapack-netlib/SRC/sgeqr2p.f +++ b/lapack-netlib/SRC/sgeqr2p.f @@ -5,7 +5,6 @@ * Online html documentation available at * http://www.netlib.org/lapack/explore-html/ * -*> \htmlonly *> Download SGEQR2P + dependencies *> *> [TGZ] @@ -13,7 +12,6 @@ *> [ZIP] *> *> [TXT] -*> \endhtmlonly * * Definition: * =========== @@ -107,7 +105,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup realGEcomputational +*> \ingroup geqr2p * *> \par Further Details: * ===================== @@ -131,6 +129,7 @@ *> * ===================================================================== SUBROUTINE SGEQR2P( M, N, A, LDA, TAU, WORK, INFO ) + IMPLICIT NONE * * -- LAPACK computational routine -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- @@ -145,16 +144,11 @@ SUBROUTINE SGEQR2P( M, N, A, LDA, TAU, WORK, INFO ) * * ===================================================================== * -* .. Parameters .. - REAL ONE - PARAMETER ( ONE = 1.0E+0 ) -* .. * .. Local Scalars .. INTEGER I, K - REAL AII * .. * .. External Subroutines .. - EXTERNAL SLARF, SLARFGP, XERBLA + EXTERNAL SLARF1F, SLARFGP, XERBLA * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN @@ -188,11 +182,8 @@ SUBROUTINE SGEQR2P( M, N, A, LDA, TAU, WORK, INFO ) * * Apply H(i) to A(i:m,i+1:n) from the left * - AII = A( I, I ) - A( I, I ) = ONE - CALL SLARF( 'Left', M-I+1, N-I, A( I, I ), 1, TAU( I ), - $ A( I, I+1 ), LDA, WORK ) - A( I, I ) = AII + CALL SLARF1F( 'Left', M-I+1, N-I, A( I, I ), 1, TAU( I ), + $ A( I, I+1 ), LDA, WORK ) END IF 10 CONTINUE RETURN diff --git a/lapack-netlib/SRC/sgerq2.f b/lapack-netlib/SRC/sgerq2.f index d86905c033..14c8cf0517 100644 --- a/lapack-netlib/SRC/sgerq2.f +++ b/lapack-netlib/SRC/sgerq2.f @@ -5,7 +5,6 @@ * Online html documentation available at * http://www.netlib.org/lapack/explore-html/ * -*> \htmlonly *> Download SGERQ2 + dependencies *> *> [TGZ] @@ -13,7 +12,6 @@ *> [ZIP] *> *> [TXT] -*> \endhtmlonly * * Definition: * =========== @@ -98,7 +96,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup realGEcomputational +*> \ingroup gerq2 * *> \par Further Details: * ===================== @@ -120,6 +118,7 @@ *> * ===================================================================== SUBROUTINE SGERQ2( M, N, A, LDA, TAU, WORK, INFO ) + IMPLICIT NONE * * -- LAPACK computational routine -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- @@ -134,16 +133,11 @@ SUBROUTINE SGERQ2( M, N, A, LDA, TAU, WORK, INFO ) * * ===================================================================== * -* .. Parameters .. - REAL ONE - PARAMETER ( ONE = 1.0E+0 ) -* .. * .. Local Scalars .. INTEGER I, K - REAL AII * .. * .. External Subroutines .. - EXTERNAL SLARF, SLARFG, XERBLA + EXTERNAL SLARF1L, SLARFG, XERBLA * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN @@ -177,11 +171,8 @@ SUBROUTINE SGERQ2( M, N, A, LDA, TAU, WORK, INFO ) * * Apply H(i) to A(1:m-k+i-1,1:n-k+i) from the right * - AII = A( M-K+I, N-K+I ) - A( M-K+I, N-K+I ) = ONE - CALL SLARF( 'Right', M-K+I-1, N-K+I, A( M-K+I, 1 ), LDA, - $ TAU( I ), A, LDA, WORK ) - A( M-K+I, N-K+I ) = AII + CALL SLARF1L( 'Right', M-K+I-1, N-K+I, A( M-K+I, 1 ), LDA, + $ TAU( I ), A, LDA, WORK ) 10 CONTINUE RETURN * diff --git a/lapack-netlib/SRC/sgetc2.f b/lapack-netlib/SRC/sgetc2.f index a871a03ff3..55511ac713 100644 --- a/lapack-netlib/SRC/sgetc2.f +++ b/lapack-netlib/SRC/sgetc2.f @@ -5,7 +5,6 @@ * Online html documentation available at * http://www.netlib.org/lapack/explore-html/ * -*> \htmlonly *> Download SGETC2 + dependencies *> *> [TGZ] @@ -13,7 +12,6 @@ *> [ZIP] *> *> [TXT] -*> \endhtmlonly * * Definition: * =========== @@ -98,7 +96,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup realGEauxiliary +*> \ingroup getc2 * *> \par Contributors: * ================== @@ -108,6 +106,7 @@ * * ===================================================================== SUBROUTINE SGETC2( N, A, LDA, IPIV, JPIV, INFO ) + IMPLICIT NONE * * -- LAPACK auxiliary routine -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- @@ -132,7 +131,7 @@ SUBROUTINE SGETC2( N, A, LDA, IPIV, JPIV, INFO ) REAL BIGNUM, EPS, SMIN, SMLNUM, XMAX * .. * .. External Subroutines .. - EXTERNAL SGER, SLABAD, SSWAP + EXTERNAL SGER, SSWAP * .. * .. External Functions .. REAL SLAMCH @@ -155,7 +154,6 @@ SUBROUTINE SGETC2( N, A, LDA, IPIV, JPIV, INFO ) EPS = SLAMCH( 'P' ) SMLNUM = SLAMCH( 'S' ) / EPS BIGNUM = ONE / SMLNUM - CALL SLABAD( SMLNUM, BIGNUM ) * * Handle the case N=1 by itself * @@ -177,8 +175,8 @@ SUBROUTINE SGETC2( N, A, LDA, IPIV, JPIV, INFO ) * Find max element in matrix A * XMAX = ZERO - DO 20 IP = I, N - DO 10 JP = I, N + DO 20 JP = I, N + DO 10 IP = I, N IF( ABS( A( IP, JP ) ).GE.XMAX ) THEN XMAX = ABS( A( IP, JP ) ) IPV = IP diff --git a/lapack-netlib/SRC/slacpy.f b/lapack-netlib/SRC/slacpy.f index a33a3c67b7..8214332dc4 100644 --- a/lapack-netlib/SRC/slacpy.f +++ b/lapack-netlib/SRC/slacpy.f @@ -5,7 +5,6 @@ * Online html documentation available at * http://www.netlib.org/lapack/explore-html/ * -*> \htmlonly *> Download SLACPY + dependencies *> *> [TGZ] @@ -13,7 +12,6 @@ *> [ZIP] *> *> [TXT] -*> \endhtmlonly * * Definition: * =========== @@ -96,10 +94,11 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup OTHERauxiliary +*> \ingroup lacpy * * ===================================================================== SUBROUTINE SLACPY( UPLO, M, N, A, LDA, B, LDB ) + IMPLICIT NONE * * -- LAPACK auxiliary routine -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- @@ -134,7 +133,7 @@ SUBROUTINE SLACPY( UPLO, M, N, A, LDA, B, LDB ) 10 CONTINUE 20 CONTINUE ELSE IF( LSAME( UPLO, 'L' ) ) THEN - DO 40 J = 1, N + DO 40 J = 1, MIN( M, N ) DO 30 I = J, M B( I, J ) = A( I, J ) 30 CONTINUE diff --git a/lapack-netlib/SRC/slantr.f b/lapack-netlib/SRC/slantr.f index 384f58550b..b27b3b671d 100644 --- a/lapack-netlib/SRC/slantr.f +++ b/lapack-netlib/SRC/slantr.f @@ -5,7 +5,6 @@ * Online html documentation available at * http://www.netlib.org/lapack/explore-html/ * -*> \htmlonly *> Download SLANTR + dependencies *> *> [TGZ] @@ -13,7 +12,6 @@ *> [ZIP] *> *> [TXT] -*> \endhtmlonly * * Definition: * =========== @@ -133,11 +131,13 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup realOTHERauxiliary +*> \ingroup lantr * * ===================================================================== - REAL FUNCTION SLANTR( NORM, UPLO, DIAG, M, N, A, LDA, + REAL FUNCTION SLANTR( NORM, UPLO, DIAG, M, N, A, + $ LDA, $ WORK ) + IMPLICIT NONE * * -- LAPACK auxiliary routine -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- @@ -186,14 +186,16 @@ REAL FUNCTION SLANTR( NORM, UPLO, DIAG, M, N, A, LDA, DO 20 J = 1, N DO 10 I = 1, MIN( M, J-1 ) SUM = ABS( A( I, J ) ) - IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM + IF( VALUE .LT. SUM .OR. + $ SISNAN( SUM ) ) VALUE = SUM 10 CONTINUE 20 CONTINUE ELSE - DO 40 J = 1, N + DO 40 J = 1, MIN( M, N ) DO 30 I = J + 1, M SUM = ABS( A( I, J ) ) - IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM + IF( VALUE .LT. SUM .OR. + $ SISNAN( SUM ) ) VALUE = SUM 30 CONTINUE 40 CONTINUE END IF @@ -203,14 +205,16 @@ REAL FUNCTION SLANTR( NORM, UPLO, DIAG, M, N, A, LDA, DO 60 J = 1, N DO 50 I = 1, MIN( M, J ) SUM = ABS( A( I, J ) ) - IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM + IF( VALUE .LT. SUM .OR. + $ SISNAN( SUM ) ) VALUE = SUM 50 CONTINUE 60 CONTINUE ELSE - DO 80 J = 1, N + DO 80 J = 1, MIN( M, N ) DO 70 I = J, M SUM = ABS( A( I, J ) ) - IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM + IF( VALUE .LT. SUM .OR. + $ SISNAN( SUM ) ) VALUE = SUM 70 CONTINUE 80 CONTINUE END IF @@ -237,7 +241,7 @@ REAL FUNCTION SLANTR( NORM, UPLO, DIAG, M, N, A, LDA, IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM 110 CONTINUE ELSE - DO 140 J = 1, N + DO 140 J = 1, MIN( M, N ) IF( UDIAG ) THEN SUM = ONE DO 120 I = J + 1, M @@ -284,7 +288,7 @@ REAL FUNCTION SLANTR( NORM, UPLO, DIAG, M, N, A, LDA, DO 220 I = N + 1, M WORK( I ) = ZERO 220 CONTINUE - DO 240 J = 1, N + DO 240 J = 1, MIN( M, N ) DO 230 I = J + 1, M WORK( I ) = WORK( I ) + ABS( A( I, J ) ) 230 CONTINUE @@ -293,7 +297,7 @@ REAL FUNCTION SLANTR( NORM, UPLO, DIAG, M, N, A, LDA, DO 250 I = 1, M WORK( I ) = ZERO 250 CONTINUE - DO 270 J = 1, N + DO 270 J = 1, MIN( M, N ) DO 260 I = J, M WORK( I ) = WORK( I ) + ABS( A( I, J ) ) 260 CONTINUE @@ -305,36 +309,39 @@ REAL FUNCTION SLANTR( NORM, UPLO, DIAG, M, N, A, LDA, SUM = WORK( I ) IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM 280 CONTINUE - ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN + ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. + $ ( LSAME( NORM, 'E' ) ) ) THEN * * Find normF(A). * IF( LSAME( UPLO, 'U' ) ) THEN IF( LSAME( DIAG, 'U' ) ) THEN SCALE = ONE - SUM = MIN( M, N ) + SUM = REAL( MIN( M, N ) ) DO 290 J = 2, N - CALL SLASSQ( MIN( M, J-1 ), A( 1, J ), 1, SCALE, SUM ) + CALL SLASSQ( MIN( M, J-1 ), A( 1, J ), 1, SCALE, + $ SUM ) 290 CONTINUE ELSE SCALE = ZERO SUM = ONE DO 300 J = 1, N - CALL SLASSQ( MIN( M, J ), A( 1, J ), 1, SCALE, SUM ) + CALL SLASSQ( MIN( M, J ), A( 1, J ), 1, SCALE, + $ SUM ) 300 CONTINUE END IF ELSE IF( LSAME( DIAG, 'U' ) ) THEN SCALE = ONE - SUM = MIN( M, N ) - DO 310 J = 1, N + SUM = REAL( MIN( M, N ) ) + DO 310 J = 1, MIN ( M, N ) CALL SLASSQ( M-J, A( MIN( M, J+1 ), J ), 1, SCALE, $ SUM ) 310 CONTINUE ELSE SCALE = ZERO SUM = ONE - DO 320 J = 1, N + DO 320 J = 1, MIN( M, N ) CALL SLASSQ( M-J+1, A( J, J ), 1, SCALE, SUM ) 320 CONTINUE END IF diff --git a/lapack-netlib/SRC/slaqp2.f b/lapack-netlib/SRC/slaqp2.f index 595fb8c340..530d1913d6 100644 --- a/lapack-netlib/SRC/slaqp2.f +++ b/lapack-netlib/SRC/slaqp2.f @@ -5,7 +5,6 @@ * Online html documentation available at * http://www.netlib.org/lapack/explore-html/ * -*> \htmlonly *> Download SLAQP2 + dependencies *> *> [TGZ] @@ -13,7 +12,6 @@ *> [ZIP] *> *> [TXT] -*> \endhtmlonly * * Definition: * =========== @@ -122,7 +120,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup realOTHERauxiliary +*> \ingroup laqp2 * *> \par Contributors: * ================== @@ -139,13 +137,12 @@ *> *> LAPACK Working Note 176 * -*> \htmlonly *> [PDF] -*> \endhtmlonly * * ===================================================================== SUBROUTINE SLAQP2( M, N, OFFSET, A, LDA, JPVT, TAU, VN1, VN2, $ WORK ) + IMPLICIT NONE * * -- LAPACK auxiliary routine -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- @@ -168,10 +165,10 @@ SUBROUTINE SLAQP2( M, N, OFFSET, A, LDA, JPVT, TAU, VN1, VN2, * .. * .. Local Scalars .. INTEGER I, ITEMP, J, MN, OFFPI, PVT - REAL AII, TEMP, TEMP2, TOL3Z + REAL TEMP, TEMP2, TOL3Z * .. * .. External Subroutines .. - EXTERNAL SLARF, SLARFG, SSWAP + EXTERNAL SLARF1F, SLARFG, SSWAP * .. * .. Intrinsic Functions .. INTRINSIC ABS, MAX, MIN, SQRT @@ -208,7 +205,8 @@ SUBROUTINE SLAQP2( M, N, OFFSET, A, LDA, JPVT, TAU, VN1, VN2, * Generate elementary reflector H(i). * IF( OFFPI.LT.M ) THEN - CALL SLARFG( M-OFFPI+1, A( OFFPI, I ), A( OFFPI+1, I ), 1, + CALL SLARFG( M-OFFPI+1, A( OFFPI, I ), A( OFFPI+1, I ), + $ 1, $ TAU( I ) ) ELSE CALL SLARFG( 1, A( M, I ), A( M, I ), 1, TAU( I ) ) @@ -218,11 +216,8 @@ SUBROUTINE SLAQP2( M, N, OFFSET, A, LDA, JPVT, TAU, VN1, VN2, * * Apply H(i)**T to A(offset+i:m,i+1:n) from the left. * - AII = A( OFFPI, I ) - A( OFFPI, I ) = ONE - CALL SLARF( 'Left', M-OFFPI+1, N-I, A( OFFPI, I ), 1, - $ TAU( I ), A( OFFPI, I+1 ), LDA, WORK( 1 ) ) - A( OFFPI, I ) = AII + CALL SLARF1F( 'Left', M-OFFPI+1, N-I, A( OFFPI, I ), 1, + $ TAU( I ), A( OFFPI, I+1 ), LDA, WORK( 1 ) ) END IF * * Update partial column norms. diff --git a/lapack-netlib/SRC/slaqp2rk.f b/lapack-netlib/SRC/slaqp2rk.f index f88b0ce909..3825e25106 100644 --- a/lapack-netlib/SRC/slaqp2rk.f +++ b/lapack-netlib/SRC/slaqp2rk.f @@ -5,7 +5,6 @@ * Online html documentation available at * http://www.netlib.org/lapack/explore-html/ * -*> \htmlonly *> Download SLAQP2RK + dependencies *> *> [TGZ] @@ -13,7 +12,6 @@ *> [ZIP] *> *> [TXT] -*> \endhtmlonly * * Definition: * =========== @@ -253,7 +251,7 @@ *> \param[out] WORK *> \verbatim *> WORK is REAL array, dimension (N-1) -*> Used in SLARF subroutine to apply an elementary +*> Used in SLARF1F subroutine to apply an elementary *> reflector from the left. *> \endverbatim *> @@ -303,27 +301,19 @@ *> C. H. Bischof, Math. and Comp. Sci. Div., Argonne National Lab, USA. *> A BLAS-3 version of the QR factorization with column pivoting. *> LAPACK Working Note 114 -*> \htmlonly *> https://www.netlib.org/lapack/lawnspdf/lawn114.pdf -*> \endhtmlonly *> and in *> SIAM J. Sci. Comput., 19(5):1486-1494, Sept. 1998. -*> \htmlonly *> https://doi.org/10.1137/S1064827595296732 -*> \endhtmlonly *> *> [2] A partial column norm updating strategy developed in 2006. *> Z. Drmac and Z. Bujanovic, Dept. of Math., University of Zagreb, Croatia. *> On the failure of rank revealing QR factorization software – a case study. *> LAPACK Working Note 176. -*> \htmlonly *> http://www.netlib.org/lapack/lawnspdf/lawn176.pdf -*> \endhtmlonly *> and in *> ACM Trans. Math. Softw. 35, 2, Article 12 (July 2008), 28 pages. -*> \htmlonly *> https://doi.org/10.1145/1377612.1377616 -*> \endhtmlonly * *> \par Contributors: * ================== @@ -365,12 +355,12 @@ SUBROUTINE SLAQP2RK( M, N, NRHS, IOFFSET, KMAX, ABSTOL, RELTOL, PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) * .. * .. Local Scalars .. - INTEGER I, ITEMP, J, JMAXC2NRM, KK, KP, MINMNFACT, - $ MINMNUPDT - REAL AIKK, HUGEVAL, TEMP, TEMP2, TOL3Z + INTEGER I, ITEMP, J, JMAXC2NRM, KK, KP, + $ KBOUND, MINMNFACT, MINMNUPDT + REAL HUGEVAL, TEMP, TEMP2, TOL3Z * .. * .. External Subroutines .. - EXTERNAL SLARF, SLARFG, SSWAP + EXTERNAL SLARF1F, SLARFG, SSWAP * .. * .. Intrinsic Functions .. INTRINSIC ABS, MAX, MIN, SQRT @@ -397,13 +387,13 @@ SUBROUTINE SLAQP2RK( M, N, NRHS, IOFFSET, KMAX, ABSTOL, RELTOL, * MINMNFACT = MIN( M-IOFFSET, N ) MINMNUPDT = MIN( M-IOFFSET, N+NRHS ) - KMAX = MIN( KMAX, MINMNFACT ) + KBOUND = MIN( KMAX, MINMNFACT ) TOL3Z = SQRT( SLAMCH( 'Epsilon' ) ) HUGEVAL = SLAMCH( 'Overflow' ) * * Compute the factorization, KK is the lomn loop index. * - DO KK = 1, KMAX + DO KK = 1, KBOUND * I = IOFFSET + KK * @@ -621,11 +611,8 @@ SUBROUTINE SLAQP2RK( M, N, NRHS, IOFFSET, KMAX, ABSTOL, RELTOL, * condition is satisfied, not only KK < N+NRHS ) * IF( KK.LT.MINMNUPDT ) THEN - AIKK = A( I, KK ) - A( I, KK ) = ONE - CALL SLARF( 'Left', M-I+1, N+NRHS-KK, A( I, KK ), 1, - $ TAU( KK ), A( I, KK+1 ), LDA, WORK( 1 ) ) - A( I, KK ) = AIKK + CALL SLARF1F( 'Left', M-I+1, N+NRHS-KK, A( I, KK ), 1, + $ TAU( KK ), A( I, KK+1 ), LDA, WORK( 1 ) ) END IF * IF( KK.LT.MINMNFACT ) THEN @@ -676,7 +663,7 @@ SUBROUTINE SLAQP2RK( M, N, NRHS, IOFFSET, KMAX, ABSTOL, RELTOL, * i.e. no condition was triggered to exit the routine. * Set the number of factorized columns. * - K = KMAX + K = KBOUND * * We reached the end of the loop, i.e. all KMAX columns were * factorized, we need to set MAXC2NRMK and RELMAXC2NRMK before diff --git a/lapack-netlib/SRC/slaqr2.f b/lapack-netlib/SRC/slaqr2.f index caf79fd1c0..9ee1f8b6e2 100644 --- a/lapack-netlib/SRC/slaqr2.f +++ b/lapack-netlib/SRC/slaqr2.f @@ -5,7 +5,6 @@ * Online html documentation available at * http://www.netlib.org/lapack/explore-html/ * -*> \htmlonly *> Download SLAQR2 + dependencies *> *> [TGZ] @@ -13,7 +12,6 @@ *> [ZIP] *> *> [TXT] -*> \endhtmlonly * * Definition: * =========== @@ -272,9 +270,11 @@ *> University of Kansas, USA *> * ===================================================================== - SUBROUTINE SLAQR2( WANTT, WANTZ, N, KTOP, KBOT, NW, H, LDH, ILOZ, + SUBROUTINE SLAQR2( WANTT, WANTZ, N, KTOP, KBOT, NW, H, LDH, + $ ILOZ, $ IHIZ, Z, LDZ, NS, ND, SR, SI, V, LDV, NH, T, $ LDT, NV, WV, LDWV, WORK, LWORK ) + IMPLICIT NONE * * -- LAPACK auxiliary routine -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- @@ -297,7 +297,7 @@ SUBROUTINE SLAQR2( WANTT, WANTZ, N, KTOP, KBOT, NW, H, LDH, ILOZ, PARAMETER ( ZERO = 0.0e0, ONE = 1.0e0 ) * .. * .. Local Scalars .. - REAL AA, BB, BETA, CC, CS, DD, EVI, EVK, FOO, S, + REAL AA, BB, CC, CS, DD, EVI, EVK, FOO, S, $ SAFMAX, SAFMIN, SMLNUM, SN, TAU, ULP INTEGER I, IFST, ILST, INFO, INFQR, J, JW, K, KCOL, $ KEND, KLN, KROW, KWTOP, LTOP, LWK1, LWK2, @@ -309,8 +309,10 @@ SUBROUTINE SLAQR2( WANTT, WANTZ, N, KTOP, KBOT, NW, H, LDH, ILOZ, EXTERNAL SLAMCH, SROUNDUP_LWORK * .. * .. External Subroutines .. - EXTERNAL SCOPY, SGEHRD, SGEMM, SLACPY, SLAHQR, - $ SLANV2, SLARF, SLARFG, SLASET, SORMHR, STREXC + EXTERNAL SCOPY, SGEHRD, SGEMM, SLACPY, + $ SLAHQR, + $ SLANV2, SLARF1L, SLARFG, SLASET, SORMHR, + $ STREXC * .. * .. Intrinsic Functions .. INTRINSIC ABS, INT, MAX, MIN, REAL, SQRT @@ -331,7 +333,8 @@ SUBROUTINE SLAQR2( WANTT, WANTZ, N, KTOP, KBOT, NW, H, LDH, ILOZ, * * ==== Workspace query call to SORMHR ==== * - CALL SORMHR( 'R', 'N', JW, JW, 1, JW-1, T, LDT, WORK, V, LDV, + CALL SORMHR( 'R', 'N', JW, JW, 1, JW-1, T, LDT, WORK, V, + $ LDV, $ WORK, -1, INFO ) LWK2 = INT( WORK( 1 ) ) * @@ -401,7 +404,8 @@ SUBROUTINE SLAQR2( WANTT, WANTZ, N, KTOP, KBOT, NW, H, LDH, ILOZ, * . here and there to keep track.) ==== * CALL SLACPY( 'U', JW, JW, H( KWTOP, KWTOP ), LDH, T, LDT ) - CALL SCOPY( JW-1, H( KWTOP+1, KWTOP ), LDH+1, T( 2, 1 ), LDT+1 ) + CALL SCOPY( JW-1, H( KWTOP+1, KWTOP ), LDH+1, T( 2, 1 ), + $ LDT+1 ) * CALL SLASET( 'A', JW, JW, ZERO, ONE, V, LDV ) CALL SLAHQR( .true., .true., JW, 1, JW, T, LDT, SR( KWTOP ), @@ -448,7 +452,8 @@ SUBROUTINE SLAQR2( WANTT, WANTZ, N, KTOP, KBOT, NW, H, LDH, ILOZ, * . (STREXC can not fail in this case.) ==== * IFST = NS - CALL STREXC( 'V', JW, T, LDT, V, LDV, IFST, ILST, WORK, + CALL STREXC( 'V', JW, T, LDT, V, LDV, IFST, ILST, + $ WORK, $ INFO ) ILST = ILST + 1 END IF @@ -473,7 +478,8 @@ SUBROUTINE SLAQR2( WANTT, WANTZ, N, KTOP, KBOT, NW, H, LDH, ILOZ, * . ILST in case of a rare exchange failure. ==== * IFST = NS - CALL STREXC( 'V', JW, T, LDT, V, LDV, IFST, ILST, WORK, + CALL STREXC( 'V', JW, T, LDT, V, LDV, IFST, ILST, + $ WORK, $ INFO ) ILST = ILST + 2 END IF @@ -535,7 +541,8 @@ SUBROUTINE SLAQR2( WANTT, WANTZ, N, KTOP, KBOT, NW, H, LDH, ILOZ, SORTED = .false. IFST = I ILST = K - CALL STREXC( 'V', JW, T, LDT, V, LDV, IFST, ILST, WORK, + CALL STREXC( 'V', JW, T, LDT, V, LDV, IFST, ILST, + $ WORK, $ INFO ) IF( INFO.EQ.0 ) THEN I = ILST @@ -588,18 +595,17 @@ SUBROUTINE SLAQR2( WANTT, WANTZ, N, KTOP, KBOT, NW, H, LDH, ILOZ, * ==== Reflect spike back into lower triangle ==== * CALL SCOPY( NS, V, LDV, WORK, 1 ) - BETA = WORK( 1 ) - CALL SLARFG( NS, BETA, WORK( 2 ), 1, TAU ) - WORK( 1 ) = ONE + CALL SLARFG( NS, WORK( 1 ), WORK( 2 ), 1, TAU ) * - CALL SLASET( 'L', JW-2, JW-2, ZERO, ZERO, T( 3, 1 ), LDT ) + CALL SLASET( 'L', JW-2, JW-2, ZERO, ZERO, T( 3, 1 ), + $ LDT ) * - CALL SLARF( 'L', NS, JW, WORK, 1, TAU, T, LDT, - $ WORK( JW+1 ) ) - CALL SLARF( 'R', NS, NS, WORK, 1, TAU, T, LDT, - $ WORK( JW+1 ) ) - CALL SLARF( 'R', JW, NS, WORK, 1, TAU, V, LDV, - $ WORK( JW+1 ) ) + CALL SLARF1F( 'L', NS, JW, WORK, 1, TAU, T, LDT, + $ WORK( JW+1 ) ) + CALL SLARF1F( 'R', NS, NS, WORK, 1, TAU, T, LDT, + $ WORK( JW+1 ) ) + CALL SLARF1F( 'R', JW, NS, WORK, 1, TAU, V, LDV, + $ WORK( JW+1 ) ) * CALL SGEHRD( JW, 1, NS, T, LDT, WORK, WORK( JW+1 ), $ LWORK-JW, INFO ) @@ -617,7 +623,8 @@ SUBROUTINE SLAQR2( WANTT, WANTZ, N, KTOP, KBOT, NW, H, LDH, ILOZ, * . H and Z, if requested. ==== * IF( NS.GT.1 .AND. S.NE.ZERO ) - $ CALL SORMHR( 'R', 'N', JW, NS, 1, NS, T, LDT, WORK, V, LDV, + $ CALL SORMHR( 'R', 'N', JW, NS, 1, NS, T, LDT, WORK, V, + $ LDV, $ WORK( JW+1 ), LWORK-JW, INFO ) * * ==== Update vertical slab in H ==== @@ -631,7 +638,8 @@ SUBROUTINE SLAQR2( WANTT, WANTZ, N, KTOP, KBOT, NW, H, LDH, ILOZ, KLN = MIN( NV, KWTOP-KROW ) CALL SGEMM( 'N', 'N', KLN, JW, JW, ONE, H( KROW, KWTOP ), $ LDH, V, LDV, ZERO, WV, LDWV ) - CALL SLACPY( 'A', KLN, JW, WV, LDWV, H( KROW, KWTOP ), LDH ) + CALL SLACPY( 'A', KLN, JW, WV, LDWV, H( KROW, KWTOP ), + $ LDH ) 70 CONTINUE * * ==== Update horizontal slab in H ==== @@ -651,7 +659,8 @@ SUBROUTINE SLAQR2( WANTT, WANTZ, N, KTOP, KBOT, NW, H, LDH, ILOZ, IF( WANTZ ) THEN DO 90 KROW = ILOZ, IHIZ, NV KLN = MIN( NV, IHIZ-KROW+1 ) - CALL SGEMM( 'N', 'N', KLN, JW, JW, ONE, Z( KROW, KWTOP ), + CALL SGEMM( 'N', 'N', KLN, JW, JW, ONE, Z( KROW, + $ KWTOP ), $ LDZ, V, LDV, ZERO, WV, LDWV ) CALL SLACPY( 'A', KLN, JW, WV, LDWV, Z( KROW, KWTOP ), $ LDZ ) diff --git a/lapack-netlib/SRC/slaqr3.f b/lapack-netlib/SRC/slaqr3.f index d3ffb0f969..f9f8090a3a 100644 --- a/lapack-netlib/SRC/slaqr3.f +++ b/lapack-netlib/SRC/slaqr3.f @@ -5,7 +5,6 @@ * Online html documentation available at * http://www.netlib.org/lapack/explore-html/ * -*> \htmlonly *> Download SLAQR3 + dependencies *> *> [TGZ] @@ -13,7 +12,6 @@ *> [ZIP] *> *> [TXT] -*> \endhtmlonly * * Definition: * =========== @@ -269,9 +267,11 @@ *> University of Kansas, USA *> * ===================================================================== - SUBROUTINE SLAQR3( WANTT, WANTZ, N, KTOP, KBOT, NW, H, LDH, ILOZ, + SUBROUTINE SLAQR3( WANTT, WANTZ, N, KTOP, KBOT, NW, H, LDH, + $ ILOZ, $ IHIZ, Z, LDZ, NS, ND, SR, SI, V, LDV, NH, T, $ LDT, NV, WV, LDWV, WORK, LWORK ) + IMPLICIT NONE * * -- LAPACK auxiliary routine -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- @@ -294,7 +294,7 @@ SUBROUTINE SLAQR3( WANTT, WANTZ, N, KTOP, KBOT, NW, H, LDH, ILOZ, PARAMETER ( ZERO = 0.0e0, ONE = 1.0e0 ) * .. * .. Local Scalars .. - REAL AA, BB, BETA, CC, CS, DD, EVI, EVK, FOO, S, + REAL AA, BB, CC, CS, DD, EVI, EVK, FOO, S, $ SAFMAX, SAFMIN, SMLNUM, SN, TAU, ULP INTEGER I, IFST, ILST, INFO, INFQR, J, JW, K, KCOL, $ KEND, KLN, KROW, KWTOP, LTOP, LWK1, LWK2, LWK3, @@ -307,8 +307,10 @@ SUBROUTINE SLAQR3( WANTT, WANTZ, N, KTOP, KBOT, NW, H, LDH, ILOZ, EXTERNAL SLAMCH, SROUNDUP_LWORK, ILAENV * .. * .. External Subroutines .. - EXTERNAL SCOPY, SGEHRD, SGEMM, SLACPY, SLAHQR, SLANV2, - $ SLAQR4, SLARF, SLARFG, SLASET, SORMHR, STREXC + EXTERNAL SCOPY, SGEHRD, SGEMM, SLACPY, SLAHQR, + $ SLANV2, + $ SLAQR4, SLARF1F, SLARFG, SLASET, SORMHR, + $ STREXC * .. * .. Intrinsic Functions .. INTRINSIC ABS, INT, MAX, MIN, REAL, SQRT @@ -329,13 +331,15 @@ SUBROUTINE SLAQR3( WANTT, WANTZ, N, KTOP, KBOT, NW, H, LDH, ILOZ, * * ==== Workspace query call to SORMHR ==== * - CALL SORMHR( 'R', 'N', JW, JW, 1, JW-1, T, LDT, WORK, V, LDV, + CALL SORMHR( 'R', 'N', JW, JW, 1, JW-1, T, LDT, WORK, V, + $ LDV, $ WORK, -1, INFO ) LWK2 = INT( WORK( 1 ) ) * * ==== Workspace query call to SLAQR4 ==== * - CALL SLAQR4( .true., .true., JW, 1, JW, T, LDT, SR, SI, 1, JW, + CALL SLAQR4( .true., .true., JW, 1, JW, T, LDT, SR, SI, 1, + $ JW, $ V, LDV, WORK, -1, INFQR ) LWK3 = INT( WORK( 1 ) ) * @@ -405,7 +409,8 @@ SUBROUTINE SLAQR3( WANTT, WANTZ, N, KTOP, KBOT, NW, H, LDH, ILOZ, * . here and there to keep track.) ==== * CALL SLACPY( 'U', JW, JW, H( KWTOP, KWTOP ), LDH, T, LDT ) - CALL SCOPY( JW-1, H( KWTOP+1, KWTOP ), LDH+1, T( 2, 1 ), LDT+1 ) + CALL SCOPY( JW-1, H( KWTOP+1, KWTOP ), LDH+1, T( 2, 1 ), + $ LDT+1 ) * CALL SLASET( 'A', JW, JW, ZERO, ONE, V, LDV ) NMIN = ILAENV( 12, 'SLAQR3', 'SV', JW, 1, JW, LWORK ) @@ -458,7 +463,8 @@ SUBROUTINE SLAQR3( WANTT, WANTZ, N, KTOP, KBOT, NW, H, LDH, ILOZ, * . (STREXC can not fail in this case.) ==== * IFST = NS - CALL STREXC( 'V', JW, T, LDT, V, LDV, IFST, ILST, WORK, + CALL STREXC( 'V', JW, T, LDT, V, LDV, IFST, ILST, + $ WORK, $ INFO ) ILST = ILST + 1 END IF @@ -483,7 +489,8 @@ SUBROUTINE SLAQR3( WANTT, WANTZ, N, KTOP, KBOT, NW, H, LDH, ILOZ, * . ILST in case of a rare exchange failure. ==== * IFST = NS - CALL STREXC( 'V', JW, T, LDT, V, LDV, IFST, ILST, WORK, + CALL STREXC( 'V', JW, T, LDT, V, LDV, IFST, ILST, + $ WORK, $ INFO ) ILST = ILST + 2 END IF @@ -545,7 +552,8 @@ SUBROUTINE SLAQR3( WANTT, WANTZ, N, KTOP, KBOT, NW, H, LDH, ILOZ, SORTED = .false. IFST = I ILST = K - CALL STREXC( 'V', JW, T, LDT, V, LDV, IFST, ILST, WORK, + CALL STREXC( 'V', JW, T, LDT, V, LDV, IFST, ILST, + $ WORK, $ INFO ) IF( INFO.EQ.0 ) THEN I = ILST @@ -598,18 +606,17 @@ SUBROUTINE SLAQR3( WANTT, WANTZ, N, KTOP, KBOT, NW, H, LDH, ILOZ, * ==== Reflect spike back into lower triangle ==== * CALL SCOPY( NS, V, LDV, WORK, 1 ) - BETA = WORK( 1 ) - CALL SLARFG( NS, BETA, WORK( 2 ), 1, TAU ) - WORK( 1 ) = ONE + CALL SLARFG( NS, WORK( 1 ), WORK( 2 ), 1, TAU ) * - CALL SLASET( 'L', JW-2, JW-2, ZERO, ZERO, T( 3, 1 ), LDT ) + CALL SLASET( 'L', JW-2, JW-2, ZERO, ZERO, T( 3, 1 ), + $ LDT ) * - CALL SLARF( 'L', NS, JW, WORK, 1, TAU, T, LDT, - $ WORK( JW+1 ) ) - CALL SLARF( 'R', NS, NS, WORK, 1, TAU, T, LDT, - $ WORK( JW+1 ) ) - CALL SLARF( 'R', JW, NS, WORK, 1, TAU, V, LDV, - $ WORK( JW+1 ) ) + CALL SLARF1F( 'L', NS, JW, WORK, 1, TAU, T, LDT, + $ WORK( JW+1 ) ) + CALL SLARF1F( 'R', NS, NS, WORK, 1, TAU, T, LDT, + $ WORK( JW+1 ) ) + CALL SLARF1F( 'R', JW, NS, WORK, 1, TAU, V, LDV, + $ WORK( JW+1 ) ) * CALL SGEHRD( JW, 1, NS, T, LDT, WORK, WORK( JW+1 ), $ LWORK-JW, INFO ) @@ -627,7 +634,8 @@ SUBROUTINE SLAQR3( WANTT, WANTZ, N, KTOP, KBOT, NW, H, LDH, ILOZ, * . H and Z, if requested. ==== * IF( NS.GT.1 .AND. S.NE.ZERO ) - $ CALL SORMHR( 'R', 'N', JW, NS, 1, NS, T, LDT, WORK, V, LDV, + $ CALL SORMHR( 'R', 'N', JW, NS, 1, NS, T, LDT, WORK, V, + $ LDV, $ WORK( JW+1 ), LWORK-JW, INFO ) * * ==== Update vertical slab in H ==== @@ -641,7 +649,8 @@ SUBROUTINE SLAQR3( WANTT, WANTZ, N, KTOP, KBOT, NW, H, LDH, ILOZ, KLN = MIN( NV, KWTOP-KROW ) CALL SGEMM( 'N', 'N', KLN, JW, JW, ONE, H( KROW, KWTOP ), $ LDH, V, LDV, ZERO, WV, LDWV ) - CALL SLACPY( 'A', KLN, JW, WV, LDWV, H( KROW, KWTOP ), LDH ) + CALL SLACPY( 'A', KLN, JW, WV, LDWV, H( KROW, KWTOP ), + $ LDH ) 70 CONTINUE * * ==== Update horizontal slab in H ==== @@ -661,7 +670,8 @@ SUBROUTINE SLAQR3( WANTT, WANTZ, N, KTOP, KBOT, NW, H, LDH, ILOZ, IF( WANTZ ) THEN DO 90 KROW = ILOZ, IHIZ, NV KLN = MIN( NV, IHIZ-KROW+1 ) - CALL SGEMM( 'N', 'N', KLN, JW, JW, ONE, Z( KROW, KWTOP ), + CALL SGEMM( 'N', 'N', KLN, JW, JW, ONE, Z( KROW, + $ KWTOP ), $ LDZ, V, LDV, ZERO, WV, LDWV ) CALL SLACPY( 'A', KLN, JW, WV, LDWV, Z( KROW, KWTOP ), $ LDZ ) diff --git a/lapack-netlib/SRC/slarf1f.c b/lapack-netlib/SRC/slarf1f.c new file mode 100644 index 0000000000..498201ffa4 --- /dev/null +++ b/lapack-netlib/SRC/slarf1f.c @@ -0,0 +1,528 @@ +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +#if defined(_WIN64) +typedef long long BLASLONG; +typedef unsigned long long BLASULONG; +#else +typedef long BLASLONG; +typedef unsigned long BLASULONG; +#endif + +#ifdef LAPACK_ILP64 +typedef BLASLONG blasint; +#if defined(_WIN64) +#define blasabs(x) llabs(x) +#else +#define blasabs(x) labs(x) +#endif +#else +typedef int blasint; +#define blasabs(x) abs(x) +#endif + +typedef blasint integer; + +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +#ifdef _MSC_VER +static inline _Fcomplex Cf(complex *z) {_Fcomplex zz={z->r , z->i}; return zz;} +static inline _Dcomplex Cd(doublecomplex *z) {_Dcomplex zz={z->r , z->i};return zz;} +static inline _Fcomplex * _pCf(complex *z) {return (_Fcomplex*)z;} +static inline _Dcomplex * _pCd(doublecomplex *z) {return (_Dcomplex*)z;} +#else +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#endif +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#ifdef _MSC_VER +#define c_div(c, a, b) {Cf(c)._Val[0] = (Cf(a)._Val[0]/Cf(b)._Val[0]); Cf(c)._Val[1]=(Cf(a)._Val[1]/Cf(b)._Val[1]);} +#define z_div(c, a, b) {Cd(c)._Val[0] = (Cd(a)._Val[0]/Cd(b)._Val[0]); Cd(c)._Val[1]=(Cd(a)._Val[1]/Cd(b)._Val[1]);} +#else +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#endif +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conjf(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimagf(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +/* Table of constant values */ + +static real c_b4 = 1.f; +static real c_b5 = 0.f; +static integer c__1 = 1; + +/* > \brief \b SLARF1F applies an elementary reflector to a general rectangular */ +/* matrix assuming v(1) = 1. */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > Download SLARF1F + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE SLARF1F( SIDE, M, N, V, INCV, TAU, C, LDC, WORK ) */ + +/* CHARACTER SIDE */ +/* INTEGER INCV, LDC, M, N */ +/* REAL TAU */ +/* REAL C( LDC, * ), V( * ), WORK( * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > SLARF1F applies a real elementary reflector H to a real m by n matrix */ +/* > C, from either the left or the right. H is represented in the form */ +/* > */ +/* > H = I - tau * v * v**T */ +/* > */ +/* > where tau is a real scalar and v is a real vector assuming v(1) = 1. */ +/* > */ +/* > If tau = 0, then H is taken to be the unit matrix. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] SIDE */ +/* > \verbatim */ +/* > SIDE is CHARACTER*1 */ +/* > = 'L': form H * C */ +/* > = 'R': form C * H */ +/* > \endverbatim */ +/* > */ +/* > \param[in] M */ +/* > \verbatim */ +/* > M is INTEGER */ +/* > The number of rows of the matrix C. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The number of columns of the matrix C. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] V */ +/* > \verbatim */ +/* > V is REAL array, dimension */ +/* > (1 + (M-1)*abs(INCV)) if SIDE = 'L' */ +/* > or (1 + (N-1)*abs(INCV)) if SIDE = 'R' */ +/* > The vector v in the representation of H. V is not used if */ +/* > TAU = 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] INCV */ +/* > \verbatim */ +/* > INCV is INTEGER */ +/* > The increment between elements of v. INCV <> 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] TAU */ +/* > \verbatim */ +/* > TAU is REAL */ +/* > The value tau in the representation of H. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] C */ +/* > \verbatim */ +/* > C is REAL array, dimension (LDC,N) */ +/* > On entry, the m by n matrix C. */ +/* > On exit, C is overwritten by the matrix H * C if SIDE = 'L', */ +/* > or C * H if SIDE = 'R'. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDC */ +/* > \verbatim */ +/* > LDC is INTEGER */ +/* > The leading dimension of the array C. LDC >= f2cmax(1,M). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is REAL array, dimension */ +/* > (N) if SIDE = 'L' */ +/* > or (M) if SIDE = 'R' */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \ingroup larf1f */ + +/* ===================================================================== */ +/* Subroutine */ int slarf1f_(char *side, integer *m, integer *n, real *v, + integer *incv, real *tau, real *c__, integer *ldc, real *work) +{ + /* System generated locals */ + integer c_dim1, c_offset, i__1; + real r__1; + + /* Local variables */ + integer i__; + logical applyleft; + extern /* Subroutine */ int sger_(integer *, integer *, real *, real *, + integer *, real *, integer *, real *, integer *); + extern logical lsame_(char *, char *); + extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *); + integer lastc; + extern /* Subroutine */ int sgemv_(char *, integer *, integer *, real *, + real *, integer *, real *, integer *, real *, real *, integer *); + integer lastv; + extern /* Subroutine */ int saxpy_(integer *, real *, real *, integer *, + real *, integer *); + extern integer ilaslc_(integer *, integer *, real *, integer *), ilaslr_( + integer *, integer *, real *, integer *); + + +/* -- LAPACK auxiliary routine -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + + +/* ===================================================================== */ + + + /* Parameter adjustments */ + --v; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + --work; + + /* Function Body */ + applyleft = lsame_(side, "L"); + lastv = 1; + lastc = 0; + if (*tau != 0.f) { +/* Set up variables for scanning V. LASTV begins pointing to the end */ +/* of V up to V(1). */ + if (applyleft) { + lastv = *m; + } else { + lastv = *n; + } + if (*incv > 0) { + i__ = (lastv - 1) * *incv + 1; + } else { + i__ = 1; + } +/* Look for the last non-zero row in V. */ + while(lastv > 1 && v[i__] == 0.f) { + --lastv; + i__ -= *incv; + } + if (applyleft) { +/* Scan for the last non-zero column in C(1:lastv,:). */ + lastc = ilaslc_(&lastv, n, &c__[c_offset], ldc); + } else { +/* Scan for the last non-zero row in C(:,1:lastv). */ + lastc = ilaslr_(m, &lastv, &c__[c_offset], ldc); + } + } + if (lastc == 0) { + return 0; + } + if (applyleft) { + +/* Form H * C */ + + if (lastv == 1) { + +/* C(1,1:lastc) := ( 1 - tau ) * C(1,1:lastc) */ + + r__1 = 1.f - *tau; + sscal_(&lastc, &r__1, &c__[c_offset], ldc); + } else { + +/* w(1:lastc,1) := C(2:lastv,1:lastc)**T * v(2:lastv,1) */ + + i__1 = lastv - 1; + sgemv_("Transpose", &i__1, &lastc, &c_b4, &c__[c_dim1 + 2], ldc, & + v[*incv + 1], incv, &c_b5, &work[1], &c__1); + +/* w(1:lastc,1) += v(1,1) * C(1,1:lastc)**T */ + + saxpy_(&lastc, &c_b4, &c__[c_offset], ldc, &work[1], &c__1); + +/* C(1, 1:lastc) += - tau * v(1,1) * w(1:lastc,1)**T */ + + r__1 = -(*tau); + saxpy_(&lastc, &r__1, &work[1], &c__1, &c__[c_offset], ldc); + +/* C(2:lastv,1:lastc) += - tau * v(2:lastv,1) * w(1:lastc,1)**T */ + + i__1 = lastv - 1; + r__1 = -(*tau); + sger_(&i__1, &lastc, &r__1, &v[*incv + 1], incv, &work[1], &c__1, + &c__[c_dim1 + 2], ldc); + } + } else { + +/* Form C * H */ + + if (lastv == 1) { + +/* C(1:lastc,1) := ( 1 - tau ) * C(1:lastc,1) */ + + r__1 = 1.f - *tau; + sscal_(&lastc, &r__1, &c__[c_offset], &c__1); + } else { + +/* w(1:lastc,1) := C(1:lastc,2:lastv) * v(2:lastv,1) */ + + i__1 = lastv - 1; + sgemv_("No transpose", &lastc, &i__1, &c_b4, &c__[(c_dim1 << 1) + + 1], ldc, &v[*incv + 1], incv, &c_b5, &work[1], &c__1); + +/* w(1:lastc,1) += v(1,1) * C(1:lastc,1) */ + + saxpy_(&lastc, &c_b4, &c__[c_offset], &c__1, &work[1], &c__1); + +/* C(1:lastc,1) += - tau * v(1,1) * w(1:lastc,1) */ + + r__1 = -(*tau); + saxpy_(&lastc, &r__1, &work[1], &c__1, &c__[c_offset], &c__1); + +/* C(1:lastc,2:lastv) += - tau * w(1:lastc,1) * v(2:lastv)**T */ + + i__1 = lastv - 1; + r__1 = -(*tau); + sger_(&lastc, &i__1, &r__1, &work[1], &c__1, &v[*incv + 1], incv, + &c__[(c_dim1 << 1) + 1], ldc); + } + } + return 0; + +/* End of SLARF1F */ + +} /* slarf1f_ */ + diff --git a/lapack-netlib/SRC/slarf1f.f b/lapack-netlib/SRC/slarf1f.f new file mode 100644 index 0000000000..d0c015eacf --- /dev/null +++ b/lapack-netlib/SRC/slarf1f.f @@ -0,0 +1,254 @@ +*> \brief \b SLARF1F applies an elementary reflector to a general rectangular +* matrix assuming v(1) = 1. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> Download SLARF1F + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +* +* Definition: +* =========== +* +* SUBROUTINE SLARF1F( SIDE, M, N, V, INCV, TAU, C, LDC, WORK ) +* +* .. Scalar Arguments .. +* CHARACTER SIDE +* INTEGER INCV, LDC, M, N +* REAL TAU +* .. +* .. Array Arguments .. +* REAL C( LDC, * ), V( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SLARF1F applies a real elementary reflector H to a real m by n matrix +*> C, from either the left or the right. H is represented in the form +*> +*> H = I - tau * v * v**T +*> +*> where tau is a real scalar and v is a real vector assuming v(1) = 1. +*> +*> If tau = 0, then H is taken to be the unit matrix. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] SIDE +*> \verbatim +*> SIDE is CHARACTER*1 +*> = 'L': form H * C +*> = 'R': form C * H +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix C. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix C. +*> \endverbatim +*> +*> \param[in] V +*> \verbatim +*> V is REAL array, dimension +*> (1 + (M-1)*abs(INCV)) if SIDE = 'L' +*> or (1 + (N-1)*abs(INCV)) if SIDE = 'R' +*> The vector v in the representation of H. V is not used if +*> TAU = 0. +*> \endverbatim +*> +*> \param[in] INCV +*> \verbatim +*> INCV is INTEGER +*> The increment between elements of v. INCV <> 0. +*> \endverbatim +*> +*> \param[in] TAU +*> \verbatim +*> TAU is REAL +*> The value tau in the representation of H. +*> \endverbatim +*> +*> \param[in,out] C +*> \verbatim +*> C is REAL array, dimension (LDC,N) +*> On entry, the m by n matrix C. +*> On exit, C is overwritten by the matrix H * C if SIDE = 'L', +*> or C * H if SIDE = 'R'. +*> \endverbatim +*> +*> \param[in] LDC +*> \verbatim +*> LDC is INTEGER +*> The leading dimension of the array C. LDC >= max(1,M). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension +*> (N) if SIDE = 'L' +*> or (M) if SIDE = 'R' +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \ingroup larf1f +* +* ===================================================================== + SUBROUTINE SLARF1F( SIDE, M, N, V, INCV, TAU, C, LDC, WORK ) + IMPLICIT NONE +* +* -- LAPACK auxiliary routine -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* +* .. Scalar Arguments .. + CHARACTER SIDE + INTEGER INCV, LDC, M, N + REAL TAU +* .. +* .. Array Arguments .. + REAL C( LDC, * ), V( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL APPLYLEFT + INTEGER I, LASTV, LASTC +* .. +* .. External Subroutines .. + EXTERNAL SGEMV, SGER, SAXPY, SSCAL +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILASLR, ILASLC + EXTERNAL LSAME, ILASLR, ILASLC +* .. +* .. Executable Statements .. +* + APPLYLEFT = LSAME( SIDE, 'L' ) + LASTV = 1 + LASTC = 0 + IF( TAU.NE.ZERO ) THEN +! Set up variables for scanning V. LASTV begins pointing to the end +! of V up to V(1). + IF( APPLYLEFT ) THEN + LASTV = M + ELSE + LASTV = N + END IF + IF( INCV.GT.0 ) THEN + I = 1 + (LASTV-1) * INCV + ELSE + I = 1 + END IF +! Look for the last non-zero row in V. + DO WHILE( LASTV.GT.1 .AND. V( I ).EQ.ZERO ) + LASTV = LASTV - 1 + I = I - INCV + END DO + IF( APPLYLEFT ) THEN +! Scan for the last non-zero column in C(1:lastv,:). + LASTC = ILASLC(LASTV, N, C, LDC) + ELSE +! Scan for the last non-zero row in C(:,1:lastv). + LASTC = ILASLR(M, LASTV, C, LDC) + END IF + END IF + IF( LASTC.EQ.0 ) THEN + RETURN + END IF + IF( APPLYLEFT ) THEN +* +* Form H * C +* + IF( LASTV.EQ.1 ) THEN +* +* C(1,1:lastc) := ( 1 - tau ) * C(1,1:lastc) +* + CALL SSCAL( LASTC, ONE - TAU, C, LDC ) + ELSE +* +* w(1:lastc,1) := C(2:lastv,1:lastc)**T * v(2:lastv,1) +* + CALL SGEMV( 'Transpose', LASTV - 1, LASTC, ONE, C( 2, 1 ), + $ LDC, V( 1 + INCV ), INCV, ZERO, WORK, 1 ) +* +* w(1:lastc,1) += v(1,1) * C(1,1:lastc)**T +* + CALL SAXPY( LASTC, ONE, C, LDC, WORK, 1 ) +* +* C(1, 1:lastc) += - tau * v(1,1) * w(1:lastc,1)**T +* + CALL SAXPY( LASTC, -TAU, WORK, 1, C, LDC ) +* +* C(2:lastv,1:lastc) += - tau * v(2:lastv,1) * w(1:lastc,1)**T +* + CALL SGER( LASTV - 1, LASTC, -TAU, V( 1 + INCV ), INCV, + $ WORK, 1, C( 2, 1 ), LDC ) + END IF + ELSE +* +* Form C * H +* + IF( LASTV.EQ.1 ) THEN +* +* C(1:lastc,1) := ( 1 - tau ) * C(1:lastc,1) +* + CALL SSCAL( LASTC, ONE - TAU, C, 1 ) + ELSE +* +* w(1:lastc,1) := C(1:lastc,2:lastv) * v(2:lastv,1) +* + CALL SGEMV( 'No transpose', LASTC, LASTV - 1, ONE, + $ C( 1, 2 ), LDC, V( 1 + INCV ), INCV, ZERO, + $ WORK, 1 ) +* +* w(1:lastc,1) += v(1,1) * C(1:lastc,1) +* + CALL SAXPY( LASTC, ONE, C, 1, WORK, 1 ) +* +* C(1:lastc,1) += - tau * v(1,1) * w(1:lastc,1) +* + CALL SAXPY( LASTC, -TAU, WORK, 1, C, 1 ) +* +* C(1:lastc,2:lastv) += - tau * w(1:lastc,1) * v(2:lastv)**T +* + CALL SGER( LASTC, LASTV - 1, -TAU, WORK, 1, + $ V( 1 + INCV ), INCV, C( 1, 2 ), LDC ) + END IF + END IF + RETURN +* +* End of SLARF1F +* + END diff --git a/lapack-netlib/SRC/slarf1l.c b/lapack-netlib/SRC/slarf1l.c new file mode 100644 index 0000000000..ce7a8c4f85 --- /dev/null +++ b/lapack-netlib/SRC/slarf1l.c @@ -0,0 +1,530 @@ +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +#if defined(_WIN64) +typedef long long BLASLONG; +typedef unsigned long long BLASULONG; +#else +typedef long BLASLONG; +typedef unsigned long BLASULONG; +#endif + +#ifdef LAPACK_ILP64 +typedef BLASLONG blasint; +#if defined(_WIN64) +#define blasabs(x) llabs(x) +#else +#define blasabs(x) labs(x) +#endif +#else +typedef int blasint; +#define blasabs(x) abs(x) +#endif + +typedef blasint integer; + +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +#ifdef _MSC_VER +static inline _Fcomplex Cf(complex *z) {_Fcomplex zz={z->r , z->i}; return zz;} +static inline _Dcomplex Cd(doublecomplex *z) {_Dcomplex zz={z->r , z->i};return zz;} +static inline _Fcomplex * _pCf(complex *z) {return (_Fcomplex*)z;} +static inline _Dcomplex * _pCd(doublecomplex *z) {return (_Dcomplex*)z;} +#else +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#endif +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#ifdef _MSC_VER +#define c_div(c, a, b) {Cf(c)._Val[0] = (Cf(a)._Val[0]/Cf(b)._Val[0]); Cf(c)._Val[1]=(Cf(a)._Val[1]/Cf(b)._Val[1]);} +#define z_div(c, a, b) {Cd(c)._Val[0] = (Cd(a)._Val[0]/Cd(b)._Val[0]); Cd(c)._Val[1]=(Cd(a)._Val[1]/Cd(b)._Val[1]);} +#else +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#endif +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conjf(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimagf(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + + +/* Table of constant values */ + +static real c_b4 = 1.f; +static real c_b5 = 0.f; +static integer c__1 = 1; + +/* > \brief \b SLARF1L applies an elementary reflector to a general rectangular */ +/* matrix assuming v(lastv) = 1, where lastv is the last non-zero */ +/* element */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > Download SLARF1L + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE SLARF1L( SIDE, M, N, V, INCV, TAU, C, LDC, WORK ) */ + +/* CHARACTER SIDE */ +/* INTEGER INCV, LDC, M, N */ +/* REAL TAU */ +/* REAL C( LDC, * ), V( * ), WORK( * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > SLARF1L applies a real elementary reflector H to a real m by n matrix */ +/* > C, from either the left or the right. H is represented in the form */ +/* > */ +/* > H = I - tau * v * v**T */ +/* > */ +/* > where tau is a real scalar and v is a real vector assuming v(lastv) = 1, */ +/* > where lastv is the last non-zero element. */ +/* > */ +/* > If tau = 0, then H is taken to be the unit matrix. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] SIDE */ +/* > \verbatim */ +/* > SIDE is CHARACTER*1 */ +/* > = 'L': form H * C */ +/* > = 'R': form C * H */ +/* > \endverbatim */ +/* > */ +/* > \param[in] M */ +/* > \verbatim */ +/* > M is INTEGER */ +/* > The number of rows of the matrix C. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The number of columns of the matrix C. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] V */ +/* > \verbatim */ +/* > V is REAL array, dimension */ +/* > (1 + (M-1)*abs(INCV)) if SIDE = 'L' */ +/* > or (1 + (N-1)*abs(INCV)) if SIDE = 'R' */ +/* > The vector v in the representation of H. V is not used if */ +/* > TAU = 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] INCV */ +/* > \verbatim */ +/* > INCV is INTEGER */ +/* > The increment between elements of v. INCV > 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] TAU */ +/* > \verbatim */ +/* > TAU is REAL */ +/* > The value tau in the representation of H. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] C */ +/* > \verbatim */ +/* > C is REAL array, dimension (LDC,N) */ +/* > On entry, the m by n matrix C. */ +/* > On exit, C is overwritten by the matrix H * C if SIDE = 'L', */ +/* > or C * H if SIDE = 'R'. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDC */ +/* > \verbatim */ +/* > LDC is INTEGER */ +/* > The leading dimension of the array C. LDC >= f2cmax(1,M). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is REAL array, dimension */ +/* > (N) if SIDE = 'L' */ +/* > or (M) if SIDE = 'R' */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \ingroup larf1l */ + +/* ===================================================================== */ +/* Subroutine */ int slarf1l_(char *side, integer *m, integer *n, real *v, + integer *incv, real *tau, real *c__, integer *ldc, real *work) +{ + /* System generated locals */ + integer c_dim1, c_offset, i__1; + real r__1; + + /* Local variables */ + integer i__; + logical applyleft; + extern /* Subroutine */ int sger_(integer *, integer *, real *, real *, + integer *, real *, integer *, real *, integer *); + extern logical lsame_(char *, char *); + extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *); + integer lastc; + extern /* Subroutine */ int sgemv_(char *, integer *, integer *, real *, + real *, integer *, real *, integer *, real *, real *, integer *); + integer lastv; + extern /* Subroutine */ int saxpy_(integer *, real *, real *, integer *, + real *, integer *); + extern integer ilaslc_(integer *, integer *, real *, integer *), ilaslr_( + integer *, integer *, real *, integer *); + integer firstv; + + +/* -- LAPACK auxiliary routine -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + + +/* ===================================================================== */ + + + /* Parameter adjustments */ + --v; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + --work; + + /* Function Body */ + applyleft = lsame_(side, "L"); + firstv = 1; + lastc = 0; + if (*tau != 0.f) { +/* Set up variables for scanning V. LASTV begins pointing to the end */ +/* of V up to V(1). */ + if (applyleft) { + lastv = *m; + } else { + lastv = *n; + } + i__ = 1; +/* Look for the last non-zero row in V. */ + while(lastv > firstv && v[i__] == 0.f) { + ++firstv; + i__ += *incv; + } + if (applyleft) { +/* Scan for the last non-zero column in C(1:lastv,:). */ + lastc = ilaslc_(&lastv, n, &c__[c_offset], ldc); + } else { +/* Scan for the last non-zero row in C(:,1:lastv). */ + lastc = ilaslr_(m, &lastv, &c__[c_offset], ldc); + } + } + if (lastc == 0) { + return 0; + } + if (applyleft) { + +/* Form H * C */ + + if (lastv == firstv) { + +/* C(lastv,1:lastc) := ( 1 - tau ) * C(lastv,1:lastc) */ + + r__1 = 1.f - *tau; + sscal_(&lastc, &r__1, &c__[lastv + c_dim1], ldc); + } else { + +/* w(1:lastc,1) := C(firstv:lastv-1,1:lastc)**T * v(firstv:lastv-1,1) */ + + i__1 = lastv - firstv; + sgemv_("Transpose", &i__1, &lastc, &c_b4, &c__[firstv + c_dim1], + ldc, &v[i__], incv, &c_b5, &work[1], &c__1); + +/* w(1:lastc,1) += C(lastv,1:lastc)**T * v(lastv,1) */ + + saxpy_(&lastc, &c_b4, &c__[lastv + c_dim1], ldc, &work[1], &c__1); + +/* C(lastv,1:lastc) += - tau * v(lastv,1) * w(1:lastc,1)**T */ + + r__1 = -(*tau); + saxpy_(&lastc, &r__1, &work[1], &c__1, &c__[lastv + c_dim1], ldc); + +/* C(firstv:lastv-1,1:lastc) += - tau * v(firstv:lastv-1,1) * w(1:lastc,1)**T */ + + i__1 = lastv - firstv; + r__1 = -(*tau); + sger_(&i__1, &lastc, &r__1, &v[i__], incv, &work[1], &c__1, &c__[ + firstv + c_dim1], ldc); + } + } else { + +/* Form C * H */ + + if (lastv == firstv) { + +/* C(1:lastc,lastv) := ( 1 - tau ) * C(1:lastc,lastv) */ + + r__1 = 1.f - *tau; + sscal_(&lastc, &r__1, &c__[lastv * c_dim1 + 1], &c__1); + } else { + +/* w(1:lastc,1) := C(1:lastc,firstv:lastv-1) * v(firstv:lastv-1,1) */ + + i__1 = lastv - firstv; + sgemv_("No transpose", &lastc, &i__1, &c_b4, &c__[firstv * c_dim1 + + 1], ldc, &v[i__], incv, &c_b5, &work[1], &c__1); + +/* w(1:lastc,1) += C(1:lastc,lastv) * v(lastv,1) */ + + saxpy_(&lastc, &c_b4, &c__[lastv * c_dim1 + 1], &c__1, &work[1], & + c__1); + +/* C(1:lastc,lastv) += - tau * v(lastv,1) * w(1:lastc,1) */ + + r__1 = -(*tau); + saxpy_(&lastc, &r__1, &work[1], &c__1, &c__[lastv * c_dim1 + 1], & + c__1); + +/* C(1:lastc,firstv:lastv-1) += - tau * w(1:lastc,1) * v(firstv:lastv-1)**T */ + + i__1 = lastv - firstv; + r__1 = -(*tau); + sger_(&lastc, &i__1, &r__1, &work[1], &c__1, &v[i__], incv, &c__[ + firstv * c_dim1 + 1], ldc); + } + } + return 0; + +/* End of SLARF1L */ + +} /* slarf1l_ */ + diff --git a/lapack-netlib/SRC/slarf1l.f b/lapack-netlib/SRC/slarf1l.f new file mode 100644 index 0000000000..d4fbb60108 --- /dev/null +++ b/lapack-netlib/SRC/slarf1l.f @@ -0,0 +1,253 @@ +*> \brief \b SLARF1L applies an elementary reflector to a general rectangular +* matrix assuming v(lastv) = 1, where lastv is the last non-zero +* element +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> Download SLARF1L + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +* +* Definition: +* =========== +* +* SUBROUTINE SLARF1L( SIDE, M, N, V, INCV, TAU, C, LDC, WORK ) +* +* .. Scalar Arguments .. +* CHARACTER SIDE +* INTEGER INCV, LDC, M, N +* REAL TAU +* .. +* .. Array Arguments .. +* REAL C( LDC, * ), V( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SLARF1L applies a real elementary reflector H to a real m by n matrix +*> C, from either the left or the right. H is represented in the form +*> +*> H = I - tau * v * v**T +*> +*> where tau is a real scalar and v is a real vector assuming v(lastv) = 1, +*> where lastv is the last non-zero element. +*> +*> If tau = 0, then H is taken to be the unit matrix. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] SIDE +*> \verbatim +*> SIDE is CHARACTER*1 +*> = 'L': form H * C +*> = 'R': form C * H +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix C. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix C. +*> \endverbatim +*> +*> \param[in] V +*> \verbatim +*> V is REAL array, dimension +*> (1 + (M-1)*abs(INCV)) if SIDE = 'L' +*> or (1 + (N-1)*abs(INCV)) if SIDE = 'R' +*> The vector v in the representation of H. V is not used if +*> TAU = 0. +*> \endverbatim +*> +*> \param[in] INCV +*> \verbatim +*> INCV is INTEGER +*> The increment between elements of v. INCV > 0. +*> \endverbatim +*> +*> \param[in] TAU +*> \verbatim +*> TAU is REAL +*> The value tau in the representation of H. +*> \endverbatim +*> +*> \param[in,out] C +*> \verbatim +*> C is REAL array, dimension (LDC,N) +*> On entry, the m by n matrix C. +*> On exit, C is overwritten by the matrix H * C if SIDE = 'L', +*> or C * H if SIDE = 'R'. +*> \endverbatim +*> +*> \param[in] LDC +*> \verbatim +*> LDC is INTEGER +*> The leading dimension of the array C. LDC >= max(1,M). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension +*> (N) if SIDE = 'L' +*> or (M) if SIDE = 'R' +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \ingroup larf1l +* +* ===================================================================== + SUBROUTINE SLARF1L( SIDE, M, N, V, INCV, TAU, C, LDC, WORK ) + IMPLICIT NONE +* +* -- LAPACK auxiliary routine -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* +* .. Scalar Arguments .. + CHARACTER SIDE + INTEGER INCV, LDC, M, N + REAL TAU +* .. +* .. Array Arguments .. + REAL C( LDC, * ), V( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL APPLYLEFT + INTEGER I, LASTV, LASTC, FIRSTV +* .. +* .. External Subroutines .. + EXTERNAL SGEMV, SGER, SAXPY, SSCAL +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILASLR, ILASLC + EXTERNAL LSAME, ILASLR, ILASLC +* .. +* .. Executable Statements .. +* + APPLYLEFT = LSAME( SIDE, 'L' ) + FIRSTV = 1 + LASTC = 0 + IF( TAU.NE.ZERO ) THEN +! Set up variables for scanning V. LASTV begins pointing to the end +! of V up to V(1). + IF( APPLYLEFT ) THEN + LASTV = M + ELSE + LASTV = N + END IF + I = 1 +! Look for the last non-zero row in V. + DO WHILE( LASTV.GT.FIRSTV .AND. V( I ).EQ.ZERO ) + FIRSTV = FIRSTV + 1 + I = I + INCV + END DO + IF( APPLYLEFT ) THEN +! Scan for the last non-zero column in C(1:lastv,:). + LASTC = ILASLC(LASTV, N, C, LDC) + ELSE +! Scan for the last non-zero row in C(:,1:lastv). + LASTC = ILASLR(M, LASTV, C, LDC) + END IF + END IF + IF( LASTC.EQ.0 ) THEN + RETURN + END IF + IF( APPLYLEFT ) THEN +* +* Form H * C +* + IF( LASTV.EQ.FIRSTV ) THEN +* +* C(lastv,1:lastc) := ( 1 - tau ) * C(lastv,1:lastc) +* + CALL SSCAL( LASTC, ONE - TAU, C( LASTV, 1 ), LDC ) + ELSE +* +* w(1:lastc,1) := C(firstv:lastv-1,1:lastc)**T * v(firstv:lastv-1,1) +* + CALL SGEMV( 'Transpose', LASTV - FIRSTV, LASTC, ONE, + $ C( FIRSTV, 1 ), LDC, V( I ), INCV, ZERO, + $ WORK, 1 ) +* +* w(1:lastc,1) += C(lastv,1:lastc)**T * v(lastv,1) +* + CALL SAXPY( LASTC, ONE, C( LASTV, 1 ), LDC, WORK, 1 ) +* +* C(lastv,1:lastc) += - tau * v(lastv,1) * w(1:lastc,1)**T +* + CALL SAXPY( LASTC, -TAU, WORK, 1, C( LASTV, 1 ), LDC ) +* +* C(firstv:lastv-1,1:lastc) += - tau * v(firstv:lastv-1,1) * w(1:lastc,1)**T +* + CALL SGER( LASTV - FIRSTV, LASTC, -TAU, V( I ), INCV, + $ WORK, 1, C( FIRSTV, 1 ), LDC) + END IF + ELSE +* +* Form C * H +* + IF( LASTV.EQ.FIRSTV ) THEN +* +* C(1:lastc,lastv) := ( 1 - tau ) * C(1:lastc,lastv) +* + CALL SSCAL( LASTC, ONE - TAU, C( 1, LASTV ), 1 ) + ELSE +* +* w(1:lastc,1) := C(1:lastc,firstv:lastv-1) * v(firstv:lastv-1,1) +* + CALL SGEMV( 'No transpose', LASTC, LASTV - FIRSTV, ONE, + $ C( 1, FIRSTV ), LDC, V( I ), INCV, ZERO, + $ WORK, 1 ) +* +* w(1:lastc,1) += C(1:lastc,lastv) * v(lastv,1) +* + CALL SAXPY( LASTC, ONE, C( 1, LASTV ), 1, WORK, 1 ) +* +* C(1:lastc,lastv) += - tau * v(lastv,1) * w(1:lastc,1) +* + CALL SAXPY( LASTC, -TAU, WORK, 1, C( 1, LASTV ), 1 ) +* +* C(1:lastc,firstv:lastv-1) += - tau * w(1:lastc,1) * v(firstv:lastv-1)**T +* + CALL SGER( LASTC, LASTV - FIRSTV, -TAU, WORK, 1, V( I ), + $ INCV, C( 1, FIRSTV ), LDC ) + END IF + END IF + RETURN +* +* End of SLARF1L +* + END diff --git a/lapack-netlib/SRC/slascl.f b/lapack-netlib/SRC/slascl.f index 28cbd6514b..84b6010227 100644 --- a/lapack-netlib/SRC/slascl.f +++ b/lapack-netlib/SRC/slascl.f @@ -5,7 +5,6 @@ * Online html documentation available at * http://www.netlib.org/lapack/explore-html/ * -*> \htmlonly *> Download SLASCL + dependencies *> *> [TGZ] @@ -13,7 +12,6 @@ *> [ZIP] *> *> [TXT] -*> \endhtmlonly * * Definition: * =========== @@ -136,10 +134,12 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup OTHERauxiliary +*> \ingroup lascl * * ===================================================================== - SUBROUTINE SLASCL( TYPE, KL, KU, CFROM, CTO, M, N, A, LDA, INFO ) + SUBROUTINE SLASCL( TYPE, KL, KU, CFROM, CTO, M, N, A, LDA, + $ INFO ) + IMPLICIT NONE * * -- LAPACK auxiliary routine -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- @@ -291,7 +291,7 @@ SUBROUTINE SLASCL( TYPE, KL, KU, CFROM, CTO, M, N, A, LDA, INFO ) * * Lower triangular matrix * - DO 50 J = 1, N + DO 50 J = 1, MIN( M, N ) DO 40 I = J, M A( I, J ) = A( I, J )*MUL 40 CONTINUE diff --git a/lapack-netlib/SRC/sopmtr.f b/lapack-netlib/SRC/sopmtr.f index c1148e01f4..19e9f6af3a 100644 --- a/lapack-netlib/SRC/sopmtr.f +++ b/lapack-netlib/SRC/sopmtr.f @@ -5,7 +5,6 @@ * Online html documentation available at * http://www.netlib.org/lapack/explore-html/ * -*> \htmlonly *> Download SOPMTR + dependencies *> *> [TGZ] @@ -13,7 +12,6 @@ *> [ZIP] *> *> [TXT] -*> \endhtmlonly * * Definition: * =========== @@ -142,11 +140,13 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup realOTHERcomputational +*> \ingroup upmtr * * ===================================================================== - SUBROUTINE SOPMTR( SIDE, UPLO, TRANS, M, N, AP, TAU, C, LDC, WORK, + SUBROUTINE SOPMTR( SIDE, UPLO, TRANS, M, N, AP, TAU, C, LDC, + $ WORK, $ INFO ) + IMPLICIT NONE * * -- LAPACK computational routine -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- @@ -162,21 +162,16 @@ SUBROUTINE SOPMTR( SIDE, UPLO, TRANS, M, N, AP, TAU, C, LDC, WORK, * * ===================================================================== * -* .. Parameters .. - REAL ONE - PARAMETER ( ONE = 1.0E+0 ) -* .. * .. Local Scalars .. LOGICAL FORWRD, LEFT, NOTRAN, UPPER INTEGER I, I1, I2, I3, IC, II, JC, MI, NI, NQ - REAL AII * .. * .. External Functions .. LOGICAL LSAME EXTERNAL LSAME * .. * .. External Subroutines .. - EXTERNAL SLARF, XERBLA + EXTERNAL SLARF1F, SLARF1L, XERBLA * .. * .. Intrinsic Functions .. INTRINSIC MAX @@ -260,11 +255,8 @@ SUBROUTINE SOPMTR( SIDE, UPLO, TRANS, M, N, AP, TAU, C, LDC, WORK, * * Apply H(i) * - AII = AP( II ) - AP( II ) = ONE - CALL SLARF( SIDE, MI, NI, AP( II-I+1 ), 1, TAU( I ), C, LDC, - $ WORK ) - AP( II ) = AII + CALL SLARF1L( SIDE, MI, NI, AP( II-I+1 ), 1, TAU( I ), C, + $ LDC, WORK ) * IF( FORWRD ) THEN II = II + I + 2 @@ -300,8 +292,6 @@ SUBROUTINE SOPMTR( SIDE, UPLO, TRANS, M, N, AP, TAU, C, LDC, WORK, END IF * DO 20 I = I1, I2, I3 - AII = AP( II ) - AP( II ) = ONE IF( LEFT ) THEN * * H(i) is applied to C(i+1:m,1:n) @@ -318,9 +308,8 @@ SUBROUTINE SOPMTR( SIDE, UPLO, TRANS, M, N, AP, TAU, C, LDC, WORK, * * Apply H(i) * - CALL SLARF( SIDE, MI, NI, AP( II ), 1, TAU( I ), - $ C( IC, JC ), LDC, WORK ) - AP( II ) = AII + CALL SLARF1F( SIDE, MI, NI, AP( II ), 1, TAU( I ), + $ C( IC, JC ), LDC, WORK ) * IF( FORWRD ) THEN II = II + NQ - I + 1 diff --git a/lapack-netlib/SRC/sorbdb.f b/lapack-netlib/SRC/sorbdb.f index 351172ff16..17aba5db49 100644 --- a/lapack-netlib/SRC/sorbdb.f +++ b/lapack-netlib/SRC/sorbdb.f @@ -5,7 +5,6 @@ * Online html documentation available at * http://www.netlib.org/lapack/explore-html/ * -*> \htmlonly *> Download SORBDB + dependencies *> *> [TGZ] @@ -13,7 +12,6 @@ *> [ZIP] *> *> [TXT] -*> \endhtmlonly * * Definition: * =========== @@ -255,7 +253,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup realOTHERcomputational +*> \ingroup unbdb * *> \par Further Details: * ===================== @@ -281,9 +279,11 @@ *> Algorithms, 50(1):33-65, 2009. *> * ===================================================================== - SUBROUTINE SORBDB( TRANS, SIGNS, M, P, Q, X11, LDX11, X12, LDX12, + SUBROUTINE SORBDB( TRANS, SIGNS, M, P, Q, X11, LDX11, X12, + $ LDX12, $ X21, LDX21, X22, LDX22, THETA, PHI, TAUP1, $ TAUP2, TAUQ1, TAUQ2, WORK, LWORK, INFO ) + IMPLICIT NONE * * -- LAPACK computational routine -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- @@ -306,8 +306,6 @@ SUBROUTINE SORBDB( TRANS, SIGNS, M, P, Q, X11, LDX11, X12, LDX12, * .. Parameters .. REAL REALONE PARAMETER ( REALONE = 1.0E0 ) - REAL ONE - PARAMETER ( ONE = 1.0E0 ) * .. * .. Local Scalars .. LOGICAL COLMAJOR, LQUERY @@ -315,7 +313,8 @@ SUBROUTINE SORBDB( TRANS, SIGNS, M, P, Q, X11, LDX11, X12, LDX12, REAL Z1, Z2, Z3, Z4 * .. * .. External Subroutines .. - EXTERNAL SAXPY, SLARF, SLARFGP, SSCAL, XERBLA + EXTERNAL SAXPY, SLARF1F, SLARFGP, SSCAL, + $ XERBLA * .. * .. External Functions .. REAL SNRM2 @@ -374,7 +373,7 @@ SUBROUTINE SORBDB( TRANS, SIGNS, M, P, Q, X11, LDX11, X12, LDX12, IF( INFO .EQ. 0 ) THEN LWORKOPT = M - Q LWORKMIN = M - Q - WORK(1) = LWORKOPT + WORK(1) = REAL( LWORKOPT ) IF( LWORK .LT. LWORKMIN .AND. .NOT. LQUERY ) THEN INFO = -21 END IF @@ -398,14 +397,16 @@ SUBROUTINE SORBDB( TRANS, SIGNS, M, P, Q, X11, LDX11, X12, LDX12, CALL SSCAL( P-I+1, Z1, X11(I,I), 1 ) ELSE CALL SSCAL( P-I+1, Z1*COS(PHI(I-1)), X11(I,I), 1 ) - CALL SAXPY( P-I+1, -Z1*Z3*Z4*SIN(PHI(I-1)), X12(I,I-1), + CALL SAXPY( P-I+1, -Z1*Z3*Z4*SIN(PHI(I-1)), X12(I, + $ I-1), $ 1, X11(I,I), 1 ) END IF IF( I .EQ. 1 ) THEN CALL SSCAL( M-P-I+1, Z2, X21(I,I), 1 ) ELSE CALL SSCAL( M-P-I+1, Z2*COS(PHI(I-1)), X21(I,I), 1 ) - CALL SAXPY( M-P-I+1, -Z2*Z3*Z4*SIN(PHI(I-1)), X22(I,I-1), + CALL SAXPY( M-P-I+1, -Z2*Z3*Z4*SIN(PHI(I-1)), X22(I, + $ I-1), $ 1, X21(I,I), 1 ) END IF * @@ -413,44 +414,47 @@ SUBROUTINE SORBDB( TRANS, SIGNS, M, P, Q, X11, LDX11, X12, LDX12, $ SNRM2( P-I+1, X11(I,I), 1 ) ) * IF( P .GT. I ) THEN - CALL SLARFGP( P-I+1, X11(I,I), X11(I+1,I), 1, TAUP1(I) ) + CALL SLARFGP( P-I+1, X11(I,I), X11(I+1,I), 1, + $ TAUP1(I) ) ELSE IF( P .EQ. I ) THEN CALL SLARFGP( P-I+1, X11(I,I), X11(I,I), 1, TAUP1(I) ) END IF - X11(I,I) = ONE IF ( M-P .GT. I ) THEN CALL SLARFGP( M-P-I+1, X21(I,I), X21(I+1,I), 1, $ TAUP2(I) ) ELSE IF ( M-P .EQ. I ) THEN - CALL SLARFGP( M-P-I+1, X21(I,I), X21(I,I), 1, TAUP2(I) ) + CALL SLARFGP( M-P-I+1, X21(I,I), X21(I,I), 1, + $ TAUP2(I) ) END IF - X21(I,I) = ONE * IF ( Q .GT. I ) THEN - CALL SLARF( 'L', P-I+1, Q-I, X11(I,I), 1, TAUP1(I), - $ X11(I,I+1), LDX11, WORK ) + CALL SLARF1F( 'L', P-I+1, Q-I, X11(I,I), 1, TAUP1(I), + $ X11(I,I+1), LDX11, WORK ) END IF IF ( M-Q+1 .GT. I ) THEN - CALL SLARF( 'L', P-I+1, M-Q-I+1, X11(I,I), 1, TAUP1(I), - $ X12(I,I), LDX12, WORK ) + CALL SLARF1F( 'L', P-I+1, M-Q-I+1, X11(I,I), 1, + $ TAUP1(I), X12(I,I), LDX12, WORK ) END IF IF ( Q .GT. I ) THEN - CALL SLARF( 'L', M-P-I+1, Q-I, X21(I,I), 1, TAUP2(I), - $ X21(I,I+1), LDX21, WORK ) + CALL SLARF1F( 'L', M-P-I+1, Q-I, X21(I,I), 1, + $ TAUP2(I), X21(I,I+1), LDX21, WORK ) END IF IF ( M-Q+1 .GT. I ) THEN - CALL SLARF( 'L', M-P-I+1, M-Q-I+1, X21(I,I), 1, TAUP2(I), - $ X22(I,I), LDX22, WORK ) + CALL SLARF1F( 'L', M-P-I+1, M-Q-I+1, X21(I,I), 1, + $ TAUP2(I), X22(I,I), LDX22, WORK ) END IF * IF( I .LT. Q ) THEN CALL SSCAL( Q-I, -Z1*Z3*SIN(THETA(I)), X11(I,I+1), $ LDX11 ) - CALL SAXPY( Q-I, Z2*Z3*COS(THETA(I)), X21(I,I+1), LDX21, + CALL SAXPY( Q-I, Z2*Z3*COS(THETA(I)), X21(I,I+1), + $ LDX21, $ X11(I,I+1), LDX11 ) END IF - CALL SSCAL( M-Q-I+1, -Z1*Z4*SIN(THETA(I)), X12(I,I), LDX12 ) - CALL SAXPY( M-Q-I+1, Z2*Z4*COS(THETA(I)), X22(I,I), LDX22, + CALL SSCAL( M-Q-I+1, -Z1*Z4*SIN(THETA(I)), X12(I,I), + $ LDX12 ) + CALL SAXPY( M-Q-I+1, Z2*Z4*COS(THETA(I)), X22(I,I), + $ LDX22, $ X12(I,I), LDX12 ) * IF( I .LT. Q ) @@ -465,7 +469,6 @@ SUBROUTINE SORBDB( TRANS, SIGNS, M, P, Q, X11, LDX11, X12, LDX12, CALL SLARFGP( Q-I, X11(I,I+1), X11(I,I+2), LDX11, $ TAUQ1(I) ) END IF - X11(I,I+1) = ONE END IF IF ( Q+I-1 .LT. M ) THEN IF ( M-Q .EQ. I ) THEN @@ -476,21 +479,20 @@ SUBROUTINE SORBDB( TRANS, SIGNS, M, P, Q, X11, LDX11, X12, LDX12, $ TAUQ2(I) ) END IF END IF - X12(I,I) = ONE * IF( I .LT. Q ) THEN - CALL SLARF( 'R', P-I, Q-I, X11(I,I+1), LDX11, TAUQ1(I), - $ X11(I+1,I+1), LDX11, WORK ) - CALL SLARF( 'R', M-P-I, Q-I, X11(I,I+1), LDX11, TAUQ1(I), - $ X21(I+1,I+1), LDX21, WORK ) + CALL SLARF1F( 'R', P-I, Q-I, X11(I,I+1), LDX11, + $ TAUQ1(I), X11(I+1,I+1), LDX11, WORK ) + CALL SLARF1F( 'R', M-P-I, Q-I, X11(I,I+1), LDX11, + $ TAUQ1(I), X21(I+1,I+1), LDX21, WORK ) END IF IF ( P .GT. I ) THEN - CALL SLARF( 'R', P-I, M-Q-I+1, X12(I,I), LDX12, TAUQ2(I), - $ X12(I+1,I), LDX12, WORK ) + CALL SLARF1F( 'R', P-I, M-Q-I+1, X12(I,I), LDX12, + $ TAUQ2(I), X12(I+1,I), LDX12, WORK ) END IF IF ( M-P .GT. I ) THEN - CALL SLARF( 'R', M-P-I, M-Q-I+1, X12(I,I), LDX12, - $ TAUQ2(I), X22(I+1,I), LDX22, WORK ) + CALL SLARF1F( 'R', M-P-I, M-Q-I+1, X12(I,I), LDX12, + $ TAUQ2(I), X22(I+1,I), LDX22, WORK ) END IF * END DO @@ -507,15 +509,14 @@ SUBROUTINE SORBDB( TRANS, SIGNS, M, P, Q, X11, LDX11, X12, LDX12, CALL SLARFGP( M-Q-I+1, X12(I,I), X12(I,I+1), LDX12, $ TAUQ2(I) ) END IF - X12(I,I) = ONE * IF ( P .GT. I ) THEN - CALL SLARF( 'R', P-I, M-Q-I+1, X12(I,I), LDX12, TAUQ2(I), - $ X12(I+1,I), LDX12, WORK ) + CALL SLARF1F( 'R', P-I, M-Q-I+1, X12(I,I), LDX12, + $ TAUQ2(I), X12(I+1,I), LDX12, WORK ) END IF IF( M-P-Q .GE. 1 ) - $ CALL SLARF( 'R', M-P-Q, M-Q-I+1, X12(I,I), LDX12, - $ TAUQ2(I), X22(Q+1,I), LDX22, WORK ) + $ CALL SLARF1F( 'R', M-P-Q, M-Q-I+1, X12(I,I), LDX12, + $ TAUQ2(I), X22(Q+1,I), LDX22, WORK ) * END DO * @@ -531,10 +532,10 @@ SUBROUTINE SORBDB( TRANS, SIGNS, M, P, Q, X11, LDX11, X12, LDX12, CALL SLARFGP( M-P-Q-I+1, X22(Q+I,P+I), X22(Q+I,P+I+1), $ LDX22, TAUQ2(P+I) ) END IF - X22(Q+I,P+I) = ONE IF ( I .LT. M-P-Q ) THEN - CALL SLARF( 'R', M-P-Q-I, M-P-Q-I+1, X22(Q+I,P+I), LDX22, - $ TAUQ2(P+I), X22(Q+I+1,P+I), LDX22, WORK ) + CALL SLARF1F( 'R', M-P-Q-I, M-P-Q-I+1, X22(Q+I,P+I), + $ LDX22, TAUQ2(P+I), X22(Q+I+1,P+I), + $ LDX22, WORK ) END IF * END DO @@ -549,22 +550,25 @@ SUBROUTINE SORBDB( TRANS, SIGNS, M, P, Q, X11, LDX11, X12, LDX12, CALL SSCAL( P-I+1, Z1, X11(I,I), LDX11 ) ELSE CALL SSCAL( P-I+1, Z1*COS(PHI(I-1)), X11(I,I), LDX11 ) - CALL SAXPY( P-I+1, -Z1*Z3*Z4*SIN(PHI(I-1)), X12(I-1,I), + CALL SAXPY( P-I+1, -Z1*Z3*Z4*SIN(PHI(I-1)), X12(I-1, + $ I), $ LDX12, X11(I,I), LDX11 ) END IF IF( I .EQ. 1 ) THEN CALL SSCAL( M-P-I+1, Z2, X21(I,I), LDX21 ) ELSE - CALL SSCAL( M-P-I+1, Z2*COS(PHI(I-1)), X21(I,I), LDX21 ) - CALL SAXPY( M-P-I+1, -Z2*Z3*Z4*SIN(PHI(I-1)), X22(I-1,I), + CALL SSCAL( M-P-I+1, Z2*COS(PHI(I-1)), X21(I,I), + $ LDX21 ) + CALL SAXPY( M-P-I+1, -Z2*Z3*Z4*SIN(PHI(I-1)), X22(I-1, + $ I), $ LDX22, X21(I,I), LDX21 ) END IF * THETA(I) = ATAN2( SNRM2( M-P-I+1, X21(I,I), LDX21 ), $ SNRM2( P-I+1, X11(I,I), LDX11 ) ) * - CALL SLARFGP( P-I+1, X11(I,I), X11(I,I+1), LDX11, TAUP1(I) ) - X11(I,I) = ONE + CALL SLARFGP( P-I+1, X11(I,I), X11(I,I+1), LDX11, + $ TAUP1(I) ) IF ( I .EQ. M-P ) THEN CALL SLARFGP( M-P-I+1, X21(I,I), X21(I,I), LDX21, $ TAUP2(I) ) @@ -572,23 +576,22 @@ SUBROUTINE SORBDB( TRANS, SIGNS, M, P, Q, X11, LDX11, X12, LDX12, CALL SLARFGP( M-P-I+1, X21(I,I), X21(I,I+1), LDX21, $ TAUP2(I) ) END IF - X21(I,I) = ONE * IF ( Q .GT. I ) THEN - CALL SLARF( 'R', Q-I, P-I+1, X11(I,I), LDX11, TAUP1(I), - $ X11(I+1,I), LDX11, WORK ) + CALL SLARF1F( 'R', Q-I, P-I+1, X11(I,I), LDX11, + $ TAUP1(I), X11(I+1,I), LDX11, WORK ) END IF IF ( M-Q+1 .GT. I ) THEN - CALL SLARF( 'R', M-Q-I+1, P-I+1, X11(I,I), LDX11, - $ TAUP1(I), X12(I,I), LDX12, WORK ) + CALL SLARF1F( 'R', M-Q-I+1, P-I+1, X11(I,I), LDX11, + $ TAUP1(I), X12(I,I), LDX12, WORK ) END IF IF ( Q .GT. I ) THEN - CALL SLARF( 'R', Q-I, M-P-I+1, X21(I,I), LDX21, TAUP2(I), - $ X21(I+1,I), LDX21, WORK ) + CALL SLARF1F( 'R', Q-I, M-P-I+1, X21(I,I), LDX21, + $ TAUP2(I), X21(I+1,I), LDX21, WORK ) END IF IF ( M-Q+1 .GT. I ) THEN - CALL SLARF( 'R', M-Q-I+1, M-P-I+1, X21(I,I), LDX21, - $ TAUP2(I), X22(I,I), LDX22, WORK ) + CALL SLARF1F( 'R', M-Q-I+1, M-P-I+1, X21(I,I), LDX21, + $ TAUP2(I), X22(I,I), LDX22, WORK ) END IF * IF( I .LT. Q ) THEN @@ -612,7 +615,6 @@ SUBROUTINE SORBDB( TRANS, SIGNS, M, P, Q, X11, LDX11, X12, LDX12, CALL SLARFGP( Q-I, X11(I+1,I), X11(I+2,I), 1, $ TAUQ1(I) ) END IF - X11(I+1,I) = ONE END IF IF ( M-Q .GT. I ) THEN CALL SLARFGP( M-Q-I+1, X12(I,I), X12(I+1,I), 1, @@ -621,19 +623,18 @@ SUBROUTINE SORBDB( TRANS, SIGNS, M, P, Q, X11, LDX11, X12, LDX12, CALL SLARFGP( M-Q-I+1, X12(I,I), X12(I,I), 1, $ TAUQ2(I) ) END IF - X12(I,I) = ONE * IF( I .LT. Q ) THEN - CALL SLARF( 'L', Q-I, P-I, X11(I+1,I), 1, TAUQ1(I), - $ X11(I+1,I+1), LDX11, WORK ) - CALL SLARF( 'L', Q-I, M-P-I, X11(I+1,I), 1, TAUQ1(I), - $ X21(I+1,I+1), LDX21, WORK ) + CALL SLARF1F( 'L', Q-I, P-I, X11(I+1,I), 1, TAUQ1(I), + $ X11(I+1,I+1), LDX11, WORK ) + CALL SLARF1F( 'L', Q-I, M-P-I, X11(I+1,I), 1, + $ TAUQ1(I), X21(I+1,I+1), LDX21, WORK ) END IF - CALL SLARF( 'L', M-Q-I+1, P-I, X12(I,I), 1, TAUQ2(I), - $ X12(I,I+1), LDX12, WORK ) + CALL SLARF1F( 'L', M-Q-I+1, P-I, X12(I,I), 1, TAUQ2(I), + $ X12(I,I+1), LDX12, WORK ) IF ( M-P-I .GT. 0 ) THEN - CALL SLARF( 'L', M-Q-I+1, M-P-I, X12(I,I), 1, TAUQ2(I), - $ X22(I,I+1), LDX22, WORK ) + CALL SLARF1F( 'L', M-Q-I+1, M-P-I, X12(I,I), 1, + $ TAUQ2(I), X22(I,I+1), LDX22, WORK ) END IF * END DO @@ -643,16 +644,16 @@ SUBROUTINE SORBDB( TRANS, SIGNS, M, P, Q, X11, LDX11, X12, LDX12, DO I = Q + 1, P * CALL SSCAL( M-Q-I+1, -Z1*Z4, X12(I,I), 1 ) - CALL SLARFGP( M-Q-I+1, X12(I,I), X12(I+1,I), 1, TAUQ2(I) ) - X12(I,I) = ONE + CALL SLARFGP( M-Q-I+1, X12(I,I), X12(I+1,I), 1, + $ TAUQ2(I) ) * IF ( P .GT. I ) THEN - CALL SLARF( 'L', M-Q-I+1, P-I, X12(I,I), 1, TAUQ2(I), - $ X12(I,I+1), LDX12, WORK ) + CALL SLARF1F( 'L', M-Q-I+1, P-I, X12(I,I), 1, + $ TAUQ2(I), X12(I,I+1), LDX12, WORK ) END IF IF( M-P-Q .GE. 1 ) - $ CALL SLARF( 'L', M-Q-I+1, M-P-Q, X12(I,I), 1, TAUQ2(I), - $ X22(I,Q+1), LDX22, WORK ) + $ CALL SLARF1F( 'L', M-Q-I+1, M-P-Q, X12(I,I), 1, + $ TAUQ2(I), X22(I,Q+1), LDX22, WORK ) * END DO * @@ -662,15 +663,16 @@ SUBROUTINE SORBDB( TRANS, SIGNS, M, P, Q, X11, LDX11, X12, LDX12, * CALL SSCAL( M-P-Q-I+1, Z2*Z4, X22(P+I,Q+I), 1 ) IF ( M-P-Q .EQ. I ) THEN - CALL SLARFGP( M-P-Q-I+1, X22(P+I,Q+I), X22(P+I,Q+I), 1, + CALL SLARFGP( M-P-Q-I+1, X22(P+I,Q+I), X22(P+I,Q+I), + $ 1, $ TAUQ2(P+I) ) - X22(P+I,Q+I) = ONE ELSE - CALL SLARFGP( M-P-Q-I+1, X22(P+I,Q+I), X22(P+I+1,Q+I), 1, + CALL SLARFGP( M-P-Q-I+1, X22(P+I,Q+I), X22(P+I+1,Q+I), + $ 1, $ TAUQ2(P+I) ) - X22(P+I,Q+I) = ONE - CALL SLARF( 'L', M-P-Q-I+1, M-P-Q-I, X22(P+I,Q+I), 1, - $ TAUQ2(P+I), X22(P+I,Q+I+1), LDX22, WORK ) + CALL SLARF1F( 'L', M-P-Q-I+1, M-P-Q-I, X22(P+I,Q+I), + $ 1, TAUQ2(P+I), X22(P+I,Q+I+1), LDX22, + $ WORK ) END IF * * diff --git a/lapack-netlib/SRC/sorbdb1.f b/lapack-netlib/SRC/sorbdb1.f index 191e5742a4..52fb3c7b58 100644 --- a/lapack-netlib/SRC/sorbdb1.f +++ b/lapack-netlib/SRC/sorbdb1.f @@ -5,7 +5,6 @@ * Online html documentation available at * http://www.netlib.org/lapack/explore-html/ * -*> \htmlonly *> Download SORBDB1 + dependencies *> *> [TGZ] @@ -13,7 +12,6 @@ *> [ZIP] *> *> [TXT] -*> \endhtmlonly * * Definition: * =========== @@ -174,7 +172,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup realOTHERcomputational +*> \ingroup unbdb1 * *> \par Further Details: * ===================== @@ -198,8 +196,10 @@ *> Algorithms, 50(1):33-65, 2009. *> * ===================================================================== - SUBROUTINE SORBDB1( M, P, Q, X11, LDX11, X21, LDX21, THETA, PHI, + SUBROUTINE SORBDB1( M, P, Q, X11, LDX11, X21, LDX21, THETA, + $ PHI, $ TAUP1, TAUP2, TAUQ1, WORK, LWORK, INFO ) + IMPLICIT NONE * * -- LAPACK computational routine -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- @@ -216,10 +216,6 @@ SUBROUTINE SORBDB1( M, P, Q, X11, LDX11, X21, LDX21, THETA, PHI, * * ==================================================================== * -* .. Parameters .. - REAL ONE - PARAMETER ( ONE = 1.0E0 ) -* .. * .. Local Scalars .. REAL C, S INTEGER CHILDINFO, I, ILARF, IORBDB5, LLARF, LORBDB5, @@ -227,7 +223,8 @@ SUBROUTINE SORBDB1( M, P, Q, X11, LDX11, X21, LDX21, THETA, PHI, LOGICAL LQUERY * .. * .. External Subroutines .. - EXTERNAL SLARF, SLARFGP, SORBDB5, SROT, XERBLA + EXTERNAL SLARF1F, SLARFGP, SORBDB5, SROT, + $ XERBLA * .. * .. External Functions .. REAL SNRM2 @@ -264,7 +261,7 @@ SUBROUTINE SORBDB1( M, P, Q, X11, LDX11, X21, LDX21, THETA, PHI, LORBDB5 = Q-2 LWORKOPT = MAX( ILARF+LLARF-1, IORBDB5+LORBDB5-1 ) LWORKMIN = LWORKOPT - WORK(1) = LWORKOPT + WORK(1) = REAL( LWORKOPT ) IF( LWORK .LT. LWORKMIN .AND. .NOT.LQUERY ) THEN INFO = -14 END IF @@ -285,22 +282,22 @@ SUBROUTINE SORBDB1( M, P, Q, X11, LDX11, X21, LDX21, THETA, PHI, THETA(I) = ATAN2( X21(I,I), X11(I,I) ) C = COS( THETA(I) ) S = SIN( THETA(I) ) - X11(I,I) = ONE - X21(I,I) = ONE - CALL SLARF( 'L', P-I+1, Q-I, X11(I,I), 1, TAUP1(I), X11(I,I+1), - $ LDX11, WORK(ILARF) ) - CALL SLARF( 'L', M-P-I+1, Q-I, X21(I,I), 1, TAUP2(I), - $ X21(I,I+1), LDX21, WORK(ILARF) ) + CALL SLARF1F( 'L', P-I+1, Q-I, X11(I,I), 1, TAUP1(I), X11(I, + $ I+1), LDX11, WORK(ILARF) ) + CALL SLARF1F( 'L', M-P-I+1, Q-I, X21(I,I), 1, TAUP2(I), + $ X21(I,I+1), LDX21, WORK(ILARF) ) * IF( I .LT. Q ) THEN - CALL SROT( Q-I, X11(I,I+1), LDX11, X21(I,I+1), LDX21, C, S ) - CALL SLARFGP( Q-I, X21(I,I+1), X21(I,I+2), LDX21, TAUQ1(I) ) + CALL SROT( Q-I, X11(I,I+1), LDX11, X21(I,I+1), LDX21, C, + $ S ) + CALL SLARFGP( Q-I, X21(I,I+1), X21(I,I+2), LDX21, + $ TAUQ1(I) ) S = X21(I,I+1) - X21(I,I+1) = ONE - CALL SLARF( 'R', P-I, Q-I, X21(I,I+1), LDX21, TAUQ1(I), - $ X11(I+1,I+1), LDX11, WORK(ILARF) ) - CALL SLARF( 'R', M-P-I, Q-I, X21(I,I+1), LDX21, TAUQ1(I), - $ X21(I+1,I+1), LDX21, WORK(ILARF) ) + CALL SLARF1F( 'R', P-I, Q-I, X21(I,I+1), LDX21, TAUQ1(I), + $ X11(I+1,I+1), LDX11, WORK(ILARF) ) + CALL SLARF1F( 'R', M-P-I, Q-I, X21(I,I+1), LDX21, + $ TAUQ1(I), X21(I+1,I+1), LDX21, + $ WORK(ILARF) ) C = SQRT( SNRM2( P-I, X11(I+1,I+1), 1 )**2 $ + SNRM2( M-P-I, X21(I+1,I+1), 1 )**2 ) PHI(I) = ATAN2( S, C ) diff --git a/lapack-netlib/SRC/sorbdb2.f b/lapack-netlib/SRC/sorbdb2.f index b2ff34bb1e..f4107d0d1f 100644 --- a/lapack-netlib/SRC/sorbdb2.f +++ b/lapack-netlib/SRC/sorbdb2.f @@ -5,7 +5,6 @@ * Online html documentation available at * http://www.netlib.org/lapack/explore-html/ * -*> \htmlonly *> Download SORBDB2 + dependencies *> *> [TGZ] @@ -13,7 +12,6 @@ *> [ZIP] *> *> [TXT] -*> \endhtmlonly * * Definition: * =========== @@ -172,7 +170,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup realOTHERcomputational +*> \ingroup unbdb2 * *> \par Further Details: * ===================== @@ -196,8 +194,10 @@ *> Algorithms, 50(1):33-65, 2009. *> * ===================================================================== - SUBROUTINE SORBDB2( M, P, Q, X11, LDX11, X21, LDX21, THETA, PHI, + SUBROUTINE SORBDB2( M, P, Q, X11, LDX11, X21, LDX21, THETA, + $ PHI, $ TAUP1, TAUP2, TAUQ1, WORK, LWORK, INFO ) + IMPLICIT NONE * * -- LAPACK computational routine -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- @@ -215,8 +215,8 @@ SUBROUTINE SORBDB2( M, P, Q, X11, LDX11, X21, LDX21, THETA, PHI, * ==================================================================== * * .. Parameters .. - REAL NEGONE, ONE - PARAMETER ( NEGONE = -1.0E0, ONE = 1.0E0 ) + REAL NEGONE + PARAMETER ( NEGONE = -1.0E0 ) * .. * .. Local Scalars .. REAL C, S @@ -225,7 +225,8 @@ SUBROUTINE SORBDB2( M, P, Q, X11, LDX11, X21, LDX21, THETA, PHI, LOGICAL LQUERY * .. * .. External Subroutines .. - EXTERNAL SLARF, SLARFGP, SORBDB5, SROT, SSCAL, XERBLA + EXTERNAL SLARF1F, SLARFGP, SORBDB5, SROT, SSCAL, + $ XERBLA * .. * .. External Functions .. REAL SNRM2 @@ -262,7 +263,7 @@ SUBROUTINE SORBDB2( M, P, Q, X11, LDX11, X21, LDX21, THETA, PHI, LORBDB5 = Q-1 LWORKOPT = MAX( ILARF+LLARF-1, IORBDB5+LORBDB5-1 ) LWORKMIN = LWORKOPT - WORK(1) = LWORKOPT + WORK(1) = REAL( LWORKOPT ) IF( LWORK .LT. LWORKMIN .AND. .NOT.LQUERY ) THEN INFO = -14 END IF @@ -279,15 +280,15 @@ SUBROUTINE SORBDB2( M, P, Q, X11, LDX11, X21, LDX21, THETA, PHI, DO I = 1, P * IF( I .GT. 1 ) THEN - CALL SROT( Q-I+1, X11(I,I), LDX11, X21(I-1,I), LDX21, C, S ) + CALL SROT( Q-I+1, X11(I,I), LDX11, X21(I-1,I), LDX21, C, + $ S ) END IF CALL SLARFGP( Q-I+1, X11(I,I), X11(I,I+1), LDX11, TAUQ1(I) ) C = X11(I,I) - X11(I,I) = ONE - CALL SLARF( 'R', P-I, Q-I+1, X11(I,I), LDX11, TAUQ1(I), - $ X11(I+1,I), LDX11, WORK(ILARF) ) - CALL SLARF( 'R', M-P-I+1, Q-I+1, X11(I,I), LDX11, TAUQ1(I), - $ X21(I,I), LDX21, WORK(ILARF) ) + CALL SLARF1F( 'R', P-I, Q-I+1, X11(I,I), LDX11, TAUQ1(I), + $ X11(I+1,I), LDX11, WORK(ILARF) ) + CALL SLARF1F( 'R', M-P-I+1, Q-I+1, X11(I,I), LDX11, + $ TAUQ1(I), X21(I,I), LDX21, WORK(ILARF) ) S = SQRT( SNRM2( P-I, X11(I+1,I), 1 )**2 $ + SNRM2( M-P-I+1, X21(I,I), 1 )**2 ) THETA(I) = ATAN2( S, C ) @@ -302,13 +303,11 @@ SUBROUTINE SORBDB2( M, P, Q, X11, LDX11, X21, LDX21, THETA, PHI, PHI(I) = ATAN2( X11(I+1,I), X21(I,I) ) C = COS( PHI(I) ) S = SIN( PHI(I) ) - X11(I+1,I) = ONE - CALL SLARF( 'L', P-I, Q-I, X11(I+1,I), 1, TAUP1(I), - $ X11(I+1,I+1), LDX11, WORK(ILARF) ) + CALL SLARF1F( 'L', P-I, Q-I, X11(I+1,I), 1, TAUP1(I), + $ X11(I+1,I+1), LDX11, WORK(ILARF) ) END IF - X21(I,I) = ONE - CALL SLARF( 'L', M-P-I+1, Q-I, X21(I,I), 1, TAUP2(I), - $ X21(I,I+1), LDX21, WORK(ILARF) ) + CALL SLARF1F( 'L', M-P-I+1, Q-I, X21(I,I), 1, TAUP2(I), + $ X21(I,I+1), LDX21, WORK(ILARF) ) * END DO * @@ -316,9 +315,8 @@ SUBROUTINE SORBDB2( M, P, Q, X11, LDX11, X21, LDX21, THETA, PHI, * DO I = P + 1, Q CALL SLARFGP( M-P-I+1, X21(I,I), X21(I+1,I), 1, TAUP2(I) ) - X21(I,I) = ONE - CALL SLARF( 'L', M-P-I+1, Q-I, X21(I,I), 1, TAUP2(I), - $ X21(I,I+1), LDX21, WORK(ILARF) ) + CALL SLARF1F( 'L', M-P-I+1, Q-I, X21(I,I), 1, TAUP2(I), + $ X21(I,I+1), LDX21, WORK(ILARF) ) END DO * RETURN diff --git a/lapack-netlib/SRC/sorbdb3.f b/lapack-netlib/SRC/sorbdb3.f index 99478c5d0d..3cf8f97355 100644 --- a/lapack-netlib/SRC/sorbdb3.f +++ b/lapack-netlib/SRC/sorbdb3.f @@ -5,7 +5,6 @@ * Online html documentation available at * http://www.netlib.org/lapack/explore-html/ * -*> \htmlonly *> Download SORBDB3 + dependencies *> *> [TGZ] @@ -13,7 +12,6 @@ *> [ZIP] *> *> [TXT] -*> \endhtmlonly * * Definition: * =========== @@ -173,7 +171,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup realOTHERcomputational +*> \ingroup unbdb3 * *> \par Further Details: * ===================== @@ -197,8 +195,10 @@ *> Algorithms, 50(1):33-65, 2009. *> * ===================================================================== - SUBROUTINE SORBDB3( M, P, Q, X11, LDX11, X21, LDX21, THETA, PHI, + SUBROUTINE SORBDB3( M, P, Q, X11, LDX11, X21, LDX21, THETA, + $ PHI, $ TAUP1, TAUP2, TAUQ1, WORK, LWORK, INFO ) + IMPLICIT NONE * * -- LAPACK computational routine -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- @@ -215,10 +215,6 @@ SUBROUTINE SORBDB3( M, P, Q, X11, LDX11, X21, LDX21, THETA, PHI, * * ==================================================================== * -* .. Parameters .. - REAL ONE - PARAMETER ( ONE = 1.0E0 ) -* .. * .. Local Scalars .. REAL C, S INTEGER CHILDINFO, I, ILARF, IORBDB5, LLARF, LORBDB5, @@ -226,7 +222,8 @@ SUBROUTINE SORBDB3( M, P, Q, X11, LDX11, X21, LDX21, THETA, PHI, LOGICAL LQUERY * .. * .. External Subroutines .. - EXTERNAL SLARF, SLARFGP, SORBDB5, SROT, XERBLA + EXTERNAL SLARF1F, SLARFGP, SORBDB5, SROT, + $ XERBLA * .. * .. External Functions .. REAL SNRM2 @@ -263,7 +260,7 @@ SUBROUTINE SORBDB3( M, P, Q, X11, LDX11, X21, LDX21, THETA, PHI, LORBDB5 = Q-1 LWORKOPT = MAX( ILARF+LLARF-1, IORBDB5+LORBDB5-1 ) LWORKMIN = LWORKOPT - WORK(1) = LWORKOPT + WORK(1) = REAL( LWORKOPT ) IF( LWORK .LT. LWORKMIN .AND. .NOT.LQUERY ) THEN INFO = -14 END IF @@ -280,16 +277,16 @@ SUBROUTINE SORBDB3( M, P, Q, X11, LDX11, X21, LDX21, THETA, PHI, DO I = 1, M-P * IF( I .GT. 1 ) THEN - CALL SROT( Q-I+1, X11(I-1,I), LDX11, X21(I,I), LDX11, C, S ) + CALL SROT( Q-I+1, X11(I-1,I), LDX11, X21(I,I), LDX11, C, + $ S ) END IF * CALL SLARFGP( Q-I+1, X21(I,I), X21(I,I+1), LDX21, TAUQ1(I) ) S = X21(I,I) - X21(I,I) = ONE - CALL SLARF( 'R', P-I+1, Q-I+1, X21(I,I), LDX21, TAUQ1(I), - $ X11(I,I), LDX11, WORK(ILARF) ) - CALL SLARF( 'R', M-P-I, Q-I+1, X21(I,I), LDX21, TAUQ1(I), - $ X21(I+1,I), LDX21, WORK(ILARF) ) + CALL SLARF1F( 'R', P-I+1, Q-I+1, X21(I,I), LDX21, TAUQ1(I), + $ X11(I,I), LDX11, WORK(ILARF) ) + CALL SLARF1F( 'R', M-P-I, Q-I+1, X21(I,I), LDX21, TAUQ1(I), + $ X21(I+1,I), LDX21, WORK(ILARF) ) C = SQRT( SNRM2( P-I+1, X11(I,I), 1 )**2 $ + SNRM2( M-P-I, X21(I+1,I), 1 )**2 ) THETA(I) = ATAN2( S, C ) @@ -299,17 +296,16 @@ SUBROUTINE SORBDB3( M, P, Q, X11, LDX11, X21, LDX21, THETA, PHI, $ WORK(IORBDB5), LORBDB5, CHILDINFO ) CALL SLARFGP( P-I+1, X11(I,I), X11(I+1,I), 1, TAUP1(I) ) IF( I .LT. M-P ) THEN - CALL SLARFGP( M-P-I, X21(I+1,I), X21(I+2,I), 1, TAUP2(I) ) + CALL SLARFGP( M-P-I, X21(I+1,I), X21(I+2,I), 1, + $ TAUP2(I) ) PHI(I) = ATAN2( X21(I+1,I), X11(I,I) ) C = COS( PHI(I) ) S = SIN( PHI(I) ) - X21(I+1,I) = ONE - CALL SLARF( 'L', M-P-I, Q-I, X21(I+1,I), 1, TAUP2(I), - $ X21(I+1,I+1), LDX21, WORK(ILARF) ) + CALL SLARF1F( 'L', M-P-I, Q-I, X21(I+1,I), 1, TAUP2(I), + $ X21(I+1,I+1), LDX21, WORK(ILARF) ) END IF - X11(I,I) = ONE - CALL SLARF( 'L', P-I+1, Q-I, X11(I,I), 1, TAUP1(I), X11(I,I+1), - $ LDX11, WORK(ILARF) ) + CALL SLARF1F( 'L', P-I+1, Q-I, X11(I,I), 1, TAUP1(I), X11(I, + $ I+1), LDX11, WORK(ILARF) ) * END DO * @@ -317,9 +313,8 @@ SUBROUTINE SORBDB3( M, P, Q, X11, LDX11, X21, LDX21, THETA, PHI, * DO I = M-P + 1, Q CALL SLARFGP( P-I+1, X11(I,I), X11(I+1,I), 1, TAUP1(I) ) - X11(I,I) = ONE - CALL SLARF( 'L', P-I+1, Q-I, X11(I,I), 1, TAUP1(I), X11(I,I+1), - $ LDX11, WORK(ILARF) ) + CALL SLARF1F( 'L', P-I+1, Q-I, X11(I,I), 1, TAUP1(I), X11(I, + $ I+1), LDX11, WORK(ILARF) ) END DO * RETURN diff --git a/lapack-netlib/SRC/sorbdb4.f b/lapack-netlib/SRC/sorbdb4.f index 0fef5b759b..4bd1affa45 100644 --- a/lapack-netlib/SRC/sorbdb4.f +++ b/lapack-netlib/SRC/sorbdb4.f @@ -5,7 +5,6 @@ * Online html documentation available at * http://www.netlib.org/lapack/explore-html/ * -*> \htmlonly *> Download SORBDB4 + dependencies *> *> [TGZ] @@ -13,7 +12,6 @@ *> [ZIP] *> *> [TXT] -*> \endhtmlonly * * Definition: * =========== @@ -184,7 +182,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup realOTHERcomputational +*> \ingroup unbdb4 * *> \par Further Details: * ===================== @@ -208,9 +206,11 @@ *> Algorithms, 50(1):33-65, 2009. *> * ===================================================================== - SUBROUTINE SORBDB4( M, P, Q, X11, LDX11, X21, LDX21, THETA, PHI, + SUBROUTINE SORBDB4( M, P, Q, X11, LDX11, X21, LDX21, THETA, + $ PHI, $ TAUP1, TAUP2, TAUQ1, PHANTOM, WORK, LWORK, $ INFO ) + IMPLICIT NONE * * -- LAPACK computational routine -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- @@ -228,8 +228,8 @@ SUBROUTINE SORBDB4( M, P, Q, X11, LDX11, X21, LDX21, THETA, PHI, * ==================================================================== * * .. Parameters .. - REAL NEGONE, ONE, ZERO - PARAMETER ( NEGONE = -1.0E0, ONE = 1.0E0, ZERO = 0.0E0 ) + REAL NEGONE, ZERO + PARAMETER ( NEGONE = -1.0E0, ZERO = 0.0E0 ) * .. * .. Local Scalars .. REAL C, S @@ -238,7 +238,8 @@ SUBROUTINE SORBDB4( M, P, Q, X11, LDX11, X21, LDX21, THETA, PHI, LOGICAL LQUERY * .. * .. External Subroutines .. - EXTERNAL SLARF, SLARFGP, SORBDB5, SROT, SSCAL, XERBLA + EXTERNAL SLARF1F, SLARFGP, SORBDB5, SROT, SSCAL, + $ XERBLA * .. * .. External Functions .. REAL SNRM2 @@ -276,7 +277,7 @@ SUBROUTINE SORBDB4( M, P, Q, X11, LDX11, X21, LDX21, THETA, PHI, LWORKOPT = ILARF + LLARF - 1 LWORKOPT = MAX( LWORKOPT, IORBDB5 + LORBDB5 - 1 ) LWORKMIN = LWORKOPT - WORK(1) = LWORKOPT + WORK(1) = REAL( LWORKOPT ) IF( LWORK .LT. LWORKMIN .AND. .NOT.LQUERY ) THEN INFO = -14 END IF @@ -301,43 +302,40 @@ SUBROUTINE SORBDB4( M, P, Q, X11, LDX11, X21, LDX21, THETA, PHI, $ LORBDB5, CHILDINFO ) CALL SSCAL( P, NEGONE, PHANTOM(1), 1 ) CALL SLARFGP( P, PHANTOM(1), PHANTOM(2), 1, TAUP1(1) ) - CALL SLARFGP( M-P, PHANTOM(P+1), PHANTOM(P+2), 1, TAUP2(1) ) + CALL SLARFGP( M-P, PHANTOM(P+1), PHANTOM(P+2), 1, + $ TAUP2(1) ) THETA(I) = ATAN2( PHANTOM(1), PHANTOM(P+1) ) C = COS( THETA(I) ) S = SIN( THETA(I) ) - PHANTOM(1) = ONE - PHANTOM(P+1) = ONE - CALL SLARF( 'L', P, Q, PHANTOM(1), 1, TAUP1(1), X11, LDX11, - $ WORK(ILARF) ) - CALL SLARF( 'L', M-P, Q, PHANTOM(P+1), 1, TAUP2(1), X21, - $ LDX21, WORK(ILARF) ) + CALL SLARF1F( 'L', P, Q, PHANTOM(1), 1, TAUP1(1), X11, + $ LDX11, WORK(ILARF) ) + CALL SLARF1F( 'L', M-P, Q, PHANTOM(P+1), 1, TAUP2(1), + $ X21, LDX21, WORK(ILARF) ) ELSE CALL SORBDB5( P-I+1, M-P-I+1, Q-I+1, X11(I,I-1), 1, $ X21(I,I-1), 1, X11(I,I), LDX11, X21(I,I), $ LDX21, WORK(IORBDB5), LORBDB5, CHILDINFO ) CALL SSCAL( P-I+1, NEGONE, X11(I,I-1), 1 ) - CALL SLARFGP( P-I+1, X11(I,I-1), X11(I+1,I-1), 1, TAUP1(I) ) + CALL SLARFGP( P-I+1, X11(I,I-1), X11(I+1,I-1), 1, + $ TAUP1(I) ) CALL SLARFGP( M-P-I+1, X21(I,I-1), X21(I+1,I-1), 1, $ TAUP2(I) ) THETA(I) = ATAN2( X11(I,I-1), X21(I,I-1) ) C = COS( THETA(I) ) S = SIN( THETA(I) ) - X11(I,I-1) = ONE - X21(I,I-1) = ONE - CALL SLARF( 'L', P-I+1, Q-I+1, X11(I,I-1), 1, TAUP1(I), - $ X11(I,I), LDX11, WORK(ILARF) ) - CALL SLARF( 'L', M-P-I+1, Q-I+1, X21(I,I-1), 1, TAUP2(I), - $ X21(I,I), LDX21, WORK(ILARF) ) + CALL SLARF1F( 'L', P-I+1, Q-I+1, X11(I,I-1), 1, TAUP1(I), + $ X11(I,I), LDX11, WORK(ILARF) ) + CALL SLARF1F( 'L', M-P-I+1, Q-I+1, X21(I,I-1), 1, + $ TAUP2(I), X21(I,I), LDX21, WORK(ILARF) ) END IF * CALL SROT( Q-I+1, X11(I,I), LDX11, X21(I,I), LDX21, S, -C ) CALL SLARFGP( Q-I+1, X21(I,I), X21(I,I+1), LDX21, TAUQ1(I) ) C = X21(I,I) - X21(I,I) = ONE - CALL SLARF( 'R', P-I, Q-I+1, X21(I,I), LDX21, TAUQ1(I), - $ X11(I+1,I), LDX11, WORK(ILARF) ) - CALL SLARF( 'R', M-P-I, Q-I+1, X21(I,I), LDX21, TAUQ1(I), - $ X21(I+1,I), LDX21, WORK(ILARF) ) + CALL SLARF1F( 'R', P-I, Q-I+1, X21(I,I), LDX21, TAUQ1(I), + $ X11(I+1,I), LDX11, WORK(ILARF) ) + CALL SLARF1F( 'R', M-P-I, Q-I+1, X21(I,I), LDX21, TAUQ1(I), + $ X21(I+1,I), LDX21, WORK(ILARF) ) IF( I .LT. M-Q ) THEN S = SQRT( SNRM2( P-I, X11(I+1,I), 1 )**2 $ + SNRM2( M-P-I, X21(I+1,I), 1 )**2 ) @@ -350,21 +348,21 @@ SUBROUTINE SORBDB4( M, P, Q, X11, LDX11, X21, LDX21, THETA, PHI, * DO I = M - Q + 1, P CALL SLARFGP( Q-I+1, X11(I,I), X11(I,I+1), LDX11, TAUQ1(I) ) - X11(I,I) = ONE - CALL SLARF( 'R', P-I, Q-I+1, X11(I,I), LDX11, TAUQ1(I), - $ X11(I+1,I), LDX11, WORK(ILARF) ) - CALL SLARF( 'R', Q-P, Q-I+1, X11(I,I), LDX11, TAUQ1(I), - $ X21(M-Q+1,I), LDX21, WORK(ILARF) ) + CALL SLARF1F( 'R', P-I, Q-I+1, X11(I,I), LDX11, TAUQ1(I), + $ X11(I+1,I), LDX11, WORK(ILARF) ) + CALL SLARF1F( 'R', Q-P, Q-I+1, X11(I,I), LDX11, TAUQ1(I), + $ X21(M-Q+1,I), LDX21, WORK(ILARF) ) END DO * * Reduce the bottom-right portion of X21 to [ 0 I ] * DO I = P + 1, Q - CALL SLARFGP( Q-I+1, X21(M-Q+I-P,I), X21(M-Q+I-P,I+1), LDX21, + CALL SLARFGP( Q-I+1, X21(M-Q+I-P,I), X21(M-Q+I-P,I+1), + $ LDX21, $ TAUQ1(I) ) - X21(M-Q+I-P,I) = ONE - CALL SLARF( 'R', Q-I, Q-I+1, X21(M-Q+I-P,I), LDX21, TAUQ1(I), - $ X21(M-Q+I-P+1,I), LDX21, WORK(ILARF) ) + CALL SLARF1F( 'R', Q-I, Q-I+1, X21(M-Q+I-P,I), LDX21, + $ TAUQ1(I), X21(M-Q+I-P+1,I), LDX21, + $ WORK(ILARF) ) END DO * RETURN diff --git a/lapack-netlib/SRC/sorg2l.f b/lapack-netlib/SRC/sorg2l.f index aa0dd0a08e..bf5c9b0ec7 100644 --- a/lapack-netlib/SRC/sorg2l.f +++ b/lapack-netlib/SRC/sorg2l.f @@ -5,7 +5,6 @@ * Online html documentation available at * http://www.netlib.org/lapack/explore-html/ * -*> \htmlonly *> Download SORG2L + dependencies *> *> [TGZ] @@ -13,7 +12,6 @@ *> [ZIP] *> *> [TXT] -*> \endhtmlonly * * Definition: * =========== @@ -107,10 +105,11 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup realOTHERcomputational +*> \ingroup ung2l * * ===================================================================== SUBROUTINE SORG2L( M, N, K, A, LDA, TAU, WORK, INFO ) + IMPLICIT NONE * * -- LAPACK computational routine -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- @@ -133,7 +132,7 @@ SUBROUTINE SORG2L( M, N, K, A, LDA, TAU, WORK, INFO ) INTEGER I, II, J, L * .. * .. External Subroutines .. - EXTERNAL SLARF, SSCAL, XERBLA + EXTERNAL SLARF1L, SSCAL, XERBLA * .. * .. Intrinsic Functions .. INTRINSIC MAX @@ -177,8 +176,8 @@ SUBROUTINE SORG2L( M, N, K, A, LDA, TAU, WORK, INFO ) * Apply H(i) to A(1:m-k+i,1:n-k+i) from the left * A( M-N+II, II ) = ONE - CALL SLARF( 'Left', M-N+II, II-1, A( 1, II ), 1, TAU( I ), A, - $ LDA, WORK ) + CALL SLARF1L( 'Left', M-N+II, II-1, A( 1, II ), 1, TAU( I ), + $ A, LDA, WORK ) CALL SSCAL( M-N+II-1, -TAU( I ), A( 1, II ), 1 ) A( M-N+II, II ) = ONE - TAU( I ) * diff --git a/lapack-netlib/SRC/sorg2r.f b/lapack-netlib/SRC/sorg2r.f index 3a8aa33a02..c6bbe7506a 100644 --- a/lapack-netlib/SRC/sorg2r.f +++ b/lapack-netlib/SRC/sorg2r.f @@ -5,7 +5,6 @@ * Online html documentation available at * http://www.netlib.org/lapack/explore-html/ * -*> \htmlonly *> Download SORG2R + dependencies *> *> [TGZ] @@ -13,7 +12,6 @@ *> [ZIP] *> *> [TXT] -*> \endhtmlonly * * Definition: * =========== @@ -107,10 +105,11 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup realOTHERcomputational +*> \ingroup ung2r * * ===================================================================== SUBROUTINE SORG2R( M, N, K, A, LDA, TAU, WORK, INFO ) + IMPLICIT NONE * * -- LAPACK computational routine -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- @@ -133,7 +132,7 @@ SUBROUTINE SORG2R( M, N, K, A, LDA, TAU, WORK, INFO ) INTEGER I, J, L * .. * .. External Subroutines .. - EXTERNAL SLARF, SSCAL, XERBLA + EXTERNAL SLARF1F, SSCAL, XERBLA * .. * .. Intrinsic Functions .. INTRINSIC MAX @@ -176,9 +175,8 @@ SUBROUTINE SORG2R( M, N, K, A, LDA, TAU, WORK, INFO ) * Apply H(i) to A(i:m,i:n) from the left * IF( I.LT.N ) THEN - A( I, I ) = ONE - CALL SLARF( 'Left', M-I+1, N-I, A( I, I ), 1, TAU( I ), - $ A( I, I+1 ), LDA, WORK ) + CALL SLARF1F( 'Left', M-I+1, N-I, A( I, I ), 1, TAU( I ), + $ A( I, I+1 ), LDA, WORK ) END IF IF( I.LT.M ) $ CALL SSCAL( M-I, -TAU( I ), A( I+1, I ), 1 ) diff --git a/lapack-netlib/SRC/sorgl2.f b/lapack-netlib/SRC/sorgl2.f index d85c388749..2bea2836f9 100644 --- a/lapack-netlib/SRC/sorgl2.f +++ b/lapack-netlib/SRC/sorgl2.f @@ -5,7 +5,6 @@ * Online html documentation available at * http://www.netlib.org/lapack/explore-html/ * -*> \htmlonly *> Download SORGL2 + dependencies *> *> [TGZ] @@ -13,7 +12,6 @@ *> [ZIP] *> *> [TXT] -*> \endhtmlonly * * Definition: * =========== @@ -106,10 +104,11 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup realOTHERcomputational +*> \ingroup ungl2 * * ===================================================================== SUBROUTINE SORGL2( M, N, K, A, LDA, TAU, WORK, INFO ) + IMPLICIT NONE * * -- LAPACK computational routine -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- @@ -132,7 +131,7 @@ SUBROUTINE SORGL2( M, N, K, A, LDA, TAU, WORK, INFO ) INTEGER I, J, L * .. * .. External Subroutines .. - EXTERNAL SLARF, SSCAL, XERBLA + EXTERNAL SLARF1F, SSCAL, XERBLA * .. * .. Intrinsic Functions .. INTRINSIC MAX @@ -180,9 +179,8 @@ SUBROUTINE SORGL2( M, N, K, A, LDA, TAU, WORK, INFO ) * IF( I.LT.N ) THEN IF( I.LT.M ) THEN - A( I, I ) = ONE - CALL SLARF( 'Right', M-I, N-I+1, A( I, I ), LDA, - $ TAU( I ), A( I+1, I ), LDA, WORK ) + CALL SLARF1F( 'Right', M-I, N-I+1, A( I, I ), LDA, + $ TAU( I ), A( I+1, I ), LDA, WORK ) END IF CALL SSCAL( N-I, -TAU( I ), A( I, I+1 ), LDA ) END IF diff --git a/lapack-netlib/SRC/sorgr2.f b/lapack-netlib/SRC/sorgr2.f index 12bb90c782..aca697e0cd 100644 --- a/lapack-netlib/SRC/sorgr2.f +++ b/lapack-netlib/SRC/sorgr2.f @@ -5,7 +5,6 @@ * Online html documentation available at * http://www.netlib.org/lapack/explore-html/ * -*> \htmlonly *> Download SORGR2 + dependencies *> *> [TGZ] @@ -13,7 +12,6 @@ *> [ZIP] *> *> [TXT] -*> \endhtmlonly * * Definition: * =========== @@ -107,10 +105,11 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup realOTHERcomputational +*> \ingroup ungr2 * * ===================================================================== SUBROUTINE SORGR2( M, N, K, A, LDA, TAU, WORK, INFO ) + IMPLICIT NONE * * -- LAPACK computational routine -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- @@ -133,7 +132,7 @@ SUBROUTINE SORGR2( M, N, K, A, LDA, TAU, WORK, INFO ) INTEGER I, II, J, L * .. * .. External Subroutines .. - EXTERNAL SLARF, SSCAL, XERBLA + EXTERNAL SLARF1L, SSCAL, XERBLA * .. * .. Intrinsic Functions .. INTRINSIC MAX @@ -181,8 +180,8 @@ SUBROUTINE SORGR2( M, N, K, A, LDA, TAU, WORK, INFO ) * Apply H(i) to A(1:m-k+i,1:n-k+i) from the right * A( II, N-M+II ) = ONE - CALL SLARF( 'Right', II-1, N-M+II, A( II, 1 ), LDA, TAU( I ), - $ A, LDA, WORK ) + CALL SLARF1L( 'Right', II-1, N-M+II, A( II, 1 ), LDA, + $ TAU( I ), A, LDA, WORK ) CALL SSCAL( N-M+II-1, -TAU( I ), A( II, 1 ), LDA ) A( II, N-M+II ) = ONE - TAU( I ) * diff --git a/lapack-netlib/SRC/sorm2l.f b/lapack-netlib/SRC/sorm2l.f index 2f6e3abbc3..6de9c5d8e2 100644 --- a/lapack-netlib/SRC/sorm2l.f +++ b/lapack-netlib/SRC/sorm2l.f @@ -5,7 +5,6 @@ * Online html documentation available at * http://www.netlib.org/lapack/explore-html/ * -*> \htmlonly *> Download SORM2L + dependencies *> *> [TGZ] @@ -13,7 +12,6 @@ *> [ZIP] *> *> [TXT] -*> \endhtmlonly * * Definition: * =========== @@ -151,11 +149,12 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup realOTHERcomputational +*> \ingroup unm2l * * ===================================================================== SUBROUTINE SORM2L( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, $ WORK, INFO ) + IMPLICIT NONE * * -- LAPACK computational routine -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- @@ -171,21 +170,16 @@ SUBROUTINE SORM2L( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, * * ===================================================================== * -* .. Parameters .. - REAL ONE - PARAMETER ( ONE = 1.0E+0 ) -* .. * .. Local Scalars .. LOGICAL LEFT, NOTRAN INTEGER I, I1, I2, I3, MI, NI, NQ - REAL AII * .. * .. External Functions .. LOGICAL LSAME EXTERNAL LSAME * .. * .. External Subroutines .. - EXTERNAL SLARF, XERBLA + EXTERNAL SLARF1L, XERBLA * .. * .. Intrinsic Functions .. INTRINSIC MAX @@ -262,11 +256,8 @@ SUBROUTINE SORM2L( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, * * Apply H(i) * - AII = A( NQ-K+I, I ) - A( NQ-K+I, I ) = ONE - CALL SLARF( SIDE, MI, NI, A( 1, I ), 1, TAU( I ), C, LDC, + CALL SLARF1L( SIDE, MI, NI, A( 1, I ), 1, TAU( I ), C, LDC, $ WORK ) - A( NQ-K+I, I ) = AII 10 CONTINUE RETURN * diff --git a/lapack-netlib/SRC/sorm2r.f b/lapack-netlib/SRC/sorm2r.f index 0e0747a005..b1fd6263f6 100644 --- a/lapack-netlib/SRC/sorm2r.f +++ b/lapack-netlib/SRC/sorm2r.f @@ -5,7 +5,6 @@ * Online html documentation available at * http://www.netlib.org/lapack/explore-html/ * -*> \htmlonly *> Download SORM2R + dependencies *> *> [TGZ] @@ -13,7 +12,6 @@ *> [ZIP] *> *> [TXT] -*> \endhtmlonly * * Definition: * =========== @@ -151,11 +149,12 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup realOTHERcomputational +*> \ingroup unm2r * * ===================================================================== SUBROUTINE SORM2R( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, $ WORK, INFO ) + IMPLICIT NONE * * -- LAPACK computational routine -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- @@ -171,21 +170,16 @@ SUBROUTINE SORM2R( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, * * ===================================================================== * -* .. Parameters .. - REAL ONE - PARAMETER ( ONE = 1.0E+0 ) -* .. * .. Local Scalars .. LOGICAL LEFT, NOTRAN INTEGER I, I1, I2, I3, IC, JC, MI, NI, NQ - REAL AII * .. * .. External Functions .. LOGICAL LSAME EXTERNAL LSAME * .. * .. External Subroutines .. - EXTERNAL SLARF, XERBLA + EXTERNAL SLARF1F, XERBLA * .. * .. Intrinsic Functions .. INTRINSIC MAX @@ -266,11 +260,8 @@ SUBROUTINE SORM2R( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, * * Apply H(i) * - AII = A( I, I ) - A( I, I ) = ONE - CALL SLARF( SIDE, MI, NI, A( I, I ), 1, TAU( I ), C( IC, JC ), - $ LDC, WORK ) - A( I, I ) = AII + CALL SLARF1F( SIDE, MI, NI, A( I, I ), 1, TAU( I ), C( IC, + $ JC ), LDC, WORK ) 10 CONTINUE RETURN * diff --git a/lapack-netlib/SRC/sorml2.f b/lapack-netlib/SRC/sorml2.f index c5705c799e..0f79de7df1 100644 --- a/lapack-netlib/SRC/sorml2.f +++ b/lapack-netlib/SRC/sorml2.f @@ -5,7 +5,6 @@ * Online html documentation available at * http://www.netlib.org/lapack/explore-html/ * -*> \htmlonly *> Download SORML2 + dependencies *> *> [TGZ] @@ -13,7 +12,6 @@ *> [ZIP] *> *> [TXT] -*> \endhtmlonly * * Definition: * =========== @@ -151,11 +149,12 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup realOTHERcomputational +*> \ingroup unml2 * * ===================================================================== SUBROUTINE SORML2( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, $ WORK, INFO ) + IMPLICIT NONE * * -- LAPACK computational routine -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- @@ -171,21 +170,16 @@ SUBROUTINE SORML2( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, * * ===================================================================== * -* .. Parameters .. - REAL ONE - PARAMETER ( ONE = 1.0E+0 ) -* .. * .. Local Scalars .. LOGICAL LEFT, NOTRAN INTEGER I, I1, I2, I3, IC, JC, MI, NI, NQ - REAL AII * .. * .. External Functions .. LOGICAL LSAME EXTERNAL LSAME * .. * .. External Subroutines .. - EXTERNAL SLARF, XERBLA + EXTERNAL SLARF1F, XERBLA * .. * .. Intrinsic Functions .. INTRINSIC MAX @@ -266,11 +260,8 @@ SUBROUTINE SORML2( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, * * Apply H(i) * - AII = A( I, I ) - A( I, I ) = ONE - CALL SLARF( SIDE, MI, NI, A( I, I ), LDA, TAU( I ), - $ C( IC, JC ), LDC, WORK ) - A( I, I ) = AII + CALL SLARF1F( SIDE, MI, NI, A( I, I ), LDA, TAU( I ), + $ C( IC, JC ), LDC, WORK ) 10 CONTINUE RETURN * diff --git a/lapack-netlib/SRC/sormr2.f b/lapack-netlib/SRC/sormr2.f index cefe1d3092..c170f63c77 100644 --- a/lapack-netlib/SRC/sormr2.f +++ b/lapack-netlib/SRC/sormr2.f @@ -5,7 +5,6 @@ * Online html documentation available at * http://www.netlib.org/lapack/explore-html/ * -*> \htmlonly *> Download SORMR2 + dependencies *> *> [TGZ] @@ -13,7 +12,6 @@ *> [ZIP] *> *> [TXT] -*> \endhtmlonly * * Definition: * =========== @@ -151,11 +149,12 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup realOTHERcomputational +*> \ingroup unmr2 * * ===================================================================== SUBROUTINE SORMR2( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, $ WORK, INFO ) + IMPLICIT NONE * * -- LAPACK computational routine -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- @@ -171,21 +170,16 @@ SUBROUTINE SORMR2( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, * * ===================================================================== * -* .. Parameters .. - REAL ONE - PARAMETER ( ONE = 1.0E+0 ) -* .. * .. Local Scalars .. LOGICAL LEFT, NOTRAN INTEGER I, I1, I2, I3, MI, NI, NQ - REAL AII * .. * .. External Functions .. LOGICAL LSAME EXTERNAL LSAME * .. * .. External Subroutines .. - EXTERNAL SLARF, XERBLA + EXTERNAL SLARF1L, XERBLA * .. * .. Intrinsic Functions .. INTRINSIC MAX @@ -262,11 +256,8 @@ SUBROUTINE SORMR2( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, * * Apply H(i) * - AII = A( I, NQ-K+I ) - A( I, NQ-K+I ) = ONE - CALL SLARF( SIDE, MI, NI, A( I, 1 ), LDA, TAU( I ), C, LDC, - $ WORK ) - A( I, NQ-K+I ) = AII + CALL SLARF1L( SIDE, MI, NI, A( I, 1 ), LDA, TAU( I ), C, + $ LDC, WORK ) 10 CONTINUE RETURN * diff --git a/lapack-netlib/SRC/stgsen.f b/lapack-netlib/SRC/stgsen.f index ac9c4677ad..6584cebdab 100644 --- a/lapack-netlib/SRC/stgsen.f +++ b/lapack-netlib/SRC/stgsen.f @@ -5,7 +5,6 @@ * Online html documentation available at * http://www.netlib.org/lapack/explore-html/ * -*> \htmlonly *> Download STGSEN + dependencies *> *> [TGZ] @@ -13,7 +12,6 @@ *> [ZIP] *> *> [TXT] -*> \endhtmlonly * * Definition: * =========== @@ -256,7 +254,7 @@ *> \verbatim *> LWORK is INTEGER *> The dimension of the array WORK. LWORK >= 4*N+16. -*> If IJOB = 1, 2 or 4, LWORK >= MAX(4*N+16, 2*M*(N-M)). +*> If IJOB = 1, 2 or 4, LWORK >= MAX(4*N+16, 2*M*(N-M) + 1). *> If IJOB = 3 or 5, LWORK >= MAX(4*N+16, 4*M*(N-M)). *> *> If LWORK = -1, then a workspace query is assumed; the routine @@ -445,9 +443,11 @@ *> \endverbatim *> * ===================================================================== - SUBROUTINE STGSEN( IJOB, WANTQ, WANTZ, SELECT, N, A, LDA, B, LDB, + SUBROUTINE STGSEN( IJOB, WANTQ, WANTZ, SELECT, N, A, LDA, B, + $ LDB, $ ALPHAR, ALPHAI, BETA, Q, LDQ, Z, LDZ, M, PL, $ PR, DIF, WORK, LWORK, IWORK, LIWORK, INFO ) + IMPLICIT NONE * * -- LAPACK computational routine -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- @@ -486,7 +486,8 @@ SUBROUTINE STGSEN( IJOB, WANTQ, WANTZ, SELECT, N, A, LDA, B, LDB, INTEGER ISAVE( 3 ) * .. * .. External Subroutines .. - EXTERNAL SLACN2, SLACPY, SLAG2, SLASSQ, STGEXC, STGSYL, + EXTERNAL SLACN2, SLACPY, SLAG2, SLASSQ, STGEXC, + $ STGSYL, $ XERBLA * .. * .. External Functions .. @@ -561,7 +562,7 @@ SUBROUTINE STGSEN( IJOB, WANTQ, WANTZ, SELECT, N, A, LDA, B, LDB, END IF * IF( IJOB.EQ.1 .OR. IJOB.EQ.2 .OR. IJOB.EQ.4 ) THEN - LWMIN = MAX( 1, 4*N+16, 2*M*(N-M) ) + LWMIN = MAX( 1, 4*N+16, 2*M*(N-M) + 1 ) LIWMIN = MAX( 1, N+6 ) ELSE IF( IJOB.EQ.3 .OR. IJOB.EQ.5 ) THEN LWMIN = MAX( 1, 4*N+16, 4*M*(N-M) ) @@ -634,7 +635,8 @@ SUBROUTINE STGSEN( IJOB, WANTQ, WANTZ, SELECT, N, A, LDA, B, LDB, * KK = K IF( K.NE.KS ) - $ CALL STGEXC( WANTQ, WANTZ, N, A, LDA, B, LDB, Q, LDQ, + $ CALL STGEXC( WANTQ, WANTZ, N, A, LDA, B, LDB, Q, + $ LDQ, $ Z, LDZ, KK, KS, WORK, LWORK, IERR ) * IF( IERR.GT.0 ) THEN @@ -668,7 +670,8 @@ SUBROUTINE STGSEN( IJOB, WANTQ, WANTZ, SELECT, N, A, LDA, B, LDB, I = N1 + 1 IJB = 0 CALL SLACPY( 'Full', N1, N2, A( 1, I ), LDA, WORK, N1 ) - CALL SLACPY( 'Full', N1, N2, B( 1, I ), LDB, WORK( N1*N2+1 ), + CALL SLACPY( 'Full', N1, N2, B( 1, I ), LDB, + $ WORK( N1*N2+1 ), $ N1 ) CALL STGSYL( 'N', IJB, N1, N2, A, LDA, A( I, I ), LDA, WORK, $ N1, B, LDB, B( I, I ), LDB, WORK( N1*N2+1 ), N1, @@ -710,14 +713,16 @@ SUBROUTINE STGSEN( IJOB, WANTQ, WANTZ, SELECT, N, A, LDA, B, LDB, * * Frobenius norm-based Difu-estimate. * - CALL STGSYL( 'N', IJB, N1, N2, A, LDA, A( I, I ), LDA, WORK, + CALL STGSYL( 'N', IJB, N1, N2, A, LDA, A( I, I ), LDA, + $ WORK, $ N1, B, LDB, B( I, I ), LDB, WORK( N1*N2+1 ), $ N1, DSCALE, DIF( 1 ), WORK( 2*N1*N2+1 ), $ LWORK-2*N1*N2, IWORK, IERR ) * * Frobenius norm-based Difl-estimate. * - CALL STGSYL( 'N', IJB, N2, N1, A( I, I ), LDA, A, LDA, WORK, + CALL STGSYL( 'N', IJB, N2, N1, A( I, I ), LDA, A, LDA, + $ WORK, $ N2, B( I, I ), LDB, B, LDB, WORK( N1*N2+1 ), $ N2, DSCALE, DIF( 2 ), WORK( 2*N1*N2+1 ), $ LWORK-2*N1*N2, IWORK, IERR ) @@ -746,7 +751,8 @@ SUBROUTINE STGSEN( IJOB, WANTQ, WANTZ, SELECT, N, A, LDA, B, LDB, * * Solve generalized Sylvester equation. * - CALL STGSYL( 'N', IJB, N1, N2, A, LDA, A( I, I ), LDA, + CALL STGSYL( 'N', IJB, N1, N2, A, LDA, A( I, I ), + $ LDA, $ WORK, N1, B, LDB, B( I, I ), LDB, $ WORK( N1*N2+1 ), N1, DSCALE, DIF( 1 ), $ WORK( 2*N1*N2+1 ), LWORK-2*N1*N2, IWORK, @@ -755,7 +761,8 @@ SUBROUTINE STGSEN( IJOB, WANTQ, WANTZ, SELECT, N, A, LDA, B, LDB, * * Solve the transposed variant. * - CALL STGSYL( 'T', IJB, N1, N2, A, LDA, A( I, I ), LDA, + CALL STGSYL( 'T', IJB, N1, N2, A, LDA, A( I, I ), + $ LDA, $ WORK, N1, B, LDB, B( I, I ), LDB, $ WORK( N1*N2+1 ), N1, DSCALE, DIF( 1 ), $ WORK( 2*N1*N2+1 ), LWORK-2*N1*N2, IWORK, @@ -775,7 +782,8 @@ SUBROUTINE STGSEN( IJOB, WANTQ, WANTZ, SELECT, N, A, LDA, B, LDB, * * Solve generalized Sylvester equation. * - CALL STGSYL( 'N', IJB, N2, N1, A( I, I ), LDA, A, LDA, + CALL STGSYL( 'N', IJB, N2, N1, A( I, I ), LDA, A, + $ LDA, $ WORK, N2, B( I, I ), LDB, B, LDB, $ WORK( N1*N2+1 ), N2, DSCALE, DIF( 2 ), $ WORK( 2*N1*N2+1 ), LWORK-2*N1*N2, IWORK, @@ -784,7 +792,8 @@ SUBROUTINE STGSEN( IJOB, WANTQ, WANTZ, SELECT, N, A, LDA, B, LDB, * * Solve the transposed variant. * - CALL STGSYL( 'T', IJB, N2, N1, A( I, I ), LDA, A, LDA, + CALL STGSYL( 'T', IJB, N2, N1, A( I, I ), LDA, A, + $ LDA, $ WORK, N2, B( I, I ), LDB, B, LDB, $ WORK( N1*N2+1 ), N2, DSCALE, DIF( 2 ), $ WORK( 2*N1*N2+1 ), LWORK-2*N1*N2, IWORK, @@ -826,7 +835,8 @@ SUBROUTINE STGSEN( IJOB, WANTQ, WANTZ, SELECT, N, A, LDA, B, LDB, WORK( 6 ) = B( K+1, K ) WORK( 7 ) = B( K, K+1 ) WORK( 8 ) = B( K+1, K+1 ) - CALL SLAG2( WORK, 2, WORK( 5 ), 2, SMLNUM*EPS, BETA( K ), + CALL SLAG2( WORK, 2, WORK( 5 ), 2, SMLNUM*EPS, + $ BETA( K ), $ BETA( K+1 ), ALPHAR( K ), ALPHAR( K+1 ), $ ALPHAI( K ) ) ALPHAI( K+1 ) = -ALPHAI( K ) diff --git a/lapack-netlib/SRC/strsyl3.f b/lapack-netlib/SRC/strsyl3.f index ef3f2da830..4189adb788 100644 --- a/lapack-netlib/SRC/strsyl3.f +++ b/lapack-netlib/SRC/strsyl3.f @@ -1,10 +1,26 @@ *> \brief \b STRSYL3 * -* Definition: -* =========== -* -* -*> \par Purpose +* Definition: +* =========== +* +* SUBROUTINE STRSYL3( TRANA, TRANB, ISGN, M, N, A, LDA, B, LDB, +* C, LDC, SCALE, IWORK, LIWORK, SWORK, +* LDSWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER TRANA, TRANB +* INTEGER INFO, ISGN, LDA, LDB, LDC, M, N, +* LIWORK, LDSWORK +* REAL SCALE +* .. +* .. Array Arguments .. +* INTEGER IWORK( * ) +* REAL A( LDA, * ), B( LDB, * ), C( LDC, * ), +* SWORK( LDSWORK, * ) +* .. +* +* +*> \par Purpose: * ============= *> *> \verbatim @@ -27,8 +43,8 @@ *> This is the block version of the algorithm. *> \endverbatim * -* Arguments -* ========= +* Arguments: +* ========== * *> \param[in] TRANA *> \verbatim @@ -161,6 +177,8 @@ *> A and B are unchanged). *> \endverbatim * +*> \ingroup trsyl3 +* * ===================================================================== * References: * E. S. Quintana-Orti and R. A. Van De Geijn (2003). Formal derivation of @@ -175,9 +193,9 @@ * Angelika Schwarz, Umea University, Sweden. * * ===================================================================== - SUBROUTINE STRSYL3( TRANA, TRANB, ISGN, M, N, A, LDA, B, LDB, C, - $ LDC, SCALE, IWORK, LIWORK, SWORK, LDSWORK, - $ INFO ) + SUBROUTINE STRSYL3( TRANA, TRANB, ISGN, M, N, A, LDA, B, LDB, + $ C, LDC, SCALE, IWORK, LIWORK, SWORK, + $ LDSWORK, INFO ) IMPLICIT NONE * * .. Scalar Arguments .. @@ -209,10 +227,12 @@ SUBROUTINE STRSYL3( TRANA, TRANB, ISGN, M, N, A, LDA, B, LDB, C, LOGICAL LSAME INTEGER ILAENV REAL SLANGE, SLAMCH, SLARMM - EXTERNAL SLANGE, SLAMCH, SLARMM, ILAENV, LSAME + EXTERNAL SLANGE, SLAMCH, SLARMM, ILAENV, + $ LSAME * .. * .. External Subroutines .. - EXTERNAL SGEMM, SLASCL, SSCAL, STRSYL, XERBLA + EXTERNAL SGEMM, SLASCL, SSCAL, STRSYL, + $ XERBLA * .. * .. Intrinsic Functions .. INTRINSIC ABS, EXPONENT, MAX, MIN, REAL @@ -239,9 +259,8 @@ SUBROUTINE STRSYL3( TRANA, TRANB, ISGN, M, N, A, LDA, B, LDB, C, LQUERY = ( LIWORK.EQ.-1 .OR. LDSWORK.EQ.-1 ) IWORK( 1 ) = NBA + NBB + 2 IF( LQUERY ) THEN - LDSWORK = 2 - SWORK( 1, 1 ) = MAX( NBA, NBB ) - SWORK( 2, 1 ) = 2 * NBB + NBA + SWORK( 1, 1 ) = REAL( MAX( NBA, NBB ) ) + SWORK( 2, 1 ) = REAL( 2 * NBB + NBA ) END IF * * Test the input arguments @@ -1171,8 +1190,8 @@ SUBROUTINE STRSYL3( TRANA, TRANB, ISGN, M, N, A, LDA, B, LDB, C, * form (1/SCALE)*X if SCALE is REAL. Set SCALE to zero and give up. * IWORK(1) = NBA + NBB + 2 - SWORK(1,1) = MAX( NBA, NBB ) - SWORK(2,1) = 2 * NBB + NBA + SWORK(1,1) = REAL( MAX( NBA, NBB ) ) + SWORK(2,1) = REAL( 2 * NBB + NBA ) RETURN END IF * @@ -1223,7 +1242,8 @@ SUBROUTINE STRSYL3( TRANA, TRANB, ISGN, M, N, A, LDA, B, LDB, C, * SCALOC = MIN( BIGNUM / SCAL, ONE / BUF ) BUF = BUF * SCALOC - CALL SLASCL( 'G', -1, -1, ONE, SCALOC, M, N, C, LDC, IWORK(1) ) + CALL SLASCL( 'G', -1, -1, ONE, SCALOC, M, N, C, LDC, + $ IWORK(1) ) END IF * * Combine with buffer scaling factor. SCALE will be flushed if @@ -1234,8 +1254,8 @@ SUBROUTINE STRSYL3( TRANA, TRANB, ISGN, M, N, A, LDA, B, LDB, C, * Restore workspace dimensions * IWORK(1) = NBA + NBB + 2 - SWORK(1,1) = MAX( NBA, NBB ) - SWORK(2,1) = 2 * NBB + NBA + SWORK(1,1) = REAL( MAX( NBA, NBB ) ) + SWORK(2,1) = REAL( 2 * NBB + NBA ) * RETURN * diff --git a/lapack-netlib/SRC/zgebd2.f b/lapack-netlib/SRC/zgebd2.f index 9a403e4008..465409943a 100644 --- a/lapack-netlib/SRC/zgebd2.f +++ b/lapack-netlib/SRC/zgebd2.f @@ -5,7 +5,6 @@ * Online html documentation available at * http://www.netlib.org/lapack/explore-html/ * -*> \htmlonly *> Download ZGEBD2 + dependencies *> *> [TGZ] @@ -13,7 +12,6 @@ *> [ZIP] *> *> [TXT] -*> \endhtmlonly * * Definition: * =========== @@ -132,7 +130,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup complex16GEcomputational +*> \ingroup gebd2 * *> \par Further Details: * ===================== @@ -186,6 +184,7 @@ *> * ===================================================================== SUBROUTINE ZGEBD2( M, N, A, LDA, D, E, TAUQ, TAUP, WORK, INFO ) + IMPLICIT NONE * * -- LAPACK computational routine -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- @@ -202,16 +201,14 @@ SUBROUTINE ZGEBD2( M, N, A, LDA, D, E, TAUQ, TAUP, WORK, INFO ) * ===================================================================== * * .. Parameters .. - COMPLEX*16 ZERO, ONE - PARAMETER ( ZERO = ( 0.0D+0, 0.0D+0 ), - $ ONE = ( 1.0D+0, 0.0D+0 ) ) -* .. + COMPLEX*16 ZERO + PARAMETER ( ZERO = ( 0.0D+0, 0.0D+0 ) ) * .. Local Scalars .. INTEGER I COMPLEX*16 ALPHA * .. * .. External Subroutines .. - EXTERNAL XERBLA, ZLACGV, ZLARF, ZLARFG + EXTERNAL XERBLA, ZLACGV, ZLARF1F, ZLARFG * .. * .. Intrinsic Functions .. INTRINSIC DCONJG, MAX, MIN @@ -245,12 +242,11 @@ SUBROUTINE ZGEBD2( M, N, A, LDA, D, E, TAUQ, TAUP, WORK, INFO ) CALL ZLARFG( M-I+1, ALPHA, A( MIN( I+1, M ), I ), 1, $ TAUQ( I ) ) D( I ) = DBLE( ALPHA ) - A( I, I ) = ONE * * Apply H(i)**H to A(i:m,i+1:n) from the left * IF( I.LT.N ) - $ CALL ZLARF( 'Left', M-I+1, N-I, A( I, I ), 1, + $ CALL ZLARF1F( 'Left', M-I+1, N-I, A( I, I ), 1, $ DCONJG( TAUQ( I ) ), A( I, I+1 ), LDA, WORK ) A( I, I ) = D( I ) * @@ -264,11 +260,10 @@ SUBROUTINE ZGEBD2( M, N, A, LDA, D, E, TAUQ, TAUP, WORK, INFO ) CALL ZLARFG( N-I, ALPHA, A( I, MIN( I+2, N ) ), LDA, $ TAUP( I ) ) E( I ) = DBLE( ALPHA ) - A( I, I+1 ) = ONE * * Apply G(i) to A(i+1:m,i+1:n) from the right * - CALL ZLARF( 'Right', M-I, N-I, A( I, I+1 ), LDA, + CALL ZLARF1F( 'Right', M-I, N-I, A( I, I+1 ), LDA, $ TAUP( I ), A( I+1, I+1 ), LDA, WORK ) CALL ZLACGV( N-I, A( I, I+1 ), LDA ) A( I, I+1 ) = E( I ) @@ -289,12 +284,11 @@ SUBROUTINE ZGEBD2( M, N, A, LDA, D, E, TAUQ, TAUP, WORK, INFO ) CALL ZLARFG( N-I+1, ALPHA, A( I, MIN( I+1, N ) ), LDA, $ TAUP( I ) ) D( I ) = DBLE( ALPHA ) - A( I, I ) = ONE * * Apply G(i) to A(i+1:m,i:n) from the right * IF( I.LT.M ) - $ CALL ZLARF( 'Right', M-I, N-I+1, A( I, I ), LDA, + $ CALL ZLARF1F( 'Right', M-I, N-I+1, A( I, I ), LDA, $ TAUP( I ), A( I+1, I ), LDA, WORK ) CALL ZLACGV( N-I+1, A( I, I ), LDA ) A( I, I ) = D( I ) @@ -308,11 +302,10 @@ SUBROUTINE ZGEBD2( M, N, A, LDA, D, E, TAUQ, TAUP, WORK, INFO ) CALL ZLARFG( M-I, ALPHA, A( MIN( I+2, M ), I ), 1, $ TAUQ( I ) ) E( I ) = DBLE( ALPHA ) - A( I+1, I ) = ONE * * Apply H(i)**H to A(i+1:m,i+1:n) from the left * - CALL ZLARF( 'Left', M-I, N-I, A( I+1, I ), 1, + CALL ZLARF1F( 'Left', M-I, N-I, A( I+1, I ), 1, $ DCONJG( TAUQ( I ) ), A( I+1, I+1 ), LDA, $ WORK ) A( I+1, I ) = E( I ) diff --git a/lapack-netlib/SRC/zgeev.f b/lapack-netlib/SRC/zgeev.f index b968900e2f..ad54ca7d72 100644 --- a/lapack-netlib/SRC/zgeev.f +++ b/lapack-netlib/SRC/zgeev.f @@ -216,7 +216,7 @@ SUBROUTINE ZGEEV( JOBVL, JOBVR, N, A, LDA, W, VL, LDVL, VR, LDVR, $ ZHSEQR, ZLACPY, ZLASCL, ZSCAL, ZTREVC3, ZUNGHR * .. * .. External Functions .. - LOGICAL LSAME + LOGICAL LSAME, DISNAN INTEGER IDAMAX, ILAENV DOUBLE PRECISION DLAMCH, DZNRM2, ZLANGE EXTERNAL LSAME, IDAMAX, ILAENV, DLAMCH, DZNRM2, ZLANGE @@ -329,6 +329,10 @@ SUBROUTINE ZGEEV( JOBVL, JOBVR, N, A, LDA, W, VL, LDVL, VR, LDVR, ELSE IF( ANRM.GT.BIGNUM ) THEN SCALEA = .TRUE. CSCALE = BIGNUM + ELSE IF ( DISNAN ( ANRM ) ) THEN + INFO = -4 + CALL XERBLA ( 'ZGEEV', -INFO ) + RETURN END IF IF( SCALEA ) $ CALL ZLASCL( 'G', 0, 0, ANRM, CSCALE, N, N, A, LDA, IERR ) diff --git a/lapack-netlib/SRC/zgehd2.f b/lapack-netlib/SRC/zgehd2.f index e4d79f1b83..4250de42b5 100644 --- a/lapack-netlib/SRC/zgehd2.f +++ b/lapack-netlib/SRC/zgehd2.f @@ -5,7 +5,6 @@ * Online html documentation available at * http://www.netlib.org/lapack/explore-html/ * -*> \htmlonly *> Download ZGEHD2 + dependencies *> *> [TGZ] @@ -13,7 +12,6 @@ *> [ZIP] *> *> [TXT] -*> \endhtmlonly * * Definition: * =========== @@ -106,7 +104,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup complex16GEcomputational +*> \ingroup gehd2 * *> \par Further Details: * ===================== @@ -146,6 +144,7 @@ *> * ===================================================================== SUBROUTINE ZGEHD2( N, ILO, IHI, A, LDA, TAU, WORK, INFO ) + IMPLICIT NONE * * -- LAPACK computational routine -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- @@ -166,10 +165,9 @@ SUBROUTINE ZGEHD2( N, ILO, IHI, A, LDA, TAU, WORK, INFO ) * .. * .. Local Scalars .. INTEGER I - COMPLEX*16 ALPHA * .. * .. External Subroutines .. - EXTERNAL XERBLA, ZLARF, ZLARFG + EXTERNAL XERBLA, ZLARF1F, ZLARFG * .. * .. Intrinsic Functions .. INTRINSIC DCONJG, MAX, MIN @@ -197,21 +195,19 @@ SUBROUTINE ZGEHD2( N, ILO, IHI, A, LDA, TAU, WORK, INFO ) * * Compute elementary reflector H(i) to annihilate A(i+2:ihi,i) * - ALPHA = A( I+1, I ) - CALL ZLARFG( IHI-I, ALPHA, A( MIN( I+2, N ), I ), 1, TAU( I ) ) - A( I+1, I ) = ONE + CALL ZLARFG( IHI-I, A( I+1, I ), A( MIN( I+2, N ), I ), 1, + $ TAU( I ) ) * * Apply H(i) to A(1:ihi,i+1:ihi) from the right * - CALL ZLARF( 'Right', IHI, IHI-I, A( I+1, I ), 1, TAU( I ), - $ A( 1, I+1 ), LDA, WORK ) + CALL ZLARF1F( 'Right', IHI, IHI-I, A( I+1, I ), 1, TAU( I ), + $ A( 1, I+1 ), LDA, WORK ) * * Apply H(i)**H to A(i+1:ihi,i+1:n) from the left * - CALL ZLARF( 'Left', IHI-I, N-I, A( I+1, I ), 1, - $ DCONJG( TAU( I ) ), A( I+1, I+1 ), LDA, WORK ) + CALL ZLARF1F( 'Left', IHI-I, N-I, A( I+1, I ), 1, + $ CONJG( TAU( I ) ), A( I+1, I+1 ), LDA, WORK ) * - A( I+1, I ) = ALPHA 10 CONTINUE * RETURN diff --git a/lapack-netlib/SRC/zgelq2.f b/lapack-netlib/SRC/zgelq2.f index 7604556a5f..19bac3142d 100644 --- a/lapack-netlib/SRC/zgelq2.f +++ b/lapack-netlib/SRC/zgelq2.f @@ -5,7 +5,6 @@ * Online html documentation available at * http://www.netlib.org/lapack/explore-html/ * -*> \htmlonly *> Download ZGELQ2 + dependencies *> *> [TGZ] @@ -13,7 +12,6 @@ *> [ZIP] *> *> [TXT] -*> \endhtmlonly * * Definition: * =========== @@ -104,7 +102,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup complex16GEcomputational +*> \ingroup gelq2 * *> \par Further Details: * ===================== @@ -126,6 +124,7 @@ *> * ===================================================================== SUBROUTINE ZGELQ2( M, N, A, LDA, TAU, WORK, INFO ) + IMPLICIT NONE * * -- LAPACK computational routine -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- @@ -146,10 +145,9 @@ SUBROUTINE ZGELQ2( M, N, A, LDA, TAU, WORK, INFO ) * .. * .. Local Scalars .. INTEGER I, K - COMPLEX*16 ALPHA * .. * .. External Subroutines .. - EXTERNAL XERBLA, ZLACGV, ZLARF, ZLARFG + EXTERNAL XERBLA, ZLACGV, ZLARF1F, ZLARFG * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN @@ -178,18 +176,16 @@ SUBROUTINE ZGELQ2( M, N, A, LDA, TAU, WORK, INFO ) * Generate elementary reflector H(i) to annihilate A(i,i+1:n) * CALL ZLACGV( N-I+1, A( I, I ), LDA ) - ALPHA = A( I, I ) - CALL ZLARFG( N-I+1, ALPHA, A( I, MIN( I+1, N ) ), LDA, + CALL ZLARFG( N-I+1, A( I, I ), A( I, MIN( I+1, N ) ), LDA, $ TAU( I ) ) IF( I.LT.M ) THEN * * Apply H(i) to A(i+1:m,i:n) from the right * - A( I, I ) = ONE - CALL ZLARF( 'Right', M-I, N-I+1, A( I, I ), LDA, TAU( I ), - $ A( I+1, I ), LDA, WORK ) + CALL ZLARF1F( 'Right', M-I, N-I+1, A( I, I ), LDA, + $ TAU( I ), + $ A( I+1, I ), LDA, WORK ) END IF - A( I, I ) = ALPHA CALL ZLACGV( N-I+1, A( I, I ), LDA ) 10 CONTINUE RETURN diff --git a/lapack-netlib/SRC/zgeql2.f b/lapack-netlib/SRC/zgeql2.f index 35bd61d412..6cd9afe8cb 100644 --- a/lapack-netlib/SRC/zgeql2.f +++ b/lapack-netlib/SRC/zgeql2.f @@ -5,7 +5,6 @@ * Online html documentation available at * http://www.netlib.org/lapack/explore-html/ * -*> \htmlonly *> Download ZGEQL2 + dependencies *> *> [TGZ] @@ -13,7 +12,6 @@ *> [ZIP] *> *> [TXT] -*> \endhtmlonly * * Definition: * =========== @@ -98,7 +96,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup complex16GEcomputational +*> \ingroup geql2 * *> \par Further Details: * ===================== @@ -120,6 +118,7 @@ *> * ===================================================================== SUBROUTINE ZGEQL2( M, N, A, LDA, TAU, WORK, INFO ) + IMPLICIT NONE * * -- LAPACK computational routine -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- @@ -140,10 +139,9 @@ SUBROUTINE ZGEQL2( M, N, A, LDA, TAU, WORK, INFO ) * .. * .. Local Scalars .. INTEGER I, K - COMPLEX*16 ALPHA * .. * .. External Subroutines .. - EXTERNAL XERBLA, ZLARF, ZLARFG + EXTERNAL XERBLA, ZLARF1L, ZLARFG * .. * .. Intrinsic Functions .. INTRINSIC DCONJG, MAX, MIN @@ -172,15 +170,13 @@ SUBROUTINE ZGEQL2( M, N, A, LDA, TAU, WORK, INFO ) * Generate elementary reflector H(i) to annihilate * A(1:m-k+i-1,n-k+i) * - ALPHA = A( M-K+I, N-K+I ) - CALL ZLARFG( M-K+I, ALPHA, A( 1, N-K+I ), 1, TAU( I ) ) + CALL ZLARFG( M-K+I, A( M-K+I, N-K+I ), A( 1, N-K+I ), 1, + $ TAU( I ) ) * * Apply H(i)**H to A(1:m-k+i,1:n-k+i-1) from the left * - A( M-K+I, N-K+I ) = ONE - CALL ZLARF( 'Left', M-K+I, N-K+I-1, A( 1, N-K+I ), 1, - $ DCONJG( TAU( I ) ), A, LDA, WORK ) - A( M-K+I, N-K+I ) = ALPHA + CALL ZLARF1L( 'Left', M-K+I, N-K+I-1, A( 1, N-K+I ), 1, + $ CONJG( TAU( I ) ), A, LDA, WORK ) 10 CONTINUE RETURN * diff --git a/lapack-netlib/SRC/zgeqp3rk.f b/lapack-netlib/SRC/zgeqp3rk.f index f637966c8d..989ce69cc7 100644 --- a/lapack-netlib/SRC/zgeqp3rk.f +++ b/lapack-netlib/SRC/zgeqp3rk.f @@ -5,7 +5,6 @@ * Online html documentation available at * http://www.netlib.org/lapack/explore-html/ * -*> \htmlonly *> Download ZGEQP3RK + dependencies *> *> [TGZ] @@ -13,7 +12,6 @@ *> [ZIP] *> *> [TXT] -*> \endhtmlonly * * Definition: * =========== @@ -552,27 +550,19 @@ *> C. H. Bischof, Math. and Comp. Sci. Div., Argonne National Lab, USA. *> A BLAS-3 version of the QR factorization with column pivoting. *> LAPACK Working Note 114 -*> \htmlonly *> https://www.netlib.org/lapack/lawnspdf/lawn114.pdf -*> \endhtmlonly *> and in *> SIAM J. Sci. Comput., 19(5):1486-1494, Sept. 1998. -*> \htmlonly *> https://doi.org/10.1137/S1064827595296732 -*> \endhtmlonly *> *> [2] A partial column norm updating strategy developed in 2006. *> Z. Drmac and Z. Bujanovic, Dept. of Math., University of Zagreb, Croatia. *> On the failure of rank revealing QR factorization software – a case study. *> LAPACK Working Note 176. -*> \htmlonly *> http://www.netlib.org/lapack/lawnspdf/lawn176.pdf -*> \endhtmlonly *> and in *> ACM Trans. Math. Softw. 35, 2, Article 12 (July 2008), 28 pages. -*> \htmlonly *> https://doi.org/10.1145/1377612.1377616 -*> \endhtmlonly * *> \par Contributors: * ================== @@ -677,7 +667,7 @@ SUBROUTINE ZGEQP3RK( M, N, NRHS, KMAX, ABSTOL, RELTOL, A, LDA, * Minimal workspace size in case of using only unblocked * BLAS 2 code in ZLAQP2RK. * 1) ZLAQP2RK: N+NRHS-1 to use in WORK array that is used -* in ZLARF subroutine inside ZLAQP2RK to apply an +* in ZLARF1F subroutine inside ZLAQP2RK to apply an * elementary reflector from the left. * TOTAL_WORK_SIZE = 3*N + NRHS - 1 * @@ -693,7 +683,7 @@ SUBROUTINE ZGEQP3RK( M, N, NRHS, KMAX, ABSTOL, RELTOL, A, LDA, * 1) ZGEQP3RK, ZLAQP2RK, ZLAQP3RK: 2*N to store full and * partial column 2-norms. * 2) ZLAQP2RK: N+NRHS-1 to use in WORK array that is used -* in ZLARF subroutine to apply an elementary reflector +* in ZLARF1F subroutine to apply an elementary reflector * from the left. * 3) ZLAQP3RK: NB*(N+NRHS) to use in the work array F that * is used to apply a block reflector from @@ -893,7 +883,8 @@ SUBROUTINE ZGEQP3RK( M, N, NRHS, KMAX, ABSTOL, RELTOL, A, LDA, * Determine when to cross over from blocked to unblocked code. * (for N less than NX, unblocked code should be used). * - NX = MAX( 0, ILAENV( IXOVER, 'ZGEQP3RK', ' ', M, N, -1, -1 ) ) + NX = MAX( 0, ILAENV( IXOVER, 'ZGEQP3RK', ' ', M, N, -1, + $ -1 ) ) * IF( NX.LT.MINMN ) THEN * diff --git a/lapack-netlib/SRC/zgeqr2.f b/lapack-netlib/SRC/zgeqr2.f index 511a81ef8a..958e606b73 100644 --- a/lapack-netlib/SRC/zgeqr2.f +++ b/lapack-netlib/SRC/zgeqr2.f @@ -5,7 +5,6 @@ * Online html documentation available at * http://www.netlib.org/lapack/explore-html/ * -*> \htmlonly *> Download ZGEQR2 + dependencies *> *> [TGZ] @@ -13,7 +12,6 @@ *> [ZIP] *> *> [TXT] -*> \endhtmlonly * * Definition: * =========== @@ -105,7 +103,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup complex16GEcomputational +*> \ingroup geqr2 * *> \par Further Details: * ===================== @@ -127,6 +125,7 @@ *> * ===================================================================== SUBROUTINE ZGEQR2( M, N, A, LDA, TAU, WORK, INFO ) + IMPLICIT NONE * * -- LAPACK computational routine -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- @@ -147,10 +146,9 @@ SUBROUTINE ZGEQR2( M, N, A, LDA, TAU, WORK, INFO ) * .. * .. Local Scalars .. INTEGER I, K - COMPLEX*16 ALPHA * .. * .. External Subroutines .. - EXTERNAL XERBLA, ZLARF, ZLARFG + EXTERNAL XERBLA, ZLARF1F, ZLARFG * .. * .. Intrinsic Functions .. INTRINSIC DCONJG, MAX, MIN @@ -184,11 +182,8 @@ SUBROUTINE ZGEQR2( M, N, A, LDA, TAU, WORK, INFO ) * * Apply H(i)**H to A(i:m,i+1:n) from the left * - ALPHA = A( I, I ) - A( I, I ) = ONE - CALL ZLARF( 'Left', M-I+1, N-I, A( I, I ), 1, - $ DCONJG( TAU( I ) ), A( I, I+1 ), LDA, WORK ) - A( I, I ) = ALPHA + CALL ZLARF1F( 'Left', M-I+1, N-I, A( I, I ), 1, + $ CONJG( TAU( I ) ), A( I, I+1 ), LDA, WORK ) END IF 10 CONTINUE RETURN diff --git a/lapack-netlib/SRC/zgeqr2p.f b/lapack-netlib/SRC/zgeqr2p.f index 136d75f85c..aef5a2b015 100644 --- a/lapack-netlib/SRC/zgeqr2p.f +++ b/lapack-netlib/SRC/zgeqr2p.f @@ -5,7 +5,6 @@ * Online html documentation available at * http://www.netlib.org/lapack/explore-html/ * -*> \htmlonly *> Download ZGEQR2P + dependencies *> *> [TGZ] @@ -13,7 +12,6 @@ *> [ZIP] *> *> [TXT] -*> \endhtmlonly * * Definition: * =========== @@ -107,7 +105,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup complex16GEcomputational +*> \ingroup geqr2p * *> \par Further Details: * ===================== @@ -131,6 +129,7 @@ *> * ===================================================================== SUBROUTINE ZGEQR2P( M, N, A, LDA, TAU, WORK, INFO ) + IMPLICIT NONE * * -- LAPACK computational routine -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- @@ -151,10 +150,9 @@ SUBROUTINE ZGEQR2P( M, N, A, LDA, TAU, WORK, INFO ) * .. * .. Local Scalars .. INTEGER I, K - COMPLEX*16 ALPHA * .. * .. External Subroutines .. - EXTERNAL XERBLA, ZLARF, ZLARFGP + EXTERNAL XERBLA, ZLARF1F, ZLARFGP * .. * .. Intrinsic Functions .. INTRINSIC DCONJG, MAX, MIN @@ -188,11 +186,8 @@ SUBROUTINE ZGEQR2P( M, N, A, LDA, TAU, WORK, INFO ) * * Apply H(i)**H to A(i:m,i+1:n) from the left * - ALPHA = A( I, I ) - A( I, I ) = ONE - CALL ZLARF( 'Left', M-I+1, N-I, A( I, I ), 1, - $ DCONJG( TAU( I ) ), A( I, I+1 ), LDA, WORK ) - A( I, I ) = ALPHA + CALL ZLARF1F( 'Left', M-I+1, N-I, A( I, I ), 1, + $ CONJG( TAU( I ) ), A( I, I+1 ), LDA, WORK ) END IF 10 CONTINUE RETURN diff --git a/lapack-netlib/SRC/zgerq2.f b/lapack-netlib/SRC/zgerq2.f index 8a987dd0a0..dbd33d6b16 100644 --- a/lapack-netlib/SRC/zgerq2.f +++ b/lapack-netlib/SRC/zgerq2.f @@ -5,7 +5,6 @@ * Online html documentation available at * http://www.netlib.org/lapack/explore-html/ * -*> \htmlonly *> Download ZGERQ2 + dependencies *> *> [TGZ] @@ -13,7 +12,6 @@ *> [ZIP] *> *> [TXT] -*> \endhtmlonly * * Definition: * =========== @@ -98,7 +96,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup complex16GEcomputational +*> \ingroup gerq2 * *> \par Further Details: * ===================== @@ -120,6 +118,7 @@ *> * ===================================================================== SUBROUTINE ZGERQ2( M, N, A, LDA, TAU, WORK, INFO ) + IMPLICIT NONE * * -- LAPACK computational routine -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- @@ -140,10 +139,9 @@ SUBROUTINE ZGERQ2( M, N, A, LDA, TAU, WORK, INFO ) * .. * .. Local Scalars .. INTEGER I, K - COMPLEX*16 ALPHA * .. * .. External Subroutines .. - EXTERNAL XERBLA, ZLACGV, ZLARF, ZLARFG + EXTERNAL XERBLA, ZLACGV, ZLARF1L, ZLARFG * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN @@ -173,15 +171,13 @@ SUBROUTINE ZGERQ2( M, N, A, LDA, TAU, WORK, INFO ) * A(m-k+i,1:n-k+i-1) * CALL ZLACGV( N-K+I, A( M-K+I, 1 ), LDA ) - ALPHA = A( M-K+I, N-K+I ) - CALL ZLARFG( N-K+I, ALPHA, A( M-K+I, 1 ), LDA, TAU( I ) ) + CALL ZLARFG( N-K+I, A( M-K+I, N-K+I ), A( M-K+I, 1 ), LDA, + $ TAU( I ) ) * * Apply H(i) to A(1:m-k+i-1,1:n-k+i) from the right * - A( M-K+I, N-K+I ) = ONE - CALL ZLARF( 'Right', M-K+I-1, N-K+I, A( M-K+I, 1 ), LDA, - $ TAU( I ), A, LDA, WORK ) - A( M-K+I, N-K+I ) = ALPHA + CALL ZLARF1L( 'Right', M-K+I-1, N-K+I, A( M-K+I, 1 ), LDA, + $ TAU( I ), A, LDA, WORK ) CALL ZLACGV( N-K+I-1, A( M-K+I, 1 ), LDA ) 10 CONTINUE RETURN diff --git a/lapack-netlib/SRC/zgesvdq.f b/lapack-netlib/SRC/zgesvdq.f index b990f73893..573db69a9d 100644 --- a/lapack-netlib/SRC/zgesvdq.f +++ b/lapack-netlib/SRC/zgesvdq.f @@ -5,7 +5,6 @@ * Online html documentation available at * http://www.netlib.org/lapack/explore-html/ * -*> \htmlonly *> Download ZGESVDQ + dependencies *> *> [TGZ] @@ -13,7 +12,6 @@ *> [ZIP] *> *> [TXT] -*> \endhtmlonly * * Definition: * =========== @@ -40,7 +38,7 @@ *> *> \verbatim *> -*> ZCGESVDQ computes the singular value decomposition (SVD) of a complex +*> ZGESVDQ computes the singular value decomposition (SVD) of a complex *> M-by-N matrix A, where M >= N. The SVD of A is written as *> [++] [xx] [x0] [xx] *> A = U * SIGMA * V^*, [++] = [xx] * [ox] * [xx] @@ -51,8 +49,8 @@ *> left and the right singular vectors of A, respectively. *> \endverbatim * -* Arguments -* ========= +* Arguments: +* ========== * *> \param[in] JOBA *> \verbatim @@ -92,7 +90,7 @@ *> the adjoint R**H of the computed triangular factor R. This involves *> some extra data movement (matrix transpositions). Useful for *> experiments, research and development. -*> = 'N' The triangular factor R is given as input to CGESVD. This may be +*> = 'N' The triangular factor R is given as input to ZGESVD. This may be *> preferred as it involves less data movement. *> \endverbatim *> @@ -206,7 +204,7 @@ *> revealing QR factorization, following the strategy specified by the *> value of JOBA. If JOBV = 'R' and JOBU = 'R', only NUMRANK *> leading singular values and vectors are then requested in the call -*> of CGESVD. The final value of NUMRANK might be further reduced if +*> of ZGESVD. The final value of NUMRANK might be further reduced if *> some singular values are computed as zeros. *> \endverbatim *> @@ -358,10 +356,10 @@ *> nested DO loops should be replaced with calls to an optimized subroutine. *> 2. This code scales A by 1/SQRT(M) if the largest ABS(A(i,j)) could cause *> column norm overflow. This is the minial precaution and it is left to the -*> SVD routine (CGESVD) to do its own preemptive scaling if potential over- +*> SVD routine (ZGESVD) to do its own preemptive scaling if potential over- *> or underflows are detected. To avoid repeated scanning of the array A, *> an optimal implementation would do all necessary scaling before calling -*> CGESVD and the scaling in CGESVD can be switched off. +*> ZGESVD and the scaling in ZGESVD can be switched off. *> 3. Other comments related to code optimization are given in comments in the *> code, enclosed in [[double brackets]]. *> \endverbatim @@ -404,14 +402,14 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup complex16GEsing +*> \ingroup gesvdq * * ===================================================================== SUBROUTINE ZGESVDQ( JOBA, JOBP, JOBR, JOBU, JOBV, M, N, A, LDA, $ S, U, LDU, V, LDV, NUMRANK, IWORK, LIWORK, $ CWORK, LCWORK, RWORK, LRWORK, INFO ) + IMPLICIT NONE * .. Scalar Arguments .. - IMPLICIT NONE CHARACTER JOBA, JOBP, JOBR, JOBU, JOBV INTEGER M, N, LDA, LDU, LDV, NUMRANK, LIWORK, LCWORK, LRWORK, $ INFO @@ -447,7 +445,8 @@ SUBROUTINE ZGESVDQ( JOBA, JOBP, JOBR, JOBU, JOBV, M, N, A, LDA, DOUBLE PRECISION RDUMMY(1) * .. * .. External Subroutines (BLAS, LAPACK) - EXTERNAL ZGELQF, ZGEQP3, ZGEQRF, ZGESVD, ZLACPY, ZLAPMT, + EXTERNAL ZGELQF, ZGEQP3, ZGEQRF, ZGESVD, ZLACPY, + $ ZLAPMT, $ ZLASCL, ZLASET, ZLASWP, ZDSCAL, DLASET, DLASCL, $ ZPOCON, ZUNMLQ, ZUNMQR, XERBLA * .. @@ -672,10 +671,12 @@ SUBROUTINE ZGESVDQ( JOBA, JOBP, JOBR, JOBU, JOBV, M, N, A, LDA, IF ( WNTVA ) THEN CALL ZGEQRF(N,N/2,U,LDU,CDUMMY,CDUMMY,-1,IERR) LWRK_ZGEQRF = INT( CDUMMY(1) ) - CALL ZGESVD( 'S', 'O', N/2,N/2, V,LDV, S, U,LDU, + CALL ZGESVD( 'S', 'O', N/2,N/2, V,LDV, S, U, + $ LDU, $ V, LDV, CDUMMY, -1, RDUMMY, IERR ) LWRK_ZGESVD2 = INT( CDUMMY(1) ) - CALL ZUNMQR( 'R', 'C', N, N, N/2, U, LDU, CDUMMY, + CALL ZUNMQR( 'R', 'C', N, N, N/2, U, LDU, + $ CDUMMY, $ V, LDV, CDUMMY, -1, IERR ) LWRK_ZUNMQR2 = INT( CDUMMY(1) ) OPTWRK2 = MAX( LWRK_ZGEQP3, N/2+LWRK_ZGEQRF, @@ -694,10 +695,12 @@ SUBROUTINE ZGESVDQ( JOBA, JOBP, JOBR, JOBU, JOBV, M, N, A, LDA, IF ( WNTVA ) THEN CALL ZGELQF(N/2,N,U,LDU,CDUMMY,CDUMMY,-1,IERR) LWRK_ZGELQF = INT( CDUMMY(1) ) - CALL ZGESVD( 'S','O', N/2,N/2, V, LDV, S, U, LDU, + CALL ZGESVD( 'S','O', N/2,N/2, V, LDV, S, U, + $ LDU, $ V, LDV, CDUMMY, -1, RDUMMY, IERR ) LWRK_ZGESVD2 = INT( CDUMMY(1) ) - CALL ZUNMLQ( 'R', 'N', N, N, N/2, U, LDU, CDUMMY, + CALL ZUNMLQ( 'R', 'N', N, N, N/2, U, LDU, + $ CDUMMY, $ V, LDV, CDUMMY,-1,IERR ) LWRK_ZUNMLQ = INT( CDUMMY(1) ) OPTWRK2 = MAX( LWRK_ZGEQP3, N/2+LWRK_ZGELQF, @@ -772,9 +775,12 @@ SUBROUTINE ZGESVDQ( JOBA, JOBP, JOBR, JOBU, JOBV, M, N, A, LDA, * Quick return: A is the M x N zero matrix. NUMRANK = 0 CALL DLASET( 'G', N, 1, ZERO, ZERO, S, N ) - IF ( WNTUS ) CALL ZLASET('G', M, N, CZERO, CONE, U, LDU) - IF ( WNTUA ) CALL ZLASET('G', M, M, CZERO, CONE, U, LDU) - IF ( WNTVA ) CALL ZLASET('G', N, N, CZERO, CONE, V, LDV) + IF ( WNTUS ) CALL ZLASET('G', M, N, CZERO, CONE, U, + $ LDU) + IF ( WNTUA ) CALL ZLASET('G', M, M, CZERO, CONE, U, + $ LDU) + IF ( WNTVA ) CALL ZLASET('G', N, N, CZERO, CONE, V, + $ LDV) IF ( WNTUF ) THEN CALL ZLASET( 'G', N, 1, CZERO, CZERO, CWORK, N ) CALL ZLASET( 'G', M, N, CZERO, CONE, U, LDU ) @@ -795,7 +801,8 @@ SUBROUTINE ZGESVDQ( JOBA, JOBP, JOBR, JOBU, JOBV, M, N, A, LDA, IF ( RWORK(1) .GT. BIG / SQRT(DBLE(M)) ) THEN * .. to prevent overflow in the QR factorization, scale the * matrix by 1/sqrt(M) if too large entry detected - CALL ZLASCL('G',0,0,SQRT(DBLE(M)),ONE, M,N, A,LDA, IERR) + CALL ZLASCL('G',0,0,SQRT(DBLE(M)),ONE, M,N, A,LDA, + $ IERR) ASCALED = .TRUE. END IF CALL ZLASWP( N, A, LDA, 1, M-1, IWORK(N+1), 1 ) @@ -817,7 +824,8 @@ SUBROUTINE ZGESVDQ( JOBA, JOBP, JOBR, JOBU, JOBV, M, N, A, LDA, IF ( RTMP .GT. BIG / SQRT(DBLE(M)) ) THEN * .. to prevent overflow in the QR factorization, scale the * matrix by 1/sqrt(M) if too large entry detected - CALL ZLASCL('G',0,0, SQRT(DBLE(M)),ONE, M,N, A,LDA, IERR) + CALL ZLASCL('G',0,0, SQRT(DBLE(M)),ONE, M,N, A,LDA, + $ IERR) ASCALED = .TRUE. END IF END IF @@ -946,7 +954,8 @@ SUBROUTINE ZGESVDQ( JOBA, JOBP, JOBR, JOBU, JOBV, M, N, A, LDA, * .. compute the singular values of R = [A](1:NR,1:N) * IF ( NR .GT. 1 ) - $ CALL ZLASET( 'L', NR-1,NR-1, CZERO,CZERO, A(2,1), LDA ) + $ CALL ZLASET( 'L', NR-1,NR-1, CZERO,CZERO, A(2,1), + $ LDA ) CALL ZGESVD( 'N', 'N', NR, N, A, LDA, S, U, LDU, $ V, LDV, CWORK, LCWORK, RWORK, INFO ) * @@ -966,7 +975,8 @@ SUBROUTINE ZGESVDQ( JOBA, JOBP, JOBR, JOBU, JOBV, M, N, A, LDA, 1193 CONTINUE 1192 CONTINUE IF ( NR .GT. 1 ) - $ CALL ZLASET( 'U', NR-1,NR-1, CZERO,CZERO, U(1,2), LDU ) + $ CALL ZLASET( 'U', NR-1,NR-1, CZERO,CZERO, U(1,2), + $ LDU ) * .. the left singular vectors not computed, the NR right singular * vectors overwrite [U](1:NR,1:NR) as conjugate transposed. These * will be pre-multiplied by Q to build the left singular vectors of A. @@ -987,7 +997,8 @@ SUBROUTINE ZGESVDQ( JOBA, JOBP, JOBR, JOBU, JOBV, M, N, A, LDA, * .. copy R into [U] and overwrite [U] with the left singular vectors CALL ZLACPY( 'U', NR, N, A, LDA, U, LDU ) IF ( NR .GT. 1 ) - $ CALL ZLASET( 'L', NR-1, NR-1, CZERO, CZERO, U(2,1), LDU ) + $ CALL ZLASET( 'L', NR-1, NR-1, CZERO, CZERO, U(2,1), + $ LDU ) * .. the right singular vectors not computed, the NR left singular * vectors overwrite [U](1:NR,1:NR) CALL ZGESVD( 'O', 'N', NR, N, U, LDU, S, U, LDU, @@ -1002,7 +1013,8 @@ SUBROUTINE ZGESVDQ( JOBA, JOBP, JOBR, JOBU, JOBV, M, N, A, LDA, IF ( ( NR .LT. M ) .AND. ( .NOT.WNTUF ) ) THEN CALL ZLASET('A', M-NR, NR, CZERO, CZERO, U(NR+1,1), LDU) IF ( NR .LT. N1 ) THEN - CALL ZLASET( 'A',NR,N1-NR,CZERO,CZERO,U(1,NR+1), LDU ) + CALL ZLASET( 'A',NR,N1-NR,CZERO,CZERO,U(1,NR+1), + $ LDU ) CALL ZLASET( 'A',M-NR,N1-NR,CZERO,CONE, $ U(NR+1,NR+1), LDU ) END IF @@ -1030,7 +1042,8 @@ SUBROUTINE ZGESVDQ( JOBA, JOBP, JOBR, JOBU, JOBV, M, N, A, LDA, 1166 CONTINUE 1165 CONTINUE IF ( NR .GT. 1 ) - $ CALL ZLASET( 'U', NR-1,NR-1, CZERO,CZERO, V(1,2), LDV ) + $ CALL ZLASET( 'U', NR-1,NR-1, CZERO,CZERO, V(1,2), + $ LDV ) * .. the left singular vectors of R**H overwrite V, the right singular * vectors not computed IF ( WNTVR .OR. ( NR .EQ. N ) ) THEN @@ -1060,7 +1073,8 @@ SUBROUTINE ZGESVDQ( JOBA, JOBP, JOBR, JOBU, JOBV, M, N, A, LDA, * by padding a zero block. In the case NR << N, a more efficient * way is to first use the QR factorization. For more details * how to implement this, see the " FULL SVD " branch. - CALL ZLASET('G', N, N-NR, CZERO, CZERO, V(1,NR+1), LDV) + CALL ZLASET('G', N, N-NR, CZERO, CZERO, V(1,NR+1), + $ LDV) CALL ZGESVD( 'O', 'N', N, N, V, LDV, S, U, LDU, $ U, LDU, CWORK(N+1), LCWORK-N, RWORK, INFO ) * @@ -1080,7 +1094,8 @@ SUBROUTINE ZGESVDQ( JOBA, JOBP, JOBR, JOBU, JOBV, M, N, A, LDA, * .. copy R into V and overwrite V with the right singular vectors CALL ZLACPY( 'U', NR, N, A, LDA, V, LDV ) IF ( NR .GT. 1 ) - $ CALL ZLASET( 'L', NR-1, NR-1, CZERO, CZERO, V(2,1), LDV ) + $ CALL ZLASET( 'L', NR-1, NR-1, CZERO, CZERO, V(2,1), + $ LDV ) * .. the right singular vectors overwrite V, the NR left singular * vectors stored in U(1:NR,1:NR) IF ( WNTVR .OR. ( NR .EQ. N ) ) THEN @@ -1094,7 +1109,8 @@ SUBROUTINE ZGESVDQ( JOBA, JOBP, JOBR, JOBU, JOBV, M, N, A, LDA, * by padding a zero block. In the case NR << N, a more efficient * way is to first use the LQ factorization. For more details * how to implement this, see the " FULL SVD " branch. - CALL ZLASET('G', N-NR, N, CZERO,CZERO, V(NR+1,1), LDV) + CALL ZLASET('G', N-NR, N, CZERO,CZERO, V(NR+1,1), + $ LDV) CALL ZGESVD( 'N', 'O', N, N, V, LDV, S, U, LDU, $ V, LDV, CWORK(N+1), LCWORK-N, RWORK, INFO ) CALL ZLAPMT( .FALSE., N, N, V, LDV, IWORK ) @@ -1120,7 +1136,8 @@ SUBROUTINE ZGESVDQ( JOBA, JOBP, JOBR, JOBU, JOBV, M, N, A, LDA, 1169 CONTINUE 1168 CONTINUE IF ( NR .GT. 1 ) - $ CALL ZLASET( 'U', NR-1,NR-1, CZERO,CZERO, V(1,2), LDV ) + $ CALL ZLASET( 'U', NR-1,NR-1, CZERO,CZERO, V(1,2), + $ LDV ) * * .. the left singular vectors of R**H overwrite [V], the NR right * singular vectors of R**H stored in [U](1:NR,1:NR) as conjugate @@ -1155,9 +1172,11 @@ SUBROUTINE ZGESVDQ( JOBA, JOBP, JOBR, JOBU, JOBV, M, N, A, LDA, 1117 CONTINUE * IF ( ( NR .LT. M ) .AND. .NOT.(WNTUF)) THEN - CALL ZLASET('A', M-NR,NR, CZERO,CZERO, U(NR+1,1), LDU) + CALL ZLASET('A', M-NR,NR, CZERO,CZERO, U(NR+1,1), + $ LDU) IF ( NR .LT. N1 ) THEN - CALL ZLASET('A',NR,N1-NR,CZERO,CZERO,U(1,NR+1),LDU) + CALL ZLASET('A',NR,N1-NR,CZERO,CZERO,U(1,NR+1), + $ LDU) CALL ZLASET( 'A',M-NR,N1-NR,CZERO,CONE, $ U(NR+1,NR+1), LDU ) END IF @@ -1180,7 +1199,8 @@ SUBROUTINE ZGESVDQ( JOBA, JOBP, JOBR, JOBU, JOBV, M, N, A, LDA, 1199 CONTINUE 1198 CONTINUE IF ( NR .GT. 1 ) - $ CALL ZLASET('U',NR-1,NR-1, CZERO,CZERO, V(1,2),LDV) + $ CALL ZLASET('U',NR-1,NR-1, CZERO,CZERO, V(1,2), + $ LDV) * CALL ZLASET('A',N,N-NR,CZERO,CZERO,V(1,NR+1),LDV) CALL ZGESVD( 'O', 'A', N, N, V, LDV, S, V, LDV, @@ -1210,7 +1230,8 @@ SUBROUTINE ZGESVDQ( JOBA, JOBP, JOBR, JOBU, JOBV, M, N, A, LDA, IF ( ( N .LT. M ) .AND. .NOT.(WNTUF)) THEN CALL ZLASET('A',M-N,N,CZERO,CZERO,U(N+1,1),LDU) IF ( N .LT. N1 ) THEN - CALL ZLASET('A',N,N1-N,CZERO,CZERO,U(1,N+1),LDU) + CALL ZLASET('A',N,N1-N,CZERO,CZERO,U(1,N+1), + $ LDU) CALL ZLASET('A',M-N,N1-N,CZERO,CONE, $ U(N+1,N+1), LDU ) END IF @@ -1224,7 +1245,8 @@ SUBROUTINE ZGESVDQ( JOBA, JOBP, JOBR, JOBU, JOBV, M, N, A, LDA, 1197 CONTINUE 1196 CONTINUE IF ( NR .GT. 1 ) - $ CALL ZLASET('U',NR-1,NR-1,CZERO,CZERO,U(1,NR+2),LDU) + $ CALL ZLASET('U',NR-1,NR-1,CZERO,CZERO,U(1,NR+2), + $ LDU) CALL ZGEQRF( N, NR, U(1,NR+1), LDU, CWORK(N+1), $ CWORK(N+NR+1), LCWORK-N-NR, IERR ) DO 1143 p = 1, NR @@ -1237,16 +1259,19 @@ SUBROUTINE ZGESVDQ( JOBA, JOBP, JOBR, JOBU, JOBV, M, N, A, LDA, $ V,LDV, CWORK(N+NR+1),LCWORK-N-NR,RWORK, INFO ) CALL ZLASET('A',N-NR,NR,CZERO,CZERO,V(NR+1,1),LDV) CALL ZLASET('A',NR,N-NR,CZERO,CZERO,V(1,NR+1),LDV) - CALL ZLASET('A',N-NR,N-NR,CZERO,CONE,V(NR+1,NR+1),LDV) + CALL ZLASET('A',N-NR,N-NR,CZERO,CONE,V(NR+1,NR+1), + $ LDV) CALL ZUNMQR('R','C', N, N, NR, U(1,NR+1), LDU, $ CWORK(N+1),V,LDV,CWORK(N+NR+1),LCWORK-N-NR,IERR) CALL ZLAPMT( .FALSE., N, N, V, LDV, IWORK ) * .. assemble the left singular vector matrix U of dimensions * (M x NR) or (M x N) or (M x M). IF ( ( NR .LT. M ) .AND. .NOT.(WNTUF)) THEN - CALL ZLASET('A',M-NR,NR,CZERO,CZERO,U(NR+1,1),LDU) + CALL ZLASET('A',M-NR,NR,CZERO,CZERO,U(NR+1,1), + $ LDU) IF ( NR .LT. N1 ) THEN - CALL ZLASET('A',NR,N1-NR,CZERO,CZERO,U(1,NR+1),LDU) + CALL ZLASET('A',NR,N1-NR,CZERO,CZERO,U(1,NR+1), + $ LDU) CALL ZLASET( 'A',M-NR,N1-NR,CZERO,CONE, $ U(NR+1,NR+1),LDU) END IF @@ -1262,7 +1287,8 @@ SUBROUTINE ZGESVDQ( JOBA, JOBP, JOBR, JOBU, JOBV, M, N, A, LDA, * .. copy R into [V] and overwrite V with the right singular vectors CALL ZLACPY( 'U', NR, N, A, LDA, V, LDV ) IF ( NR .GT. 1 ) - $ CALL ZLASET( 'L', NR-1,NR-1, CZERO,CZERO, V(2,1), LDV ) + $ CALL ZLASET( 'L', NR-1,NR-1, CZERO,CZERO, V(2,1), + $ LDV ) * .. the right singular vectors of R overwrite [V], the NR left * singular vectors of R stored in [U](1:NR,1:NR) CALL ZGESVD( 'S', 'O', NR, N, V, LDV, S, U, LDU, @@ -1272,9 +1298,11 @@ SUBROUTINE ZGESVDQ( JOBA, JOBP, JOBR, JOBU, JOBV, M, N, A, LDA, * .. assemble the left singular vector matrix U of dimensions * (M x NR) or (M x N) or (M x M). IF ( ( NR .LT. M ) .AND. .NOT.(WNTUF)) THEN - CALL ZLASET('A', M-NR,NR, CZERO,CZERO, U(NR+1,1), LDU) + CALL ZLASET('A', M-NR,NR, CZERO,CZERO, U(NR+1,1), + $ LDU) IF ( NR .LT. N1 ) THEN - CALL ZLASET('A',NR,N1-NR,CZERO,CZERO,U(1,NR+1),LDU) + CALL ZLASET('A',NR,N1-NR,CZERO,CZERO,U(1,NR+1), + $ LDU) CALL ZLASET( 'A',M-NR,N1-NR,CZERO,CONE, $ U(NR+1,NR+1), LDU ) END IF @@ -1293,10 +1321,12 @@ SUBROUTINE ZGESVDQ( JOBA, JOBP, JOBR, JOBU, JOBV, M, N, A, LDA, IF ( OPTRATIO * NR .GT. N ) THEN CALL ZLACPY( 'U', NR, N, A, LDA, V, LDV ) IF ( NR .GT. 1 ) - $ CALL ZLASET('L', NR-1,NR-1, CZERO,CZERO, V(2,1),LDV) + $ CALL ZLASET('L', NR-1,NR-1, CZERO,CZERO, V(2,1), + $ LDV) * .. the right singular vectors of R overwrite [V], the NR left * singular vectors of R stored in [U](1:NR,1:NR) - CALL ZLASET('A', N-NR,N, CZERO,CZERO, V(NR+1,1),LDV) + CALL ZLASET('A', N-NR,N, CZERO,CZERO, V(NR+1,1), + $ LDV) CALL ZGESVD( 'S', 'O', N, N, V, LDV, S, U, LDU, $ V, LDV, CWORK(N+1), LCWORK-N, RWORK, INFO ) CALL ZLAPMT( .FALSE., N, N, V, LDV, IWORK ) @@ -1308,7 +1338,8 @@ SUBROUTINE ZGESVDQ( JOBA, JOBP, JOBR, JOBU, JOBV, M, N, A, LDA, IF ( ( N .LT. M ) .AND. .NOT.(WNTUF)) THEN CALL ZLASET('A',M-N,N,CZERO,CZERO,U(N+1,1),LDU) IF ( N .LT. N1 ) THEN - CALL ZLASET('A',N,N1-N,CZERO,CZERO,U(1,N+1),LDU) + CALL ZLASET('A',N,N1-N,CZERO,CZERO,U(1,N+1), + $ LDU) CALL ZLASET( 'A',M-N,N1-N,CZERO,CONE, $ U(N+1,N+1), LDU ) END IF @@ -1316,7 +1347,8 @@ SUBROUTINE ZGESVDQ( JOBA, JOBP, JOBR, JOBU, JOBV, M, N, A, LDA, ELSE CALL ZLACPY( 'U', NR, N, A, LDA, U(NR+1,1), LDU ) IF ( NR .GT. 1 ) - $ CALL ZLASET('L',NR-1,NR-1,CZERO,CZERO,U(NR+2,1),LDU) + $ CALL ZLASET('L',NR-1,NR-1,CZERO,CZERO,U(NR+2,1), + $ LDU) CALL ZGELQF( NR, N, U(NR+1,1), LDU, CWORK(N+1), $ CWORK(N+NR+1), LCWORK-N-NR, IERR ) CALL ZLACPY('L',NR,NR,U(NR+1,1),LDU,V,LDV) @@ -1326,16 +1358,20 @@ SUBROUTINE ZGESVDQ( JOBA, JOBP, JOBR, JOBU, JOBV, M, N, A, LDA, $ V, LDV, CWORK(N+NR+1), LCWORK-N-NR, RWORK, INFO ) CALL ZLASET('A',N-NR,NR,CZERO,CZERO,V(NR+1,1),LDV) CALL ZLASET('A',NR,N-NR,CZERO,CZERO,V(1,NR+1),LDV) - CALL ZLASET('A',N-NR,N-NR,CZERO,CONE,V(NR+1,NR+1),LDV) - CALL ZUNMLQ('R','N',N,N,NR,U(NR+1,1),LDU,CWORK(N+1), + CALL ZLASET('A',N-NR,N-NR,CZERO,CONE,V(NR+1,NR+1), + $ LDV) + CALL ZUNMLQ('R','N',N,N,NR,U(NR+1,1),LDU, + $ CWORK(N+1), $ V, LDV, CWORK(N+NR+1),LCWORK-N-NR,IERR) CALL ZLAPMT( .FALSE., N, N, V, LDV, IWORK ) * .. assemble the left singular vector matrix U of dimensions * (M x NR) or (M x N) or (M x M). IF ( ( NR .LT. M ) .AND. .NOT.(WNTUF)) THEN - CALL ZLASET('A',M-NR,NR,CZERO,CZERO,U(NR+1,1),LDU) + CALL ZLASET('A',M-NR,NR,CZERO,CZERO,U(NR+1,1), + $ LDU) IF ( NR .LT. N1 ) THEN - CALL ZLASET('A',NR,N1-NR,CZERO,CZERO,U(1,NR+1),LDU) + CALL ZLASET('A',NR,N1-NR,CZERO,CZERO,U(1,NR+1), + $ LDU) CALL ZLASET( 'A',M-NR,N1-NR,CZERO,CONE, $ U(NR+1,NR+1), LDU ) END IF @@ -1368,7 +1404,8 @@ SUBROUTINE ZGESVDQ( JOBA, JOBP, JOBR, JOBU, JOBV, M, N, A, LDA, * * .. if numerical rank deficiency is detected, the truncated * singular values are set to zero. - IF ( NR .LT. N ) CALL DLASET( 'G', N-NR,1, ZERO,ZERO, S(NR+1), N ) + IF ( NR .LT. N ) CALL DLASET( 'G', N-NR,1, ZERO,ZERO, S(NR+1), + $ N ) * .. undo scaling; this may cause overflow in the largest singular * values. IF ( ASCALED ) diff --git a/lapack-netlib/SRC/zgesvj.f b/lapack-netlib/SRC/zgesvj.f index 2be45d826e..a8b881e131 100644 --- a/lapack-netlib/SRC/zgesvj.f +++ b/lapack-netlib/SRC/zgesvj.f @@ -5,7 +5,6 @@ * Online html documentation available at * http://www.netlib.org/lapack/explore-html/ * -*> \htmlonly *> Download ZGESVJ + dependencies *> *> [TGZ] @@ -13,7 +12,6 @@ *> [ZIP] *> *> [TXT] -*> \endhtmlonly * * Definition: * =========== @@ -101,7 +99,7 @@ *> \param[in] M *> \verbatim *> M is INTEGER -*> The number of rows of the input matrix A. 1/DLAMCH('E') > M >= 0. +*> The number of rows of the input matrix A. 1/DLAMCH('E') >= M >= 0. *> \endverbatim *> *> \param[in] N @@ -217,7 +215,7 @@ *> LWORK >= 1, if MIN(M,N) = 0, and LWORK >= M+N, otherwise. *> *> If on entry LWORK = -1, then a workspace query is assumed and -*> no computation is done; CWORK(1) is set to the minial (and optimal) +*> no computation is done; CWORK(1) is set to the minimal (and optimal) *> length of CWORK. *> \endverbatim *> @@ -258,7 +256,7 @@ *> LRWORK >= 1, if MIN(M,N) = 0, and LRWORK >= MAX(6,N), otherwise. *> *> If on entry LRWORK = -1, then a workspace query is assumed and -*> no computation is done; RWORK(1) is set to the minial (and optimal) +*> no computation is done; RWORK(1) is set to the minimal (and optimal) *> length of RWORK. *> \endverbatim *> @@ -414,7 +412,8 @@ SUBROUTINE ZGESVJ( JOBA, JOBU, JOBV, M, N, A, LDA, SVA, MV, V, * from BLAS EXTERNAL ZCOPY, ZROT, ZDSCAL, ZSWAP, ZAXPY * from LAPACK - EXTERNAL DLASCL, ZLASCL, ZLASET, ZLASSQ, XERBLA + EXTERNAL DLASCL, ZLASCL, ZLASET, ZLASSQ, + $ XERBLA EXTERNAL ZGSVJ0, ZGSVJ1 * .. * .. Executable Statements .. @@ -440,9 +439,13 @@ SUBROUTINE ZGESVJ( JOBA, JOBU, JOBV, M, N, A, LDA, SVA, MV, V, LQUERY = ( LWORK.EQ.-1 ) .OR. ( LRWORK.EQ.-1 ) IF( .NOT.( UPPER .OR. LOWER .OR. LSAME( JOBA, 'G' ) ) ) THEN INFO = -1 - ELSE IF( .NOT.( LSVEC .OR. UCTOL .OR. LSAME( JOBU, 'N' ) ) ) THEN + ELSE IF( .NOT.( LSVEC .OR. + $ UCTOL .OR. + $ LSAME( JOBU, 'N' ) ) ) THEN INFO = -2 - ELSE IF( .NOT.( RSVEC .OR. APPLV .OR. LSAME( JOBV, 'N' ) ) ) THEN + ELSE IF( .NOT.( RSVEC .OR. + $ APPLV .OR. + $ LSAME( JOBV, 'N' ) ) ) THEN INFO = -3 ELSE IF( M.LT.0 ) THEN INFO = -4 @@ -455,7 +458,7 @@ SUBROUTINE ZGESVJ( JOBA, JOBU, JOBV, M, N, A, LDA, SVA, MV, V, ELSE IF( ( RSVEC .AND. ( LDV.LT.N ) ) .OR. $ ( APPLV .AND. ( LDV.LT.MV ) ) ) THEN INFO = -11 - ELSE IF( UCTOL .AND. ( RWORK( 1 ).LE.ONE ) ) THEN + ELSE IF( UCTOL .AND. ( RWORK( 1 ).LT.ONE ) ) THEN INFO = -12 ELSE IF( LWORK.LT.LWMIN .AND. ( .NOT.LQUERY ) ) THEN INFO = -13 @@ -471,7 +474,7 @@ SUBROUTINE ZGESVJ( JOBA, JOBU, JOBV, M, N, A, LDA, SVA, MV, V, RETURN ELSE IF( LQUERY ) THEN CWORK( 1 ) = LWMIN - RWORK( 1 ) = LRWMIN + RWORK( 1 ) = DBLE( LRWMIN ) RETURN END IF * @@ -785,7 +788,8 @@ SUBROUTINE ZGESVJ( JOBA, JOBU, JOBV, M, N, A, LDA, SVA, MV, V, $ V( N4*q+1, N4+1 ), LDV, EPSLN, SFMIN, TOL, 1, $ CWORK( N+1 ), LWORK-N, IERR ) * - CALL ZGSVJ0( JOBV, M, N4, A, LDA, CWORK, SVA, MVL, V, LDV, + CALL ZGSVJ0( JOBV, M, N4, A, LDA, CWORK, SVA, MVL, V, + $ LDV, $ EPSLN, SFMIN, TOL, 1, CWORK( N+1 ), LWORK-N, $ IERR ) * @@ -797,16 +801,19 @@ SUBROUTINE ZGESVJ( JOBA, JOBU, JOBV, M, N, A, LDA, SVA, MV, V, ELSE IF( UPPER ) THEN * * - CALL ZGSVJ0( JOBV, N4, N4, A, LDA, CWORK, SVA, MVL, V, LDV, + CALL ZGSVJ0( JOBV, N4, N4, A, LDA, CWORK, SVA, MVL, V, + $ LDV, $ EPSLN, SFMIN, TOL, 2, CWORK( N+1 ), LWORK-N, $ IERR ) * - CALL ZGSVJ0( JOBV, N2, N4, A( 1, N4+1 ), LDA, CWORK( N4+1 ), + CALL ZGSVJ0( JOBV, N2, N4, A( 1, N4+1 ), LDA, + $ CWORK( N4+1 ), $ SVA( N4+1 ), MVL, V( N4*q+1, N4+1 ), LDV, $ EPSLN, SFMIN, TOL, 1, CWORK( N+1 ), LWORK-N, $ IERR ) * - CALL ZGSVJ1( JOBV, N2, N2, N4, A, LDA, CWORK, SVA, MVL, V, + CALL ZGSVJ1( JOBV, N2, N2, N4, A, LDA, CWORK, SVA, MVL, + $ V, $ LDV, EPSLN, SFMIN, TOL, 1, CWORK( N+1 ), $ LWORK-N, IERR ) * @@ -960,7 +967,8 @@ SUBROUTINE ZGESVJ( JOBA, JOBU, JOBV, M, N, A, LDA, SVA, MV, V, T = HALF / THETA CS = ONE - CALL ZROT( M, A(1,p), 1, A(1,q), 1, + CALL ZROT( M, A(1,p), 1, A(1,q), + $ 1, $ CS, CONJG(OMPQ)*T ) IF ( RSVEC ) THEN CALL ZROT( MVL, V(1,p), 1, @@ -989,7 +997,8 @@ SUBROUTINE ZGESVJ( JOBA, JOBU, JOBV, M, N, A, LDA, SVA, MV, V, AAPP = AAPP*SQRT( MAX( ZERO, $ ONE-T*AQOAP*AAPQ1 ) ) * - CALL ZROT( M, A(1,p), 1, A(1,q), 1, + CALL ZROT( M, A(1,p), 1, A(1,q), + $ 1, $ CS, CONJG(OMPQ)*SN ) IF ( RSVEC ) THEN CALL ZROT( MVL, V(1,p), 1, @@ -1002,14 +1011,17 @@ SUBROUTINE ZGESVJ( JOBA, JOBU, JOBV, M, N, A, LDA, SVA, MV, V, * .. have to use modified Gram-Schmidt like transformation CALL ZCOPY( M, A( 1, p ), 1, $ CWORK(N+1), 1 ) - CALL ZLASCL( 'G', 0, 0, AAPP, ONE, M, + CALL ZLASCL( 'G', 0, 0, AAPP, ONE, + $ M, $ 1, CWORK(N+1), LDA, $ IERR ) - CALL ZLASCL( 'G', 0, 0, AAQQ, ONE, M, + CALL ZLASCL( 'G', 0, 0, AAQQ, ONE, + $ M, $ 1, A( 1, q ), LDA, IERR ) CALL ZAXPY( M, -AAPQ, CWORK(N+1), 1, $ A( 1, q ), 1 ) - CALL ZLASCL( 'G', 0, 0, ONE, AAQQ, M, + CALL ZLASCL( 'G', 0, 0, ONE, AAQQ, + $ M, $ 1, A( 1, q ), LDA, IERR ) SVA( q ) = AAQQ*SQRT( MAX( ZERO, $ ONE-AAPQ1*AAPQ1 ) ) @@ -1024,7 +1036,8 @@ SUBROUTINE ZGESVJ( JOBA, JOBU, JOBV, M, N, A, LDA, SVA, MV, V, $ THEN IF( ( AAQQ.LT.ROOTBIG ) .AND. $ ( AAQQ.GT.ROOTSFMIN ) ) THEN - SVA( q ) = DZNRM2( M, A( 1, q ), 1 ) + SVA( q ) = DZNRM2( M, A( 1, q ), + $ 1 ) ELSE T = ZERO AAQQ = ONE @@ -1177,7 +1190,8 @@ SUBROUTINE ZGESVJ( JOBA, JOBU, JOBV, M, N, A, LDA, SVA, MV, V, IF( ABS( THETA ).GT.BIGTHETA ) THEN T = HALF / THETA CS = ONE - CALL ZROT( M, A(1,p), 1, A(1,q), 1, + CALL ZROT( M, A(1,p), 1, A(1,q), + $ 1, $ CS, CONJG(OMPQ)*T ) IF( RSVEC ) THEN CALL ZROT( MVL, V(1,p), 1, @@ -1204,7 +1218,8 @@ SUBROUTINE ZGESVJ( JOBA, JOBU, JOBV, M, N, A, LDA, SVA, MV, V, AAPP = AAPP*SQRT( MAX( ZERO, $ ONE-T*AQOAP*AAPQ1 ) ) * - CALL ZROT( M, A(1,p), 1, A(1,q), 1, + CALL ZROT( M, A(1,p), 1, A(1,q), + $ 1, $ CS, CONJG(OMPQ)*SN ) IF( RSVEC ) THEN CALL ZROT( MVL, V(1,p), 1, @@ -1218,15 +1233,18 @@ SUBROUTINE ZGESVJ( JOBA, JOBU, JOBV, M, N, A, LDA, SVA, MV, V, IF( AAPP.GT.AAQQ ) THEN CALL ZCOPY( M, A( 1, p ), 1, $ CWORK(N+1), 1 ) - CALL ZLASCL( 'G', 0, 0, AAPP, ONE, + CALL ZLASCL( 'G', 0, 0, AAPP, + $ ONE, $ M, 1, CWORK(N+1),LDA, $ IERR ) - CALL ZLASCL( 'G', 0, 0, AAQQ, ONE, + CALL ZLASCL( 'G', 0, 0, AAQQ, + $ ONE, $ M, 1, A( 1, q ), LDA, $ IERR ) CALL ZAXPY( M, -AAPQ, CWORK(N+1), $ 1, A( 1, q ), 1 ) - CALL ZLASCL( 'G', 0, 0, ONE, AAQQ, + CALL ZLASCL( 'G', 0, 0, ONE, + $ AAQQ, $ M, 1, A( 1, q ), LDA, $ IERR ) SVA( q ) = AAQQ*SQRT( MAX( ZERO, @@ -1235,15 +1253,18 @@ SUBROUTINE ZGESVJ( JOBA, JOBU, JOBV, M, N, A, LDA, SVA, MV, V, ELSE CALL ZCOPY( M, A( 1, q ), 1, $ CWORK(N+1), 1 ) - CALL ZLASCL( 'G', 0, 0, AAQQ, ONE, + CALL ZLASCL( 'G', 0, 0, AAQQ, + $ ONE, $ M, 1, CWORK(N+1),LDA, $ IERR ) - CALL ZLASCL( 'G', 0, 0, AAPP, ONE, + CALL ZLASCL( 'G', 0, 0, AAPP, + $ ONE, $ M, 1, A( 1, p ), LDA, $ IERR ) CALL ZAXPY( M, -CONJG(AAPQ), $ CWORK(N+1), 1, A( 1, p ), 1 ) - CALL ZLASCL( 'G', 0, 0, ONE, AAPP, + CALL ZLASCL( 'G', 0, 0, ONE, + $ AAPP, $ M, 1, A( 1, p ), LDA, $ IERR ) SVA( p ) = AAPP*SQRT( MAX( ZERO, @@ -1259,7 +1280,8 @@ SUBROUTINE ZGESVJ( JOBA, JOBU, JOBV, M, N, A, LDA, SVA, MV, V, $ THEN IF( ( AAQQ.LT.ROOTBIG ) .AND. $ ( AAQQ.GT.ROOTSFMIN ) ) THEN - SVA( q ) = DZNRM2( M, A( 1, q ), 1) + SVA( q ) = DZNRM2( M, A( 1, q ), + $ 1) ELSE T = ZERO AAQQ = ONE @@ -1401,7 +1423,8 @@ SUBROUTINE ZGESVJ( JOBA, JOBU, JOBV, M, N, A, LDA, SVA, MV, V, IF( LSVEC .OR. UCTOL ) THEN DO 1998 p = 1, N4 * CALL ZDSCAL( M, ONE / SVA( p ), A( 1, p ), 1 ) - CALL ZLASCL( 'G',0,0, SVA(p), ONE, M, 1, A(1,p), M, IERR ) + CALL ZLASCL( 'G',0,0, SVA(p), ONE, M, 1, A(1,p), M, + $ IERR ) 1998 CONTINUE END IF * diff --git a/lapack-netlib/SRC/zgetc2.f b/lapack-netlib/SRC/zgetc2.f index eb97194f29..2d94e53121 100644 --- a/lapack-netlib/SRC/zgetc2.f +++ b/lapack-netlib/SRC/zgetc2.f @@ -5,7 +5,6 @@ * Online html documentation available at * http://www.netlib.org/lapack/explore-html/ * -*> \htmlonly *> Download ZGETC2 + dependencies *> *> [TGZ] @@ -13,7 +12,6 @@ *> [ZIP] *> *> [TXT] -*> \endhtmlonly * * Definition: * =========== @@ -98,7 +96,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup complex16GEauxiliary +*> \ingroup getc2 * *> \par Contributors: * ================== @@ -108,6 +106,7 @@ * * ===================================================================== SUBROUTINE ZGETC2( N, A, LDA, IPIV, JPIV, INFO ) + IMPLICIT NONE * * -- LAPACK auxiliary routine -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- @@ -132,7 +131,7 @@ SUBROUTINE ZGETC2( N, A, LDA, IPIV, JPIV, INFO ) DOUBLE PRECISION BIGNUM, EPS, SMIN, SMLNUM, XMAX * .. * .. External Subroutines .. - EXTERNAL ZGERU, ZSWAP, DLABAD + EXTERNAL ZGERU, ZSWAP * .. * .. External Functions .. DOUBLE PRECISION DLAMCH @@ -155,7 +154,6 @@ SUBROUTINE ZGETC2( N, A, LDA, IPIV, JPIV, INFO ) EPS = DLAMCH( 'P' ) SMLNUM = DLAMCH( 'S' ) / EPS BIGNUM = ONE / SMLNUM - CALL DLABAD( SMLNUM, BIGNUM ) * * Handle the case N=1 by itself * @@ -177,8 +175,8 @@ SUBROUTINE ZGETC2( N, A, LDA, IPIV, JPIV, INFO ) * Find max element in matrix A * XMAX = ZERO - DO 20 IP = I, N - DO 10 JP = I, N + DO 20 JP = I, N + DO 10 IP = I, N IF( ABS( A( IP, JP ) ).GE.XMAX ) THEN XMAX = ABS( A( IP, JP ) ) IPV = IP diff --git a/lapack-netlib/SRC/zhbevd.f b/lapack-netlib/SRC/zhbevd.f index be9f015560..94747ac3ad 100644 --- a/lapack-netlib/SRC/zhbevd.f +++ b/lapack-netlib/SRC/zhbevd.f @@ -5,7 +5,6 @@ * Online html documentation available at * http://www.netlib.org/lapack/explore-html/ * -*> \htmlonly *> Download ZHBEVD + dependencies *> *> [TGZ] @@ -13,7 +12,6 @@ *> [ZIP] *> *> [TXT] -*> \endhtmlonly * * Definition: * =========== @@ -201,11 +199,13 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup complex16OTHEReigen +*> \ingroup hbevd * * ===================================================================== - SUBROUTINE ZHBEVD( JOBZ, UPLO, N, KD, AB, LDAB, W, Z, LDZ, WORK, + SUBROUTINE ZHBEVD( JOBZ, UPLO, N, KD, AB, LDAB, W, Z, LDZ, + $ WORK, $ LWORK, RWORK, LRWORK, IWORK, LIWORK, INFO ) + IMPLICIT NONE * * -- LAPACK driver routine -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- @@ -243,7 +243,8 @@ SUBROUTINE ZHBEVD( JOBZ, UPLO, N, KD, AB, LDAB, W, Z, LDZ, WORK, EXTERNAL LSAME, DLAMCH, ZLANHB * .. * .. External Subroutines .. - EXTERNAL DSCAL, DSTERF, XERBLA, ZGEMM, ZHBTRD, ZLACPY, + EXTERNAL DSCAL, DSTERF, XERBLA, ZGEMM, ZHBTRD, + $ ZLACPY, $ ZLASCL, ZSTEDC * .. * .. Intrinsic Functions .. @@ -289,7 +290,7 @@ SUBROUTINE ZHBEVD( JOBZ, UPLO, N, KD, AB, LDAB, W, Z, LDZ, WORK, * IF( INFO.EQ.0 ) THEN WORK( 1 ) = LWMIN - RWORK( 1 ) = LRWMIN + RWORK( 1 ) = DBLE( LRWMIN ) IWORK( 1 ) = LIWMIN * IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN @@ -342,9 +343,11 @@ SUBROUTINE ZHBEVD( JOBZ, UPLO, N, KD, AB, LDAB, W, Z, LDZ, WORK, END IF IF( ISCALE.EQ.1 ) THEN IF( LOWER ) THEN - CALL ZLASCL( 'B', KD, KD, ONE, SIGMA, N, N, AB, LDAB, INFO ) + CALL ZLASCL( 'B', KD, KD, ONE, SIGMA, N, N, AB, LDAB, + $ INFO ) ELSE - CALL ZLASCL( 'Q', KD, KD, ONE, SIGMA, N, N, AB, LDAB, INFO ) + CALL ZLASCL( 'Q', KD, KD, ONE, SIGMA, N, N, AB, LDAB, + $ INFO ) END IF END IF * @@ -363,7 +366,8 @@ SUBROUTINE ZHBEVD( JOBZ, UPLO, N, KD, AB, LDAB, W, Z, LDZ, WORK, IF( .NOT.WANTZ ) THEN CALL DSTERF( N, W, RWORK( INDE ), INFO ) ELSE - CALL ZSTEDC( 'I', N, W, RWORK( INDE ), WORK, N, WORK( INDWK2 ), + CALL ZSTEDC( 'I', N, W, RWORK( INDE ), WORK, N, + $ WORK( INDWK2 ), $ LLWK2, RWORK( INDWRK ), LLRWK, IWORK, LIWORK, $ INFO ) CALL ZGEMM( 'N', 'N', N, N, N, CONE, Z, LDZ, WORK, N, CZERO, @@ -383,7 +387,7 @@ SUBROUTINE ZHBEVD( JOBZ, UPLO, N, KD, AB, LDAB, W, Z, LDZ, WORK, END IF * WORK( 1 ) = LWMIN - RWORK( 1 ) = LRWMIN + RWORK( 1 ) = DBLE( LRWMIN ) IWORK( 1 ) = LIWMIN RETURN * diff --git a/lapack-netlib/SRC/zhbevd_2stage.f b/lapack-netlib/SRC/zhbevd_2stage.f index e32c7125ca..eda9cf8725 100644 --- a/lapack-netlib/SRC/zhbevd_2stage.f +++ b/lapack-netlib/SRC/zhbevd_2stage.f @@ -7,7 +7,6 @@ * Online html documentation available at * http://www.netlib.org/lapack/explore-html/ * -*> \htmlonly *> Download ZHBEVD_2STAGE + dependencies *> *> [TGZ] @@ -15,13 +14,12 @@ *> [ZIP] *> *> [TXT] -*> \endhtmlonly * * Definition: * =========== * * SUBROUTINE ZHBEVD_2STAGE( JOBZ, UPLO, N, KD, AB, LDAB, W, Z, LDZ, -* WORK, LWORK, RWORK, LRWORK, IWORK, +* WORK, LWORK, RWORK, LRWORK, IWORK, * LIWORK, INFO ) * * IMPLICIT NONE @@ -136,7 +134,7 @@ *> \verbatim *> LWORK is INTEGER *> The length of the array WORK. LWORK >= 1, when N <= 1; -*> otherwise +*> otherwise *> If JOBZ = 'N' and N > 1, LWORK must be queried. *> LWORK = MAX(1, dimension) where *> dimension = (2KD+1)*N + KD*NTHREADS @@ -213,7 +211,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup complex16OTHEReigen +*> \ingroup hbevd_2stage * *> \par Further Details: * ===================== @@ -231,7 +229,7 @@ *> http://doi.acm.org/10.1145/2063384.2063394 *> *> A. Haidar, J. Kurzak, P. Luszczek, 2013. -*> An improved parallel singular value algorithm and its implementation +*> An improved parallel singular value algorithm and its implementation *> for multicore hardware, In Proceedings of 2013 International Conference *> for High Performance Computing, Networking, Storage and Analysis (SC '13). *> Denver, Colorado, USA, 2013. @@ -239,17 +237,18 @@ *> http://doi.acm.org/10.1145/2503210.2503292 *> *> A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra. -*> A novel hybrid CPU-GPU generalized eigensolver for electronic structure +*> A novel hybrid CPU-GPU generalized eigensolver for electronic structure *> calculations based on fine-grained memory aware tasks. *> International Journal of High Performance Computing Applications. *> Volume 28 Issue 2, Pages 196-209, May 2014. -*> http://hpc.sagepub.com/content/28/2/196 +*> http://hpc.sagepub.com/content/28/2/196 *> *> \endverbatim * * ===================================================================== - SUBROUTINE ZHBEVD_2STAGE( JOBZ, UPLO, N, KD, AB, LDAB, W, Z, LDZ, - $ WORK, LWORK, RWORK, LRWORK, IWORK, + SUBROUTINE ZHBEVD_2STAGE( JOBZ, UPLO, N, KD, AB, LDAB, W, Z, + $ LDZ, + $ WORK, LWORK, RWORK, LRWORK, IWORK, $ LIWORK, INFO ) * IMPLICIT NONE @@ -292,7 +291,8 @@ SUBROUTINE ZHBEVD_2STAGE( JOBZ, UPLO, N, KD, AB, LDAB, W, Z, LDZ, EXTERNAL LSAME, DLAMCH, ZLANHB, ILAENV2STAGE * .. * .. External Subroutines .. - EXTERNAL DSCAL, DSTERF, XERBLA, ZGEMM, ZLACPY, + EXTERNAL DSCAL, DSTERF, XERBLA, ZGEMM, + $ ZLACPY, $ ZLASCL, ZSTEDC, ZHETRD_HB2ST * .. * .. Intrinsic Functions .. @@ -312,9 +312,12 @@ SUBROUTINE ZHBEVD_2STAGE( JOBZ, UPLO, N, KD, AB, LDAB, W, Z, LDZ, LRWMIN = 1 LIWMIN = 1 ELSE - IB = ILAENV2STAGE( 2, 'ZHETRD_HB2ST', JOBZ, N, KD, -1, -1 ) - LHTRD = ILAENV2STAGE( 3, 'ZHETRD_HB2ST', JOBZ, N, KD, IB, -1 ) - LWTRD = ILAENV2STAGE( 4, 'ZHETRD_HB2ST', JOBZ, N, KD, IB, -1 ) + IB = ILAENV2STAGE( 2, 'ZHETRD_HB2ST', JOBZ, N, KD, -1, + $ -1 ) + LHTRD = ILAENV2STAGE( 3, 'ZHETRD_HB2ST', JOBZ, N, KD, IB, + $ -1 ) + LWTRD = ILAENV2STAGE( 4, 'ZHETRD_HB2ST', JOBZ, N, KD, IB, + $ -1 ) IF( WANTZ ) THEN LWMIN = 2*N**2 LRWMIN = 1 + 5*N + 2*N**2 @@ -341,7 +344,7 @@ SUBROUTINE ZHBEVD_2STAGE( JOBZ, UPLO, N, KD, AB, LDAB, W, Z, LDZ, * IF( INFO.EQ.0 ) THEN WORK( 1 ) = LWMIN - RWORK( 1 ) = LRWMIN + RWORK( 1 ) = DBLE( LRWMIN ) IWORK( 1 ) = LIWMIN * IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN @@ -394,9 +397,11 @@ SUBROUTINE ZHBEVD_2STAGE( JOBZ, UPLO, N, KD, AB, LDAB, W, Z, LDZ, END IF IF( ISCALE.EQ.1 ) THEN IF( LOWER ) THEN - CALL ZLASCL( 'B', KD, KD, ONE, SIGMA, N, N, AB, LDAB, INFO ) + CALL ZLASCL( 'B', KD, KD, ONE, SIGMA, N, N, AB, LDAB, + $ INFO ) ELSE - CALL ZLASCL( 'Q', KD, KD, ONE, SIGMA, N, N, AB, LDAB, INFO ) + CALL ZLASCL( 'Q', KD, KD, ONE, SIGMA, N, N, AB, LDAB, + $ INFO ) END IF END IF * @@ -412,7 +417,7 @@ SUBROUTINE ZHBEVD_2STAGE( JOBZ, UPLO, N, KD, AB, LDAB, W, Z, LDZ, LLWK2 = LWORK - INDWK2 + 1 * CALL ZHETRD_HB2ST( "N", JOBZ, UPLO, N, KD, AB, LDAB, W, - $ RWORK( INDE ), WORK( INDHOUS ), LHTRD, + $ RWORK( INDE ), WORK( INDHOUS ), LHTRD, $ WORK( INDWK ), LLWORK, IINFO ) * * For eigenvalues only, call DSTERF. For eigenvectors, call ZSTEDC. @@ -420,7 +425,8 @@ SUBROUTINE ZHBEVD_2STAGE( JOBZ, UPLO, N, KD, AB, LDAB, W, Z, LDZ, IF( .NOT.WANTZ ) THEN CALL DSTERF( N, W, RWORK( INDE ), INFO ) ELSE - CALL ZSTEDC( 'I', N, W, RWORK( INDE ), WORK, N, WORK( INDWK2 ), + CALL ZSTEDC( 'I', N, W, RWORK( INDE ), WORK, N, + $ WORK( INDWK2 ), $ LLWK2, RWORK( INDRWK ), LLRWK, IWORK, LIWORK, $ INFO ) CALL ZGEMM( 'N', 'N', N, N, N, CONE, Z, LDZ, WORK, N, CZERO, @@ -440,7 +446,7 @@ SUBROUTINE ZHBEVD_2STAGE( JOBZ, UPLO, N, KD, AB, LDAB, W, Z, LDZ, END IF * WORK( 1 ) = LWMIN - RWORK( 1 ) = LRWMIN + RWORK( 1 ) = DBLE( LRWMIN ) IWORK( 1 ) = LIWMIN RETURN * diff --git a/lapack-netlib/SRC/zhbgvd.f b/lapack-netlib/SRC/zhbgvd.f index 4bd02168d4..82d62622cc 100644 --- a/lapack-netlib/SRC/zhbgvd.f +++ b/lapack-netlib/SRC/zhbgvd.f @@ -5,7 +5,6 @@ * Online html documentation available at * http://www.netlib.org/lapack/explore-html/ * -*> \htmlonly *> Download ZHBGVD + dependencies *> *> [TGZ] @@ -13,7 +12,6 @@ *> [ZIP] *> *> [TXT] -*> \endhtmlonly * * Definition: * =========== @@ -232,7 +230,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup complex16OTHEReigen +*> \ingroup hbgvd * *> \par Contributors: * ================== @@ -240,9 +238,11 @@ *> Mark Fahey, Department of Mathematics, Univ. of Kentucky, USA * * ===================================================================== - SUBROUTINE ZHBGVD( JOBZ, UPLO, N, KA, KB, AB, LDAB, BB, LDBB, W, + SUBROUTINE ZHBGVD( JOBZ, UPLO, N, KA, KB, AB, LDAB, BB, LDBB, + $ W, $ Z, LDZ, WORK, LWORK, RWORK, LRWORK, IWORK, $ LIWORK, INFO ) + IMPLICIT NONE * * -- LAPACK driver routine -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- @@ -278,7 +278,8 @@ SUBROUTINE ZHBGVD( JOBZ, UPLO, N, KA, KB, AB, LDAB, BB, LDBB, W, EXTERNAL LSAME * .. * .. External Subroutines .. - EXTERNAL DSTERF, XERBLA, ZGEMM, ZHBGST, ZHBTRD, ZLACPY, + EXTERNAL DSTERF, XERBLA, ZGEMM, ZHBGST, ZHBTRD, + $ ZLACPY, $ ZPBSTF, ZSTEDC * .. * .. Executable Statements .. @@ -323,7 +324,7 @@ SUBROUTINE ZHBGVD( JOBZ, UPLO, N, KA, KB, AB, LDAB, BB, LDBB, W, * IF( INFO.EQ.0 ) THEN WORK( 1 ) = LWMIN - RWORK( 1 ) = LRWMIN + RWORK( 1 ) = DBLE( LRWMIN ) IWORK( 1 ) = LIWMIN * IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN @@ -380,7 +381,8 @@ SUBROUTINE ZHBGVD( JOBZ, UPLO, N, KA, KB, AB, LDAB, BB, LDBB, W, IF( .NOT.WANTZ ) THEN CALL DSTERF( N, W, RWORK( INDE ), INFO ) ELSE - CALL ZSTEDC( 'I', N, W, RWORK( INDE ), WORK, N, WORK( INDWK2 ), + CALL ZSTEDC( 'I', N, W, RWORK( INDE ), WORK, N, + $ WORK( INDWK2 ), $ LLWK2, RWORK( INDWRK ), LLRWK, IWORK, LIWORK, $ INFO ) CALL ZGEMM( 'N', 'N', N, N, N, CONE, Z, LDZ, WORK, N, CZERO, @@ -389,7 +391,7 @@ SUBROUTINE ZHBGVD( JOBZ, UPLO, N, KA, KB, AB, LDAB, BB, LDBB, W, END IF * WORK( 1 ) = LWMIN - RWORK( 1 ) = LRWMIN + RWORK( 1 ) = DBLE( LRWMIN ) IWORK( 1 ) = LIWMIN RETURN * diff --git a/lapack-netlib/SRC/zheevd.f b/lapack-netlib/SRC/zheevd.f index 8e86b9e88a..01ad3b25c4 100644 --- a/lapack-netlib/SRC/zheevd.f +++ b/lapack-netlib/SRC/zheevd.f @@ -5,7 +5,6 @@ * Online html documentation available at * http://www.netlib.org/lapack/explore-html/ * -*> \htmlonly *> Download ZHEEVD + dependencies *> *> [TGZ] @@ -13,7 +12,6 @@ *> [ZIP] *> *> [TXT] -*> \endhtmlonly * * Definition: * =========== @@ -193,8 +191,10 @@ *> at Berkeley, USA *> * ===================================================================== - SUBROUTINE ZHEEVD( JOBZ, UPLO, N, A, LDA, W, WORK, LWORK, RWORK, + SUBROUTINE ZHEEVD( JOBZ, UPLO, N, A, LDA, W, WORK, LWORK, + $ RWORK, $ LRWORK, IWORK, LIWORK, INFO ) + IMPLICIT NONE * * -- LAPACK driver routine -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- @@ -233,7 +233,8 @@ SUBROUTINE ZHEEVD( JOBZ, UPLO, N, A, LDA, W, WORK, LWORK, RWORK, EXTERNAL LSAME, ILAENV, DLAMCH, ZLANHE * .. * .. External Subroutines .. - EXTERNAL DSCAL, DSTERF, XERBLA, ZHETRD, ZLACPY, ZLASCL, + EXTERNAL DSCAL, DSTERF, XERBLA, ZHETRD, ZLACPY, + $ ZLASCL, $ ZSTEDC, ZUNMTR * .. * .. Intrinsic Functions .. @@ -277,12 +278,13 @@ SUBROUTINE ZHEEVD( JOBZ, UPLO, N, A, LDA, W, WORK, LWORK, RWORK, LIWMIN = 1 END IF LOPT = MAX( LWMIN, N + - $ N*ILAENV( 1, 'ZHETRD', UPLO, N, -1, -1, -1 ) ) + $ N*ILAENV( 1, 'ZHETRD', UPLO, N, -1, -1, + $ -1 ) ) LROPT = LRWMIN LIOPT = LIWMIN END IF WORK( 1 ) = LOPT - RWORK( 1 ) = LROPT + RWORK( 1 ) = DBLE( LROPT ) IWORK( 1 ) = LIOPT * IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN @@ -378,7 +380,7 @@ SUBROUTINE ZHEEVD( JOBZ, UPLO, N, A, LDA, W, WORK, LWORK, RWORK, END IF * WORK( 1 ) = LOPT - RWORK( 1 ) = LROPT + RWORK( 1 ) = DBLE( LROPT ) IWORK( 1 ) = LIOPT * RETURN diff --git a/lapack-netlib/SRC/zheevd_2stage.f b/lapack-netlib/SRC/zheevd_2stage.f index e697a98237..b73d7155e1 100644 --- a/lapack-netlib/SRC/zheevd_2stage.f +++ b/lapack-netlib/SRC/zheevd_2stage.f @@ -7,7 +7,6 @@ * Online html documentation available at * http://www.netlib.org/lapack/explore-html/ * -*> \htmlonly *> Download ZHEEVD_2STAGE + dependencies *> *> [TGZ] @@ -15,7 +14,6 @@ *> [ZIP] *> *> [TXT] -*> \endhtmlonly * * Definition: * =========== @@ -113,8 +111,8 @@ *> If JOBZ = 'N' and N > 1, LWORK must be queried. *> LWORK = MAX(1, dimension) where *> dimension = max(stage1,stage2) + (KD+1)*N + N+1 -*> = N*KD + N*max(KD+1,FACTOPTNB) -*> + max(2*KD*KD, KD*NTHREADS) +*> = N*KD + N*max(KD+1,FACTOPTNB) +*> + max(2*KD*KD, KD*NTHREADS) *> + (KD+1)*N + N+1 *> where KD is the blocking size of the reduction, *> FACTOPTNB is the blocking used by the QR or LQ @@ -196,7 +194,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup complex16HEeigen +*> \ingroup heevd_2stage * *> \par Further Details: * ===================== @@ -225,7 +223,7 @@ *> http://doi.acm.org/10.1145/2063384.2063394 *> *> A. Haidar, J. Kurzak, P. Luszczek, 2013. -*> An improved parallel singular value algorithm and its implementation +*> An improved parallel singular value algorithm and its implementation *> for multicore hardware, In Proceedings of 2013 International Conference *> for High Performance Computing, Networking, Storage and Analysis (SC '13). *> Denver, Colorado, USA, 2013. @@ -233,16 +231,17 @@ *> http://doi.acm.org/10.1145/2503210.2503292 *> *> A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra. -*> A novel hybrid CPU-GPU generalized eigensolver for electronic structure +*> A novel hybrid CPU-GPU generalized eigensolver for electronic structure *> calculations based on fine-grained memory aware tasks. *> International Journal of High Performance Computing Applications. *> Volume 28 Issue 2, Pages 196-209, May 2014. -*> http://hpc.sagepub.com/content/28/2/196 +*> http://hpc.sagepub.com/content/28/2/196 *> *> \endverbatim * * ===================================================================== - SUBROUTINE ZHEEVD_2STAGE( JOBZ, UPLO, N, A, LDA, W, WORK, LWORK, + SUBROUTINE ZHEEVD_2STAGE( JOBZ, UPLO, N, A, LDA, W, WORK, + $ LWORK, $ RWORK, LRWORK, IWORK, LIWORK, INFO ) * IMPLICIT NONE @@ -287,11 +286,12 @@ SUBROUTINE ZHEEVD_2STAGE( JOBZ, UPLO, N, A, LDA, W, WORK, LWORK, EXTERNAL LSAME, DLAMCH, ZLANHE, ILAENV2STAGE * .. * .. External Subroutines .. - EXTERNAL DSCAL, DSTERF, XERBLA, ZLACPY, ZLASCL, + EXTERNAL DSCAL, DSTERF, XERBLA, ZLACPY, + $ ZLASCL, $ ZSTEDC, ZUNMTR, ZHETRD_2STAGE * .. * .. Intrinsic Functions .. - INTRINSIC DBLE, MAX, SQRT + INTRINSIC DBLE, MAX, SQRT * .. * .. Executable Statements .. * @@ -337,7 +337,7 @@ SUBROUTINE ZHEEVD_2STAGE( JOBZ, UPLO, N, A, LDA, W, WORK, LWORK, END IF END IF WORK( 1 ) = LWMIN - RWORK( 1 ) = LRWMIN + RWORK( 1 ) = DBLE( LRWMIN ) IWORK( 1 ) = LIWMIN * IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN @@ -404,7 +404,7 @@ SUBROUTINE ZHEEVD_2STAGE( JOBZ, UPLO, N, A, LDA, W, WORK, LWORK, LLWRK2 = LWORK - INDWK2 + 1 * CALL ZHETRD_2STAGE( JOBZ, UPLO, N, A, LDA, W, RWORK( INDE ), - $ WORK( INDTAU ), WORK( INDHOUS ), LHTRD, + $ WORK( INDTAU ), WORK( INDHOUS ), LHTRD, $ WORK( INDWRK ), LLWORK, IINFO ) * * For eigenvalues only, call DSTERF. For eigenvectors, first call @@ -436,7 +436,7 @@ SUBROUTINE ZHEEVD_2STAGE( JOBZ, UPLO, N, A, LDA, W, WORK, LWORK, END IF * WORK( 1 ) = LWMIN - RWORK( 1 ) = LRWMIN + RWORK( 1 ) = DBLE( LRWMIN ) IWORK( 1 ) = LIWMIN * RETURN diff --git a/lapack-netlib/SRC/zheevr.f b/lapack-netlib/SRC/zheevr.f index fe6e1a85f7..038738ec8b 100644 --- a/lapack-netlib/SRC/zheevr.f +++ b/lapack-netlib/SRC/zheevr.f @@ -5,7 +5,6 @@ * Online html documentation available at * http://www.netlib.org/lapack/explore-html/ * -*> \htmlonly *> Download ZHEEVR + dependencies *> *> [TGZ] @@ -13,7 +12,6 @@ *> [ZIP] *> *> [TXT] -*> \endhtmlonly * * Definition: * =========== @@ -41,9 +39,16 @@ *> \verbatim *> *> ZHEEVR computes selected eigenvalues and, optionally, eigenvectors -*> of a complex Hermitian matrix A. Eigenvalues and eigenvectors can -*> be selected by specifying either a range of values or a range of -*> indices for the desired eigenvalues. +*> of a complex Hermitian matrix A. Eigenvalues and eigenvectors can be +*> selected by specifying either a range of values or a range of indices +*> for the desired eigenvalues. Invocations with different choices for +*> these parameters may result in the computation of slightly different +*> eigenvalues and/or eigenvectors for the same matrix. The reason for +*> this behavior is that there exists a variety of algorithms (each +*> performing best for a particular set of options) with ZHEEVR +*> attempting to select the best based on the various parameters. In all +*> cases, the computed values are accurate within the limits of finite +*> precision arithmetic. *> *> ZHEEVR first reduces the matrix A to tridiagonal form T with a call *> to ZHETRD. Then, whenever possible, ZHEEVR calls ZSTEMR to compute @@ -107,6 +112,9 @@ *> JOBZ is CHARACTER*1 *> = 'N': Compute eigenvalues only; *> = 'V': Compute eigenvalues and eigenvectors. +*> +*> This parameter influences the choice of the algorithm and +*> may alter the computed values. *> \endverbatim *> *> \param[in] RANGE @@ -118,6 +126,9 @@ *> = 'I': the IL-th through IU-th eigenvalues will be found. *> For RANGE = 'V' or 'I' and IU - IL < N - 1, DSTEBZ and *> ZSTEIN are called +*> +*> This parameter influences the choice of the algorithm and +*> may alter the computed values. *> \endverbatim *> *> \param[in] UPLO @@ -242,6 +253,7 @@ *> Note: the user must ensure that at least max(1,M) columns are *> supplied in the array Z; if RANGE = 'V', the exact value of M *> is not known in advance and an upper bound must be used. +*> Supplying N columns is always safe. *> \endverbatim *> *> \param[in] LDZ @@ -354,9 +366,11 @@ *> California at Berkeley, USA \n *> * ===================================================================== - SUBROUTINE ZHEEVR( JOBZ, RANGE, UPLO, N, A, LDA, VL, VU, IL, IU, + SUBROUTINE ZHEEVR( JOBZ, RANGE, UPLO, N, A, LDA, VL, VU, IL, + $ IU, $ ABSTOL, M, W, Z, LDZ, ISUPPZ, WORK, LWORK, $ RWORK, LRWORK, IWORK, LIWORK, INFO ) + IMPLICIT NONE * * -- LAPACK driver routine -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- @@ -399,7 +413,8 @@ SUBROUTINE ZHEEVR( JOBZ, RANGE, UPLO, N, A, LDA, VL, VU, IL, IU, EXTERNAL LSAME, ILAENV, DLAMCH, ZLANSY * .. * .. External Subroutines .. - EXTERNAL DCOPY, DSCAL, DSTEBZ, DSTERF, XERBLA, ZDSCAL, + EXTERNAL DCOPY, DSCAL, DSTEBZ, DSTERF, XERBLA, + $ ZDSCAL, $ ZHETRD, ZSTEMR, ZSTEIN, ZSWAP, ZUNMTR * .. * .. Intrinsic Functions .. @@ -464,7 +479,7 @@ SUBROUTINE ZHEEVR( JOBZ, RANGE, UPLO, N, A, LDA, VL, VU, IL, IU, NB = MAX( NB, ILAENV( 1, 'ZUNMTR', UPLO, N, -1, -1, -1 ) ) LWKOPT = MAX( ( NB+1 )*N, LWMIN ) WORK( 1 ) = LWKOPT - RWORK( 1 ) = LRWMIN + RWORK( 1 ) = DBLE( LRWMIN ) IWORK( 1 ) = LIWMIN * IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN @@ -676,7 +691,8 @@ SUBROUTINE ZHEEVR( JOBZ, RANGE, UPLO, N, A, LDA, VL, VU, IL, IU, * INDWKN = INDWK LLWRKN = LWORK - INDWKN + 1 - CALL ZUNMTR( 'L', UPLO, 'N', N, M, A, LDA, WORK( INDTAU ), Z, + CALL ZUNMTR( 'L', UPLO, 'N', N, M, A, LDA, WORK( INDTAU ), + $ Z, $ LDZ, WORK( INDWKN ), LLWRKN, IINFO ) END IF * @@ -720,7 +736,7 @@ SUBROUTINE ZHEEVR( JOBZ, RANGE, UPLO, N, A, LDA, VL, VU, IL, IU, * Set WORK(1) to optimal workspace size. * WORK( 1 ) = LWKOPT - RWORK( 1 ) = LRWMIN + RWORK( 1 ) = DBLE( LRWMIN ) IWORK( 1 ) = LIWMIN * RETURN diff --git a/lapack-netlib/SRC/zheevr_2stage.f b/lapack-netlib/SRC/zheevr_2stage.f index b1cc7175fa..0ba5a29533 100644 --- a/lapack-netlib/SRC/zheevr_2stage.f +++ b/lapack-netlib/SRC/zheevr_2stage.f @@ -7,7 +7,6 @@ * Online html documentation available at * http://www.netlib.org/lapack/explore-html/ * -*> \htmlonly *> Download ZHEEVR_2STAGE + dependencies *> *> [TGZ] @@ -15,7 +14,6 @@ *> [ZIP] *> *> [TXT] -*> \endhtmlonly * * Definition: * =========== @@ -447,10 +445,12 @@ SUBROUTINE ZHEEVR_2STAGE( JOBZ, RANGE, UPLO, N, A, LDA, VL, VU, LOGICAL LSAME INTEGER ILAENV, ILAENV2STAGE DOUBLE PRECISION DLAMCH, ZLANSY - EXTERNAL LSAME, DLAMCH, ZLANSY, ILAENV, ILAENV2STAGE + EXTERNAL LSAME, DLAMCH, ZLANSY, ILAENV, + $ ILAENV2STAGE * .. * .. External Subroutines .. - EXTERNAL DCOPY, DSCAL, DSTEBZ, DSTERF, XERBLA, ZDSCAL, + EXTERNAL DCOPY, DSCAL, DSTEBZ, DSTERF, XERBLA, + $ ZDSCAL, $ ZHETRD_2STAGE, ZSTEMR, ZSTEIN, ZSWAP, ZUNMTR * .. * .. Intrinsic Functions .. @@ -471,10 +471,14 @@ SUBROUTINE ZHEEVR_2STAGE( JOBZ, RANGE, UPLO, N, A, LDA, VL, VU, LQUERY = ( ( LWORK.EQ.-1 ) .OR. ( LRWORK.EQ.-1 ) .OR. $ ( LIWORK.EQ.-1 ) ) * - KD = ILAENV2STAGE( 1, 'ZHETRD_2STAGE', JOBZ, N, -1, -1, -1 ) - IB = ILAENV2STAGE( 2, 'ZHETRD_2STAGE', JOBZ, N, KD, -1, -1 ) - LHTRD = ILAENV2STAGE( 3, 'ZHETRD_2STAGE', JOBZ, N, KD, IB, -1 ) - LWTRD = ILAENV2STAGE( 4, 'ZHETRD_2STAGE', JOBZ, N, KD, IB, -1 ) + KD = ILAENV2STAGE( 1, 'ZHETRD_2STAGE', JOBZ, N, -1, -1, + $ -1 ) + IB = ILAENV2STAGE( 2, 'ZHETRD_2STAGE', JOBZ, N, KD, -1, + $ -1 ) + LHTRD = ILAENV2STAGE( 3, 'ZHETRD_2STAGE', JOBZ, N, KD, IB, + $ -1 ) + LWTRD = ILAENV2STAGE( 4, 'ZHETRD_2STAGE', JOBZ, N, KD, IB, + $ -1 ) * IF( N.LE.1 ) THEN LWMIN = 1 @@ -517,7 +521,7 @@ SUBROUTINE ZHEEVR_2STAGE( JOBZ, RANGE, UPLO, N, A, LDA, VL, VU, * IF( INFO.EQ.0 ) THEN WORK( 1 ) = LWMIN - RWORK( 1 ) = LRWMIN + RWORK( 1 ) = DBLE( LRWMIN ) IWORK( 1 ) = LIWMIN * IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN @@ -732,7 +736,8 @@ SUBROUTINE ZHEEVR_2STAGE( JOBZ, RANGE, UPLO, N, A, LDA, VL, VU, * INDWKN = INDWK LLWRKN = LWORK - INDWKN + 1 - CALL ZUNMTR( 'L', UPLO, 'N', N, M, A, LDA, WORK( INDTAU ), Z, + CALL ZUNMTR( 'L', UPLO, 'N', N, M, A, LDA, WORK( INDTAU ), + $ Z, $ LDZ, WORK( INDWKN ), LLWRKN, IINFO ) END IF * @@ -776,7 +781,7 @@ SUBROUTINE ZHEEVR_2STAGE( JOBZ, RANGE, UPLO, N, A, LDA, VL, VU, * Set WORK(1) to optimal workspace size. * WORK( 1 ) = LWMIN - RWORK( 1 ) = LRWMIN + RWORK( 1 ) = DBLE( LRWMIN ) IWORK( 1 ) = LIWMIN * RETURN diff --git a/lapack-netlib/SRC/zhegvd.f b/lapack-netlib/SRC/zhegvd.f index a1c569fe3c..bdcd681a93 100644 --- a/lapack-netlib/SRC/zhegvd.f +++ b/lapack-netlib/SRC/zhegvd.f @@ -5,7 +5,6 @@ * Online html documentation available at * http://www.netlib.org/lapack/explore-html/ * -*> \htmlonly *> Download ZHEGVD + dependencies *> *> [TGZ] @@ -13,7 +12,6 @@ *> [ZIP] *> *> [TXT] -*> \endhtmlonly * * Definition: * =========== @@ -219,7 +217,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup complex16HEeigen +*> \ingroup hegvd * *> \par Further Details: * ===================== @@ -238,8 +236,10 @@ *> Mark Fahey, Department of Mathematics, Univ. of Kentucky, USA *> * ===================================================================== - SUBROUTINE ZHEGVD( ITYPE, JOBZ, UPLO, N, A, LDA, B, LDB, W, WORK, + SUBROUTINE ZHEGVD( ITYPE, JOBZ, UPLO, N, A, LDA, B, LDB, W, + $ WORK, $ LWORK, RWORK, LRWORK, IWORK, LIWORK, INFO ) + IMPLICIT NONE * * -- LAPACK driver routine -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- @@ -272,7 +272,8 @@ SUBROUTINE ZHEGVD( ITYPE, JOBZ, UPLO, N, A, LDA, B, LDB, W, WORK, EXTERNAL LSAME, DROUNDUP_LWORK * .. * .. External Subroutines .. - EXTERNAL XERBLA, ZHEEVD, ZHEGST, ZPOTRF, ZTRMM, ZTRSM + EXTERNAL XERBLA, ZHEEVD, ZHEGST, ZPOTRF, ZTRMM, + $ ZTRSM * .. * .. Intrinsic Functions .. INTRINSIC DBLE, MAX @@ -317,8 +318,8 @@ SUBROUTINE ZHEGVD( ITYPE, JOBZ, UPLO, N, A, LDA, B, LDB, W, WORK, END IF * IF( INFO.EQ.0 ) THEN - WORK( 1 ) = LOPT - RWORK( 1 ) = LROPT + WORK( 1 ) = DROUNDUP_LWORK(LOPT) + RWORK( 1 ) = DROUNDUP_LWORK(LROPT) IWORK( 1 ) = LIOPT * IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN @@ -353,7 +354,8 @@ SUBROUTINE ZHEGVD( ITYPE, JOBZ, UPLO, N, A, LDA, B, LDB, W, WORK, * Transform problem to standard eigenvalue problem and solve. * CALL ZHEGST( ITYPE, UPLO, N, A, LDA, B, LDB, INFO ) - CALL ZHEEVD( JOBZ, UPLO, N, A, LDA, W, WORK, LWORK, RWORK, LRWORK, + CALL ZHEEVD( JOBZ, UPLO, N, A, LDA, W, WORK, LWORK, RWORK, + $ LRWORK, $ IWORK, LIWORK, INFO ) LOPT = MAX( LOPT, INT( DBLE( WORK( 1 ) ) ) ) LROPT = MAX( LROPT, INT( RWORK( 1 ) ) ) @@ -393,8 +395,8 @@ SUBROUTINE ZHEGVD( ITYPE, JOBZ, UPLO, N, A, LDA, B, LDB, W, WORK, END IF END IF * - WORK( 1 ) = LOPT - RWORK( 1 ) = LROPT + WORK( 1 ) = DROUNDUP_LWORK(LOPT) + RWORK( 1 ) = DROUNDUP_LWORK(LROPT) IWORK( 1 ) = LIOPT * RETURN diff --git a/lapack-netlib/SRC/zhpevd.f b/lapack-netlib/SRC/zhpevd.f index 5260aaf14a..1a033de79e 100644 --- a/lapack-netlib/SRC/zhpevd.f +++ b/lapack-netlib/SRC/zhpevd.f @@ -5,7 +5,6 @@ * Online html documentation available at * http://www.netlib.org/lapack/explore-html/ * -*> \htmlonly *> Download ZHPEVD + dependencies *> *> [TGZ] @@ -13,7 +12,6 @@ *> [ZIP] *> *> [TXT] -*> \endhtmlonly * * Definition: * =========== @@ -186,11 +184,12 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup complex16OTHEReigen +*> \ingroup hpevd * * ===================================================================== SUBROUTINE ZHPEVD( JOBZ, UPLO, N, AP, W, Z, LDZ, WORK, LWORK, $ RWORK, LRWORK, IWORK, LIWORK, INFO ) + IMPLICIT NONE * * -- LAPACK driver routine -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- @@ -227,7 +226,8 @@ SUBROUTINE ZHPEVD( JOBZ, UPLO, N, AP, W, Z, LDZ, WORK, LWORK, EXTERNAL LSAME, DLAMCH, ZLANHP * .. * .. External Subroutines .. - EXTERNAL DSCAL, DSTERF, XERBLA, ZDSCAL, ZHPTRD, ZSTEDC, + EXTERNAL DSCAL, DSTERF, XERBLA, ZDSCAL, ZHPTRD, + $ ZSTEDC, $ ZUPMTR * .. * .. Intrinsic Functions .. @@ -243,7 +243,8 @@ SUBROUTINE ZHPEVD( JOBZ, UPLO, N, AP, W, Z, LDZ, WORK, LWORK, INFO = 0 IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN INFO = -1 - ELSE IF( .NOT.( LSAME( UPLO, 'L' ) .OR. LSAME( UPLO, 'U' ) ) ) + ELSE IF( .NOT.( LSAME( UPLO, 'L' ) .OR. + $ LSAME( UPLO, 'U' ) ) ) $ THEN INFO = -2 ELSE IF( N.LT.0 ) THEN @@ -269,7 +270,7 @@ SUBROUTINE ZHPEVD( JOBZ, UPLO, N, AP, W, Z, LDZ, WORK, LWORK, END IF END IF WORK( 1 ) = LWMIN - RWORK( 1 ) = LRWMIN + RWORK( 1 ) = DBLE( LRWMIN ) IWORK( 1 ) = LIWMIN * IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN @@ -341,10 +342,12 @@ SUBROUTINE ZHPEVD( JOBZ, UPLO, N, AP, W, Z, LDZ, WORK, LWORK, IF( .NOT.WANTZ ) THEN CALL DSTERF( N, W, RWORK( INDE ), INFO ) ELSE - CALL ZSTEDC( 'I', N, W, RWORK( INDE ), Z, LDZ, WORK( INDWRK ), + CALL ZSTEDC( 'I', N, W, RWORK( INDE ), Z, LDZ, + $ WORK( INDWRK ), $ LLWRK, RWORK( INDRWK ), LLRWK, IWORK, LIWORK, $ INFO ) - CALL ZUPMTR( 'L', UPLO, 'N', N, N, AP, WORK( INDTAU ), Z, LDZ, + CALL ZUPMTR( 'L', UPLO, 'N', N, N, AP, WORK( INDTAU ), Z, + $ LDZ, $ WORK( INDWRK ), IINFO ) END IF * @@ -360,7 +363,7 @@ SUBROUTINE ZHPEVD( JOBZ, UPLO, N, AP, W, Z, LDZ, WORK, LWORK, END IF * WORK( 1 ) = LWMIN - RWORK( 1 ) = LRWMIN + RWORK( 1 ) = DBLE( LRWMIN ) IWORK( 1 ) = LIWMIN RETURN * diff --git a/lapack-netlib/SRC/zhpgvd.f b/lapack-netlib/SRC/zhpgvd.f index e0d35b4d49..78414fa5fb 100644 --- a/lapack-netlib/SRC/zhpgvd.f +++ b/lapack-netlib/SRC/zhpgvd.f @@ -5,7 +5,6 @@ * Online html documentation available at * http://www.netlib.org/lapack/explore-html/ * -*> \htmlonly *> Download ZHPGVD + dependencies *> *> [TGZ] @@ -13,7 +12,6 @@ *> [ZIP] *> *> [TXT] -*> \endhtmlonly * * Definition: * =========== @@ -212,7 +210,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup complex16OTHEReigen +*> \ingroup hpgvd * *> \par Contributors: * ================== @@ -220,8 +218,10 @@ *> Mark Fahey, Department of Mathematics, Univ. of Kentucky, USA * * ===================================================================== - SUBROUTINE ZHPGVD( ITYPE, JOBZ, UPLO, N, AP, BP, W, Z, LDZ, WORK, + SUBROUTINE ZHPGVD( ITYPE, JOBZ, UPLO, N, AP, BP, W, Z, LDZ, + $ WORK, $ LWORK, RWORK, LRWORK, IWORK, LIWORK, INFO ) + IMPLICIT NONE * * -- LAPACK driver routine -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- @@ -250,7 +250,8 @@ SUBROUTINE ZHPGVD( ITYPE, JOBZ, UPLO, N, AP, BP, W, Z, LDZ, WORK, EXTERNAL LSAME, DROUNDUP_LWORK * .. * .. External Subroutines .. - EXTERNAL XERBLA, ZHPEVD, ZHPGST, ZPPTRF, ZTPMV, ZTPSV + EXTERNAL XERBLA, ZHPEVD, ZHPGST, ZPPTRF, ZTPMV, + $ ZTPSV * .. * .. Intrinsic Functions .. INTRINSIC DBLE, MAX @@ -293,8 +294,8 @@ SUBROUTINE ZHPGVD( ITYPE, JOBZ, UPLO, N, AP, BP, W, Z, LDZ, WORK, END IF END IF * - WORK( 1 ) = LWMIN - RWORK( 1 ) = LRWMIN + WORK( 1 ) = DROUNDUP_LWORK(LWMIN) + RWORK( 1 ) = DROUNDUP_LWORK(LRWMIN) IWORK( 1 ) = LIWMIN IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN INFO = -11 @@ -375,8 +376,8 @@ SUBROUTINE ZHPGVD( ITYPE, JOBZ, UPLO, N, AP, BP, W, Z, LDZ, WORK, END IF END IF * - WORK( 1 ) = LWMIN - RWORK( 1 ) = LRWMIN + WORK( 1 ) = DROUNDUP_LWORK(LWMIN) + RWORK( 1 ) = DROUNDUP_LWORK(LRWMIN) IWORK( 1 ) = LIWMIN RETURN * diff --git a/lapack-netlib/SRC/zlacpy.f b/lapack-netlib/SRC/zlacpy.f index 06017509e0..3345530464 100644 --- a/lapack-netlib/SRC/zlacpy.f +++ b/lapack-netlib/SRC/zlacpy.f @@ -5,7 +5,6 @@ * Online html documentation available at * http://www.netlib.org/lapack/explore-html/ * -*> \htmlonly *> Download ZLACPY + dependencies *> *> [TGZ] @@ -13,7 +12,6 @@ *> [ZIP] *> *> [TXT] -*> \endhtmlonly * * Definition: * =========== @@ -96,10 +94,11 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup complex16OTHERauxiliary +*> \ingroup lacpy * * ===================================================================== SUBROUTINE ZLACPY( UPLO, M, N, A, LDA, B, LDB ) + IMPLICIT NONE * * -- LAPACK auxiliary routine -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- @@ -135,7 +134,7 @@ SUBROUTINE ZLACPY( UPLO, M, N, A, LDA, B, LDB ) 20 CONTINUE * ELSE IF( LSAME( UPLO, 'L' ) ) THEN - DO 40 J = 1, N + DO 40 J = 1, MIN( M, N ) DO 30 I = J, M B( I, J ) = A( I, J ) 30 CONTINUE diff --git a/lapack-netlib/SRC/zlantr.f b/lapack-netlib/SRC/zlantr.f index bd4b5da7c2..73a7bc621e 100644 --- a/lapack-netlib/SRC/zlantr.f +++ b/lapack-netlib/SRC/zlantr.f @@ -5,7 +5,6 @@ * Online html documentation available at * http://www.netlib.org/lapack/explore-html/ * -*> \htmlonly *> Download ZLANTR + dependencies *> *> [TGZ] @@ -13,7 +12,6 @@ *> [ZIP] *> *> [TXT] -*> \endhtmlonly * * Definition: * =========== @@ -134,11 +132,13 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup complex16OTHERauxiliary +*> \ingroup lantr * * ===================================================================== - DOUBLE PRECISION FUNCTION ZLANTR( NORM, UPLO, DIAG, M, N, A, LDA, + DOUBLE PRECISION FUNCTION ZLANTR( NORM, UPLO, DIAG, M, N, A, + $ LDA, $ WORK ) + IMPLICIT NONE * * -- LAPACK auxiliary routine -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- @@ -188,14 +188,16 @@ DOUBLE PRECISION FUNCTION ZLANTR( NORM, UPLO, DIAG, M, N, A, LDA, DO 20 J = 1, N DO 10 I = 1, MIN( M, J-1 ) SUM = ABS( A( I, J ) ) - IF( VALUE .LT. SUM .OR. DISNAN( SUM ) ) VALUE = SUM + IF( VALUE .LT. SUM .OR. + $ DISNAN( SUM ) ) VALUE = SUM 10 CONTINUE 20 CONTINUE ELSE - DO 40 J = 1, N + DO 40 J = 1, MIN( M, N ) DO 30 I = J + 1, M SUM = ABS( A( I, J ) ) - IF( VALUE .LT. SUM .OR. DISNAN( SUM ) ) VALUE = SUM + IF( VALUE .LT. SUM .OR. + $ DISNAN( SUM ) ) VALUE = SUM 30 CONTINUE 40 CONTINUE END IF @@ -205,14 +207,16 @@ DOUBLE PRECISION FUNCTION ZLANTR( NORM, UPLO, DIAG, M, N, A, LDA, DO 60 J = 1, N DO 50 I = 1, MIN( M, J ) SUM = ABS( A( I, J ) ) - IF( VALUE .LT. SUM .OR. DISNAN( SUM ) ) VALUE = SUM + IF( VALUE .LT. SUM .OR. + $ DISNAN( SUM ) ) VALUE = SUM 50 CONTINUE 60 CONTINUE ELSE - DO 80 J = 1, N + DO 80 J = 1, MIN( M, N ) DO 70 I = J, M SUM = ABS( A( I, J ) ) - IF( VALUE .LT. SUM .OR. DISNAN( SUM ) ) VALUE = SUM + IF( VALUE .LT. SUM .OR. + $ DISNAN( SUM ) ) VALUE = SUM 70 CONTINUE 80 CONTINUE END IF @@ -239,7 +243,7 @@ DOUBLE PRECISION FUNCTION ZLANTR( NORM, UPLO, DIAG, M, N, A, LDA, IF( VALUE .LT. SUM .OR. DISNAN( SUM ) ) VALUE = SUM 110 CONTINUE ELSE - DO 140 J = 1, N + DO 140 J = 1, MIN( M, N ) IF( UDIAG ) THEN SUM = ONE DO 120 I = J + 1, M @@ -286,7 +290,7 @@ DOUBLE PRECISION FUNCTION ZLANTR( NORM, UPLO, DIAG, M, N, A, LDA, DO 220 I = N + 1, M WORK( I ) = ZERO 220 CONTINUE - DO 240 J = 1, N + DO 240 J = 1, MIN( M, N ) DO 230 I = J + 1, M WORK( I ) = WORK( I ) + ABS( A( I, J ) ) 230 CONTINUE @@ -295,7 +299,7 @@ DOUBLE PRECISION FUNCTION ZLANTR( NORM, UPLO, DIAG, M, N, A, LDA, DO 250 I = 1, M WORK( I ) = ZERO 250 CONTINUE - DO 270 J = 1, N + DO 270 J = 1, MIN( M, N ) DO 260 I = J, M WORK( I ) = WORK( I ) + ABS( A( I, J ) ) 260 CONTINUE @@ -307,7 +311,8 @@ DOUBLE PRECISION FUNCTION ZLANTR( NORM, UPLO, DIAG, M, N, A, LDA, SUM = WORK( I ) IF( VALUE .LT. SUM .OR. DISNAN( SUM ) ) VALUE = SUM 280 CONTINUE - ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN + ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. + $ ( LSAME( NORM, 'E' ) ) ) THEN * * Find normF(A). * @@ -316,27 +321,29 @@ DOUBLE PRECISION FUNCTION ZLANTR( NORM, UPLO, DIAG, M, N, A, LDA, SCALE = ONE SUM = MIN( M, N ) DO 290 J = 2, N - CALL ZLASSQ( MIN( M, J-1 ), A( 1, J ), 1, SCALE, SUM ) + CALL ZLASSQ( MIN( M, J-1 ), A( 1, J ), 1, SCALE, + $ SUM ) 290 CONTINUE ELSE SCALE = ZERO SUM = ONE DO 300 J = 1, N - CALL ZLASSQ( MIN( M, J ), A( 1, J ), 1, SCALE, SUM ) + CALL ZLASSQ( MIN( M, J ), A( 1, J ), 1, SCALE, + $ SUM ) 300 CONTINUE END IF ELSE IF( LSAME( DIAG, 'U' ) ) THEN SCALE = ONE SUM = MIN( M, N ) - DO 310 J = 1, N + DO 310 J = 1, MIN( M, N ) CALL ZLASSQ( M-J, A( MIN( M, J+1 ), J ), 1, SCALE, $ SUM ) 310 CONTINUE ELSE SCALE = ZERO SUM = ONE - DO 320 J = 1, N + DO 320 J = 1, MIN( M, N ) CALL ZLASSQ( M-J+1, A( J, J ), 1, SCALE, SUM ) 320 CONTINUE END IF diff --git a/lapack-netlib/SRC/zlaqp2.f b/lapack-netlib/SRC/zlaqp2.f index 2354b67887..bc81b28c26 100644 --- a/lapack-netlib/SRC/zlaqp2.f +++ b/lapack-netlib/SRC/zlaqp2.f @@ -5,7 +5,6 @@ * Online html documentation available at * http://www.netlib.org/lapack/explore-html/ * -*> \htmlonly *> Download ZLAQP2 + dependencies *> *> [TGZ] @@ -13,7 +12,6 @@ *> [ZIP] *> *> [TXT] -*> \endhtmlonly * * Definition: * =========== @@ -122,7 +120,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup complex16OTHERauxiliary +*> \ingroup laqp2 * *> \par Contributors: * ================== @@ -139,13 +137,12 @@ *> *> LAPACK Working Note 176 * -*> \htmlonly *> [PDF] -*> \endhtmlonly * * ===================================================================== SUBROUTINE ZLAQP2( M, N, OFFSET, A, LDA, JPVT, TAU, VN1, VN2, $ WORK ) + IMPLICIT NONE * * -- LAPACK auxiliary routine -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- @@ -171,10 +168,9 @@ SUBROUTINE ZLAQP2( M, N, OFFSET, A, LDA, JPVT, TAU, VN1, VN2, * .. Local Scalars .. INTEGER I, ITEMP, J, MN, OFFPI, PVT DOUBLE PRECISION TEMP, TEMP2, TOL3Z - COMPLEX*16 AII * .. * .. External Subroutines .. - EXTERNAL ZLARF, ZLARFG, ZSWAP + EXTERNAL ZLARF1F, ZLARFG, ZSWAP * .. * .. Intrinsic Functions .. INTRINSIC ABS, DCONJG, MAX, MIN, SQRT @@ -211,7 +207,8 @@ SUBROUTINE ZLAQP2( M, N, OFFSET, A, LDA, JPVT, TAU, VN1, VN2, * Generate elementary reflector H(i). * IF( OFFPI.LT.M ) THEN - CALL ZLARFG( M-OFFPI+1, A( OFFPI, I ), A( OFFPI+1, I ), 1, + CALL ZLARFG( M-OFFPI+1, A( OFFPI, I ), A( OFFPI+1, I ), + $ 1, $ TAU( I ) ) ELSE CALL ZLARFG( 1, A( M, I ), A( M, I ), 1, TAU( I ) ) @@ -221,12 +218,9 @@ SUBROUTINE ZLAQP2( M, N, OFFSET, A, LDA, JPVT, TAU, VN1, VN2, * * Apply H(i)**H to A(offset+i:m,i+1:n) from the left. * - AII = A( OFFPI, I ) - A( OFFPI, I ) = CONE - CALL ZLARF( 'Left', M-OFFPI+1, N-I, A( OFFPI, I ), 1, - $ DCONJG( TAU( I ) ), A( OFFPI, I+1 ), LDA, - $ WORK( 1 ) ) - A( OFFPI, I ) = AII + CALL ZLARF1F( 'Left', M-OFFPI+1, N-I, A( OFFPI, I ), 1, + $ CONJG( TAU( I ) ), A( OFFPI, I+1 ), LDA, + $ WORK( 1 ) ) END IF * * Update partial column norms. diff --git a/lapack-netlib/SRC/zlaqp2rk.f b/lapack-netlib/SRC/zlaqp2rk.f index f6bf555c26..0e0133ecfc 100644 --- a/lapack-netlib/SRC/zlaqp2rk.f +++ b/lapack-netlib/SRC/zlaqp2rk.f @@ -5,7 +5,6 @@ * Online html documentation available at * http://www.netlib.org/lapack/explore-html/ * -*> \htmlonly *> Download ZLAQP2RK + dependencies *> *> [TGZ] @@ -13,7 +12,6 @@ *> [ZIP] *> *> [TXT] -*> \endhtmlonly * * Definition: * =========== @@ -254,7 +252,7 @@ *> \param[out] WORK *> \verbatim *> WORK is COMPLEX*16 array, dimension (N-1) -*> Used in ZLARF subroutine to apply an elementary +*> Used in ZLARF1F subroutine to apply an elementary *> reflector from the left. *> \endverbatim *> @@ -304,27 +302,19 @@ *> C. H. Bischof, Math. and Comp. Sci. Div., Argonne National Lab, USA. *> A BLAS-3 version of the QR factorization with column pivoting. *> LAPACK Working Note 114 -*> \htmlonly *> https://www.netlib.org/lapack/lawnspdf/lawn114.pdf -*> \endhtmlonly *> and in *> SIAM J. Sci. Comput., 19(5):1486-1494, Sept. 1998. -*> \htmlonly *> https://doi.org/10.1137/S1064827595296732 -*> \endhtmlonly *> *> [2] A partial column norm updating strategy developed in 2006. *> Z. Drmac and Z. Bujanovic, Dept. of Math., University of Zagreb, Croatia. *> On the failure of rank revealing QR factorization software – a case study. *> LAPACK Working Note 176. -*> \htmlonly *> http://www.netlib.org/lapack/lawnspdf/lawn176.pdf -*> \endhtmlonly *> and in *> ACM Trans. Math. Softw. 35, 2, Article 12 (July 2008), 28 pages. -*> \htmlonly *> https://doi.org/10.1145/1377612.1377616 -*> \endhtmlonly * *> \par Contributors: * ================== @@ -369,13 +359,12 @@ SUBROUTINE ZLAQP2RK( M, N, NRHS, IOFFSET, KMAX, ABSTOL, RELTOL, $ CONE = ( 1.0D+0, 0.0D+0 ) ) * .. * .. Local Scalars .. - INTEGER I, ITEMP, J, JMAXC2NRM, KK, KP, MINMNFACT, - $ MINMNUPDT + INTEGER I, ITEMP, J, JMAXC2NRM, KK, KP, + $ KBOUND, MINMNFACT, MINMNUPDT DOUBLE PRECISION HUGEVAL, TAUNAN, TEMP, TEMP2, TOL3Z - COMPLEX*16 AIKK * .. * .. External Subroutines .. - EXTERNAL ZLARF, ZLARFG, ZSWAP + EXTERNAL ZLARF1F, ZLARFG, ZSWAP * .. * .. Intrinsic Functions .. INTRINSIC ABS, DBLE, DCONJG, DIMAG, MAX, MIN, SQRT @@ -402,13 +391,13 @@ SUBROUTINE ZLAQP2RK( M, N, NRHS, IOFFSET, KMAX, ABSTOL, RELTOL, * MINMNFACT = MIN( M-IOFFSET, N ) MINMNUPDT = MIN( M-IOFFSET, N+NRHS ) - KMAX = MIN( KMAX, MINMNFACT ) + KBOUND = MIN( KMAX, MINMNFACT ) TOL3Z = SQRT( DLAMCH( 'Epsilon' ) ) HUGEVAL = DLAMCH( 'Overflow' ) * * Compute the factorization, KK is the lomn loop index. * - DO KK = 1, KMAX + DO KK = 1, KBOUND * I = IOFFSET + KK * @@ -633,12 +622,9 @@ SUBROUTINE ZLAQP2RK( M, N, NRHS, IOFFSET, KMAX, ABSTOL, RELTOL, * condition is satisfied, not only KK < N+NRHS ) * IF( KK.LT.MINMNUPDT ) THEN - AIKK = A( I, KK ) - A( I, KK ) = CONE - CALL ZLARF( 'Left', M-I+1, N+NRHS-KK, A( I, KK ), 1, - $ DCONJG( TAU( KK ) ), A( I, KK+1 ), LDA, - $ WORK( 1 ) ) - A( I, KK ) = AIKK + CALL ZLARF1F( 'Left', M-I+1, N+NRHS-KK, A( I, KK ), 1, + $ CONJG( TAU( KK ) ), A( I, KK+1 ), LDA, + $ WORK( 1 ) ) END IF * IF( KK.LT.MINMNFACT ) THEN @@ -689,7 +675,7 @@ SUBROUTINE ZLAQP2RK( M, N, NRHS, IOFFSET, KMAX, ABSTOL, RELTOL, * i.e. no condition was triggered to exit the routine. * Set the number of factorized columns. * - K = KMAX + K = KBOUND * * We reached the end of the loop, i.e. all KMAX columns were * factorized, we need to set MAXC2NRMK and RELMAXC2NRMK before diff --git a/lapack-netlib/SRC/zlaqr2.f b/lapack-netlib/SRC/zlaqr2.f index f78ea206dd..5cab69b91c 100644 --- a/lapack-netlib/SRC/zlaqr2.f +++ b/lapack-netlib/SRC/zlaqr2.f @@ -5,7 +5,6 @@ * Online html documentation available at * http://www.netlib.org/lapack/explore-html/ * -*> \htmlonly *> Download ZLAQR2 + dependencies *> *> [TGZ] @@ -13,7 +12,6 @@ *> [ZIP] *> *> [TXT] -*> \endhtmlonly * * Definition: * =========== @@ -255,7 +253,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup complex16OTHERauxiliary +*> \ingroup laqr2 * *> \par Contributors: * ================== @@ -264,9 +262,11 @@ *> University of Kansas, USA *> * ===================================================================== - SUBROUTINE ZLAQR2( WANTT, WANTZ, N, KTOP, KBOT, NW, H, LDH, ILOZ, + SUBROUTINE ZLAQR2( WANTT, WANTZ, N, KTOP, KBOT, NW, H, LDH, + $ ILOZ, $ IHIZ, Z, LDZ, NS, ND, SH, V, LDV, NH, T, LDT, $ NV, WV, LDWV, WORK, LWORK ) + IMPLICIT NONE * * -- LAPACK auxiliary routine -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- @@ -292,7 +292,7 @@ SUBROUTINE ZLAQR2( WANTT, WANTZ, N, KTOP, KBOT, NW, H, LDH, ILOZ, PARAMETER ( RZERO = 0.0d0, RONE = 1.0d0 ) * .. * .. Local Scalars .. - COMPLEX*16 BETA, CDUM, S, TAU + COMPLEX*16 CDUM, S, TAU DOUBLE PRECISION FOO, SAFMAX, SAFMIN, SMLNUM, ULP INTEGER I, IFST, ILST, INFO, INFQR, J, JW, KCOL, KLN, $ KNT, KROW, KWTOP, LTOP, LWK1, LWK2, LWKOPT @@ -302,8 +302,9 @@ SUBROUTINE ZLAQR2( WANTT, WANTZ, N, KTOP, KBOT, NW, H, LDH, ILOZ, EXTERNAL DLAMCH * .. * .. External Subroutines .. - EXTERNAL DLABAD, ZCOPY, ZGEHRD, ZGEMM, ZLACPY, ZLAHQR, - $ ZLARF, ZLARFG, ZLASET, ZTREXC, ZUNMHR + EXTERNAL ZCOPY, ZGEHRD, ZGEMM, ZLACPY, + $ ZLAHQR, + $ ZLARF1F, ZLARFG, ZLASET, ZTREXC, ZUNMHR * .. * .. Intrinsic Functions .. INTRINSIC ABS, DBLE, DCMPLX, DCONJG, DIMAG, INT, MAX, MIN @@ -330,7 +331,8 @@ SUBROUTINE ZLAQR2( WANTT, WANTZ, N, KTOP, KBOT, NW, H, LDH, ILOZ, * * ==== Workspace query call to ZUNMHR ==== * - CALL ZUNMHR( 'R', 'N', JW, JW, 1, JW-1, T, LDT, WORK, V, LDV, + CALL ZUNMHR( 'R', 'N', JW, JW, 1, JW-1, T, LDT, WORK, V, + $ LDV, $ WORK, -1, INFO ) LWK2 = INT( WORK( 1 ) ) * @@ -361,7 +363,6 @@ SUBROUTINE ZLAQR2( WANTT, WANTZ, N, KTOP, KBOT, NW, H, LDH, ILOZ, * SAFMIN = DLAMCH( 'SAFE MINIMUM' ) SAFMAX = RONE / SAFMIN - CALL DLABAD( SAFMIN, SAFMAX ) ULP = DLAMCH( 'PRECISION' ) SMLNUM = SAFMIN*( DBLE( N ) / ULP ) * @@ -400,7 +401,8 @@ SUBROUTINE ZLAQR2( WANTT, WANTZ, N, KTOP, KBOT, NW, H, LDH, ILOZ, * . here and there to keep track.) ==== * CALL ZLACPY( 'U', JW, JW, H( KWTOP, KWTOP ), LDH, T, LDT ) - CALL ZCOPY( JW-1, H( KWTOP+1, KWTOP ), LDH+1, T( 2, 1 ), LDT+1 ) + CALL ZCOPY( JW-1, H( KWTOP+1, KWTOP ), LDH+1, T( 2, 1 ), + $ LDT+1 ) * CALL ZLASET( 'A', JW, JW, ZERO, ONE, V, LDV ) CALL ZLAHQR( .true., .true., JW, 1, JW, T, LDT, SH( KWTOP ), 1, @@ -452,7 +454,8 @@ SUBROUTINE ZLAQR2( WANTT, WANTZ, N, KTOP, KBOT, NW, H, LDH, ILOZ, 20 CONTINUE ILST = I IF( IFST.NE.ILST ) - $ CALL ZTREXC( 'V', JW, T, LDT, V, LDV, IFST, ILST, INFO ) + $ CALL ZTREXC( 'V', JW, T, LDT, V, LDV, IFST, ILST, + $ INFO ) 30 CONTINUE END IF * @@ -472,18 +475,17 @@ SUBROUTINE ZLAQR2( WANTT, WANTZ, N, KTOP, KBOT, NW, H, LDH, ILOZ, DO 50 I = 1, NS WORK( I ) = DCONJG( WORK( I ) ) 50 CONTINUE - BETA = WORK( 1 ) - CALL ZLARFG( NS, BETA, WORK( 2 ), 1, TAU ) - WORK( 1 ) = ONE + CALL ZLARFG( NS, WORK( 1 ), WORK( 2 ), 1, TAU ) * - CALL ZLASET( 'L', JW-2, JW-2, ZERO, ZERO, T( 3, 1 ), LDT ) + CALL ZLASET( 'L', JW-2, JW-2, ZERO, ZERO, T( 3, 1 ), + $ LDT ) * - CALL ZLARF( 'L', NS, JW, WORK, 1, DCONJG( TAU ), T, LDT, - $ WORK( JW+1 ) ) - CALL ZLARF( 'R', NS, NS, WORK, 1, TAU, T, LDT, - $ WORK( JW+1 ) ) - CALL ZLARF( 'R', JW, NS, WORK, 1, TAU, V, LDV, - $ WORK( JW+1 ) ) + CALL ZLARF1F( 'L', NS, JW, WORK, 1, CONJG( TAU ), T, LDT, + $ WORK( JW+1 ) ) + CALL ZLARF1F( 'R', NS, NS, WORK, 1, TAU, T, LDT, + $ WORK( JW+1 ) ) + CALL ZLARF1F( 'R', JW, NS, WORK, 1, TAU, V, LDV, + $ WORK( JW+1 ) ) * CALL ZGEHRD( JW, 1, NS, T, LDT, WORK, WORK( JW+1 ), $ LWORK-JW, INFO ) @@ -501,7 +503,8 @@ SUBROUTINE ZLAQR2( WANTT, WANTZ, N, KTOP, KBOT, NW, H, LDH, ILOZ, * . H and Z, if requested. ==== * IF( NS.GT.1 .AND. S.NE.ZERO ) - $ CALL ZUNMHR( 'R', 'N', JW, NS, 1, NS, T, LDT, WORK, V, LDV, + $ CALL ZUNMHR( 'R', 'N', JW, NS, 1, NS, T, LDT, WORK, V, + $ LDV, $ WORK( JW+1 ), LWORK-JW, INFO ) * * ==== Update vertical slab in H ==== @@ -515,7 +518,8 @@ SUBROUTINE ZLAQR2( WANTT, WANTZ, N, KTOP, KBOT, NW, H, LDH, ILOZ, KLN = MIN( NV, KWTOP-KROW ) CALL ZGEMM( 'N', 'N', KLN, JW, JW, ONE, H( KROW, KWTOP ), $ LDH, V, LDV, ZERO, WV, LDWV ) - CALL ZLACPY( 'A', KLN, JW, WV, LDWV, H( KROW, KWTOP ), LDH ) + CALL ZLACPY( 'A', KLN, JW, WV, LDWV, H( KROW, KWTOP ), + $ LDH ) 60 CONTINUE * * ==== Update horizontal slab in H ==== @@ -535,7 +539,8 @@ SUBROUTINE ZLAQR2( WANTT, WANTZ, N, KTOP, KBOT, NW, H, LDH, ILOZ, IF( WANTZ ) THEN DO 80 KROW = ILOZ, IHIZ, NV KLN = MIN( NV, IHIZ-KROW+1 ) - CALL ZGEMM( 'N', 'N', KLN, JW, JW, ONE, Z( KROW, KWTOP ), + CALL ZGEMM( 'N', 'N', KLN, JW, JW, ONE, Z( KROW, + $ KWTOP ), $ LDZ, V, LDV, ZERO, WV, LDWV ) CALL ZLACPY( 'A', KLN, JW, WV, LDWV, Z( KROW, KWTOP ), $ LDZ ) diff --git a/lapack-netlib/SRC/zlaqr3.f b/lapack-netlib/SRC/zlaqr3.f index c8e5fe9996..9a696fe115 100644 --- a/lapack-netlib/SRC/zlaqr3.f +++ b/lapack-netlib/SRC/zlaqr3.f @@ -5,7 +5,6 @@ * Online html documentation available at * http://www.netlib.org/lapack/explore-html/ * -*> \htmlonly *> Download ZLAQR3 + dependencies *> *> [TGZ] @@ -13,7 +12,6 @@ *> [ZIP] *> *> [TXT] -*> \endhtmlonly * * Definition: * =========== @@ -252,7 +250,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup complex16OTHERauxiliary +*> \ingroup laqr3 * *> \par Contributors: * ================== @@ -261,9 +259,11 @@ *> University of Kansas, USA *> * ===================================================================== - SUBROUTINE ZLAQR3( WANTT, WANTZ, N, KTOP, KBOT, NW, H, LDH, ILOZ, + SUBROUTINE ZLAQR3( WANTT, WANTZ, N, KTOP, KBOT, NW, H, LDH, + $ ILOZ, $ IHIZ, Z, LDZ, NS, ND, SH, V, LDV, NH, T, LDT, $ NV, WV, LDWV, WORK, LWORK ) + IMPLICIT NONE * * -- LAPACK auxiliary routine -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- @@ -289,7 +289,7 @@ SUBROUTINE ZLAQR3( WANTT, WANTZ, N, KTOP, KBOT, NW, H, LDH, ILOZ, PARAMETER ( RZERO = 0.0d0, RONE = 1.0d0 ) * .. * .. Local Scalars .. - COMPLEX*16 BETA, CDUM, S, TAU + COMPLEX*16 CDUM, S, TAU DOUBLE PRECISION FOO, SAFMAX, SAFMIN, SMLNUM, ULP INTEGER I, IFST, ILST, INFO, INFQR, J, JW, KCOL, KLN, $ KNT, KROW, KWTOP, LTOP, LWK1, LWK2, LWK3, @@ -301,8 +301,9 @@ SUBROUTINE ZLAQR3( WANTT, WANTZ, N, KTOP, KBOT, NW, H, LDH, ILOZ, EXTERNAL DLAMCH, ILAENV * .. * .. External Subroutines .. - EXTERNAL DLABAD, ZCOPY, ZGEHRD, ZGEMM, ZLACPY, ZLAHQR, - $ ZLAQR4, ZLARF, ZLARFG, ZLASET, ZTREXC, ZUNMHR + EXTERNAL ZCOPY, ZGEHRD, ZGEMM, ZLACPY, ZLAHQR, + $ ZLAQR4, + $ ZLARF1F, ZLARFG, ZLASET, ZTREXC, ZUNMHR * .. * .. Intrinsic Functions .. INTRINSIC ABS, DBLE, DCMPLX, DCONJG, DIMAG, INT, MAX, MIN @@ -329,13 +330,15 @@ SUBROUTINE ZLAQR3( WANTT, WANTZ, N, KTOP, KBOT, NW, H, LDH, ILOZ, * * ==== Workspace query call to ZUNMHR ==== * - CALL ZUNMHR( 'R', 'N', JW, JW, 1, JW-1, T, LDT, WORK, V, LDV, + CALL ZUNMHR( 'R', 'N', JW, JW, 1, JW-1, T, LDT, WORK, V, + $ LDV, $ WORK, -1, INFO ) LWK2 = INT( WORK( 1 ) ) * * ==== Workspace query call to ZLAQR4 ==== * - CALL ZLAQR4( .true., .true., JW, 1, JW, T, LDT, SH, 1, JW, V, + CALL ZLAQR4( .true., .true., JW, 1, JW, T, LDT, SH, 1, JW, + $ V, $ LDV, WORK, -1, INFQR ) LWK3 = INT( WORK( 1 ) ) * @@ -366,7 +369,6 @@ SUBROUTINE ZLAQR3( WANTT, WANTZ, N, KTOP, KBOT, NW, H, LDH, ILOZ, * SAFMIN = DLAMCH( 'SAFE MINIMUM' ) SAFMAX = RONE / SAFMIN - CALL DLABAD( SAFMIN, SAFMAX ) ULP = DLAMCH( 'PRECISION' ) SMLNUM = SAFMIN*( DBLE( N ) / ULP ) * @@ -405,15 +407,18 @@ SUBROUTINE ZLAQR3( WANTT, WANTZ, N, KTOP, KBOT, NW, H, LDH, ILOZ, * . here and there to keep track.) ==== * CALL ZLACPY( 'U', JW, JW, H( KWTOP, KWTOP ), LDH, T, LDT ) - CALL ZCOPY( JW-1, H( KWTOP+1, KWTOP ), LDH+1, T( 2, 1 ), LDT+1 ) + CALL ZCOPY( JW-1, H( KWTOP+1, KWTOP ), LDH+1, T( 2, 1 ), + $ LDT+1 ) * CALL ZLASET( 'A', JW, JW, ZERO, ONE, V, LDV ) NMIN = ILAENV( 12, 'ZLAQR3', 'SV', JW, 1, JW, LWORK ) IF( JW.GT.NMIN ) THEN - CALL ZLAQR4( .true., .true., JW, 1, JW, T, LDT, SH( KWTOP ), 1, + CALL ZLAQR4( .true., .true., JW, 1, JW, T, LDT, SH( KWTOP ), + $ 1, $ JW, V, LDV, WORK, LWORK, INFQR ) ELSE - CALL ZLAHQR( .true., .true., JW, 1, JW, T, LDT, SH( KWTOP ), 1, + CALL ZLAHQR( .true., .true., JW, 1, JW, T, LDT, SH( KWTOP ), + $ 1, $ JW, V, LDV, INFQR ) END IF * @@ -463,7 +468,8 @@ SUBROUTINE ZLAQR3( WANTT, WANTZ, N, KTOP, KBOT, NW, H, LDH, ILOZ, 20 CONTINUE ILST = I IF( IFST.NE.ILST ) - $ CALL ZTREXC( 'V', JW, T, LDT, V, LDV, IFST, ILST, INFO ) + $ CALL ZTREXC( 'V', JW, T, LDT, V, LDV, IFST, ILST, + $ INFO ) 30 CONTINUE END IF * @@ -483,18 +489,17 @@ SUBROUTINE ZLAQR3( WANTT, WANTZ, N, KTOP, KBOT, NW, H, LDH, ILOZ, DO 50 I = 1, NS WORK( I ) = DCONJG( WORK( I ) ) 50 CONTINUE - BETA = WORK( 1 ) - CALL ZLARFG( NS, BETA, WORK( 2 ), 1, TAU ) - WORK( 1 ) = ONE + CALL ZLARFG( NS, WORK( 1 ), WORK( 2 ), 1, TAU ) * - CALL ZLASET( 'L', JW-2, JW-2, ZERO, ZERO, T( 3, 1 ), LDT ) + CALL ZLASET( 'L', JW-2, JW-2, ZERO, ZERO, T( 3, 1 ), + $ LDT ) * - CALL ZLARF( 'L', NS, JW, WORK, 1, DCONJG( TAU ), T, LDT, - $ WORK( JW+1 ) ) - CALL ZLARF( 'R', NS, NS, WORK, 1, TAU, T, LDT, - $ WORK( JW+1 ) ) - CALL ZLARF( 'R', JW, NS, WORK, 1, TAU, V, LDV, - $ WORK( JW+1 ) ) + CALL ZLARF1F( 'L', NS, JW, WORK, 1, CONJG( TAU ), T, LDT, + $ WORK( JW+1 ) ) + CALL ZLARF1F( 'R', NS, NS, WORK, 1, TAU, T, LDT, + $ WORK( JW+1 ) ) + CALL ZLARF1F( 'R', JW, NS, WORK, 1, TAU, V, LDV, + $ WORK( JW+1 ) ) * CALL ZGEHRD( JW, 1, NS, T, LDT, WORK, WORK( JW+1 ), $ LWORK-JW, INFO ) @@ -512,7 +517,8 @@ SUBROUTINE ZLAQR3( WANTT, WANTZ, N, KTOP, KBOT, NW, H, LDH, ILOZ, * . H and Z, if requested. ==== * IF( NS.GT.1 .AND. S.NE.ZERO ) - $ CALL ZUNMHR( 'R', 'N', JW, NS, 1, NS, T, LDT, WORK, V, LDV, + $ CALL ZUNMHR( 'R', 'N', JW, NS, 1, NS, T, LDT, WORK, V, + $ LDV, $ WORK( JW+1 ), LWORK-JW, INFO ) * * ==== Update vertical slab in H ==== @@ -526,7 +532,8 @@ SUBROUTINE ZLAQR3( WANTT, WANTZ, N, KTOP, KBOT, NW, H, LDH, ILOZ, KLN = MIN( NV, KWTOP-KROW ) CALL ZGEMM( 'N', 'N', KLN, JW, JW, ONE, H( KROW, KWTOP ), $ LDH, V, LDV, ZERO, WV, LDWV ) - CALL ZLACPY( 'A', KLN, JW, WV, LDWV, H( KROW, KWTOP ), LDH ) + CALL ZLACPY( 'A', KLN, JW, WV, LDWV, H( KROW, KWTOP ), + $ LDH ) 60 CONTINUE * * ==== Update horizontal slab in H ==== @@ -546,7 +553,8 @@ SUBROUTINE ZLAQR3( WANTT, WANTZ, N, KTOP, KBOT, NW, H, LDH, ILOZ, IF( WANTZ ) THEN DO 80 KROW = ILOZ, IHIZ, NV KLN = MIN( NV, IHIZ-KROW+1 ) - CALL ZGEMM( 'N', 'N', KLN, JW, JW, ONE, Z( KROW, KWTOP ), + CALL ZGEMM( 'N', 'N', KLN, JW, JW, ONE, Z( KROW, + $ KWTOP ), $ LDZ, V, LDV, ZERO, WV, LDWV ) CALL ZLACPY( 'A', KLN, JW, WV, LDWV, Z( KROW, KWTOP ), $ LDZ ) diff --git a/lapack-netlib/SRC/zlarf1f.c b/lapack-netlib/SRC/zlarf1f.c new file mode 100644 index 0000000000..fa7f345820 --- /dev/null +++ b/lapack-netlib/SRC/zlarf1f.c @@ -0,0 +1,590 @@ +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +#if defined(_WIN64) +typedef long long BLASLONG; +typedef unsigned long long BLASULONG; +#else +typedef long BLASLONG; +typedef unsigned long BLASULONG; +#endif + +#ifdef LAPACK_ILP64 +typedef BLASLONG blasint; +#if defined(_WIN64) +#define blasabs(x) llabs(x) +#else +#define blasabs(x) labs(x) +#endif +#else +typedef int blasint; +#define blasabs(x) abs(x) +#endif + +typedef blasint integer; + +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +#ifdef _MSC_VER +static inline _Fcomplex Cf(complex *z) {_Fcomplex zz={z->r , z->i}; return zz;} +static inline _Dcomplex Cd(doublecomplex *z) {_Dcomplex zz={z->r , z->i};return zz;} +static inline _Fcomplex * _pCf(complex *z) {return (_Fcomplex*)z;} +static inline _Dcomplex * _pCd(doublecomplex *z) {return (_Dcomplex*)z;} +#else +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#endif +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#ifdef _MSC_VER +#define c_div(c, a, b) {Cf(c)._Val[0] = (Cf(a)._Val[0]/Cf(b)._Val[0]); Cf(c)._Val[1]=(Cf(a)._Val[1]/Cf(b)._Val[1]);} +#define z_div(c, a, b) {Cd(c)._Val[0] = (Cd(a)._Val[0]/Cd(b)._Val[0]); Cd(c)._Val[1]=(Cd(a)._Val[1]/Cd(b)._Val[1]);} +#else +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#endif +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conjf(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimagf(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +/* Table of constant values */ + +static doublecomplex c_b1 = {1.,0.}; +static doublecomplex c_b2 = {0.,0.}; +static integer c__1 = 1; + +/* > \brief \b ZLARF1F applies an elementary reflector to a general rectangular */ +/* matrix assuming v(1) = 1. */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > Download ZLARF1F + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE ZLARF1F( SIDE, M, N, V, INCV, TAU, C, LDC, WORK ) */ + +/* CHARACTER SIDE */ +/* INTEGER INCV, LDC, M, N */ +/* COMPLEX*16 TAU */ +/* COMPLEX*16 C( LDC, * ), V( * ), WORK( * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > ZLARF1F applies a complex elementary reflector H to a real m by n matrix */ +/* > C, from either the left or the right. H is represented in the form */ +/* > */ +/* > H = I - tau * v * v**H */ +/* > */ +/* > where tau is a complex scalar and v is a complex vector. */ +/* > */ +/* > If tau = 0, then H is taken to be the unit matrix. */ +/* > */ +/* > To apply H**H, supply conjg(tau) instead */ +/* > tau. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] SIDE */ +/* > \verbatim */ +/* > SIDE is CHARACTER*1 */ +/* > = 'L': form H * C */ +/* > */ +/* > \param[in] M */ +/* > \verbatim */ +/* > M is INTEGER */ +/* > The number of rows of the matrix C. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The number of columns of the matrix C. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] V */ +/* > \verbatim */ +/* > V is COMPLEX*16 array, dimension */ +/* > (1 + (M-1)*abs(INCV)) if SIDE = 'L' */ +/* > or (1 + (N-1)*abs(INCV)) if SIDE = 'R' */ +/* > The vector v in the representation of H. V is not used if */ +/* > TAU = 0. V(1) is not referenced or modified. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] INCV */ +/* > \verbatim */ +/* > INCV is INTEGER */ +/* > The increment between elements of v. INCV <> 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] TAU */ +/* > \verbatim */ +/* > TAU is COMPLEX*16 */ +/* > The value tau in the representation of H. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] C */ +/* > \verbatim */ +/* > C is COMPLEX*16 array, dimension (LDC,N) */ +/* > On entry, the m by n matrix C. */ +/* > On exit, C is overwritten by the matrix H * C if SIDE = 'L', */ +/* > or C * H if SIDE = 'R'. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDC */ +/* > \verbatim */ +/* > LDC is INTEGER */ +/* > The leading dimension of the array C. LDC >= f2cmax(1,M). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is COMPLEX*16 array, dimension */ +/* > (N) if SIDE = 'L' */ +/* > or (M) if SIDE = 'R' */ +/* > \endverbatim */ +/* To take advantage of the fact that v(1) = 1, we do the following */ +/* v = [ 1 v_2 ]**T */ +/* If SIDE='L' */ +/* |-----| */ +/* | C_1 | */ +/* C =| C_2 | */ +/* |-----| */ +/* C_1\in\mathbb{C}^{1\times n}, C_2\in\mathbb{C}^{m-1\times n} */ +/* So we compute: */ +/* C = HC = (I - \tau vv**T)C */ +/* = C - \tau vv**T C */ +/* w = C**T v = [ C_1**T C_2**T ] [ 1 v_2 ]**T */ +/* = C_1**T + C_2**T v ( ZGEMM then ZAXPYC-like ) */ +/* C = C - \tau vv**T C */ +/* = C - \tau vw**T */ +/* Giving us C_1 = C_1 - \tau w**T ( ZAXPYC-like ) */ +/* and */ +/* C_2 = C_2 - \tau v_2w**T ( ZGERC ) */ +/* If SIDE='R' */ + +/* C = [ C_1 C_2 ] */ +/* C_1\in\mathbb{C}^{m\times 1}, C_2\in\mathbb{C}^{m\times n-1} */ +/* So we compute: */ +/* C = CH = C(I - \tau vv**T) */ +/* = C - \tau Cvv**T */ + +/* w = Cv = [ C_1 C_2 ] [ 1 v_2 ]**T */ +/* = C_1 + C_2v_2 ( ZGEMM then ZAXPYC-like ) */ +/* C = C - \tau Cvv**T */ +/* = C - \tau wv**T */ +/* Giving us C_1 = C_1 - \tau w ( ZAXPYC-like ) */ +/* and */ +/* C_2 = C_2 - \tau wv_2**T ( ZGERC ) */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \ingroup larf */ + +/* ===================================================================== */ +/* Subroutine */ int zlarf1f_(char *side, integer *m, integer *n, + doublecomplex *v, integer *incv, doublecomplex *tau, doublecomplex * + c__, integer *ldc, doublecomplex *work) +{ + /* System generated locals */ + integer c_dim1, c_offset, i__1, i__2, i__3; + doublecomplex z__1, z__2, z__3; + + /* Local variables */ + integer i__; + logical applyleft; + extern logical lsame_(char *, char *); + integer lastc; + extern /* Subroutine */ int zgerc_(integer *, integer *, doublecomplex *, + doublecomplex *, integer *, doublecomplex *, integer *, + doublecomplex *, integer *), zscal_(integer *, doublecomplex *, + doublecomplex *, integer *), zgemv_(char *, integer *, integer *, + doublecomplex *, doublecomplex *, integer *, doublecomplex *, + integer *, doublecomplex *, doublecomplex *, integer *); + integer lastv; + extern /* Subroutine */ int zaxpy_(integer *, doublecomplex *, + doublecomplex *, integer *, doublecomplex *, integer *); + extern integer ilazlc_(integer *, integer *, doublecomplex *, integer *), + ilazlr_(integer *, integer *, doublecomplex *, integer *); + + +/* -- LAPACK auxiliary routine -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + + +/* ===================================================================== */ + + + /* Parameter adjustments */ + --v; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + --work; + + /* Function Body */ + applyleft = lsame_(side, "L"); + lastv = 1; + lastc = 0; + if (tau->r != 0. || tau->i != 0.) { +/* Set up variables for scanning V. LASTV begins pointing to the end */ +/* of V. */ + if (applyleft) { + lastv = *m; + } else { + lastv = *n; + } + if (*incv > 0) { + i__ = (lastv - 1) * *incv + 1; + } else { + i__ = 1; + } +/* Look for the last non-zero row in V. */ +/* Since we are assuming that V(1) = 1, and it is not stored, so we */ +/* shouldn't access it. */ + for(;;) { /* while(complicated condition) */ + i__1 = i__; + if (!(lastv > 1 && (v[i__1].r == 0. && v[i__1].i == 0.))) + break; + --lastv; + i__ -= *incv; + } + if (applyleft) { +/* Scan for the last non-zero column in C(1:lastv,:). */ + lastc = ilazlc_(&lastv, n, &c__[c_offset], ldc); + } else { +/* Scan for the last non-zero row in C(:,1:lastv). */ + lastc = ilazlr_(m, &lastv, &c__[c_offset], ldc); + } + } + if (lastc == 0) { + return 0; + } + if (applyleft) { + +/* Form H * C */ + +/* Check if m = 1. This means v = 1, So we just need to compu */ +/* C := HC = (1-\tau)C. */ + if (lastv == 1) { + z__1.r = 1. - tau->r, z__1.i = 0. - tau->i; + zscal_(&lastc, &z__1, &c__[c_offset], ldc); + } else { + +/* w(1:lastc,1) := C(1:lastv,1:lastc)**H * v(1:lastv,1) */ + +/* (I - tvv**H)C = C - tvv**H C */ +/* First compute w**H = v**H c -> w = C**H v */ +/* C = [ C_1 C_2 ]**T, v = [1 v_2]**T */ +/* w = C_1**H + C_2**Hv_2 */ +/* w = C_2**Hv_2 */ + i__1 = lastv - 1; + zgemv_("Conjugate transpose", &i__1, &lastc, &c_b1, &c__[c_dim1 + + 2], ldc, &v[*incv + 1], incv, &c_b2, &work[1], &c__1); + +/* w(1:lastc,1) += v(1,1) * C(1,1:lastc)**H */ + + i__1 = lastc; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__; + i__3 = i__; + d_cnjg(&z__2, &c__[i__ * c_dim1 + 1]); + z__1.r = work[i__3].r + z__2.r, z__1.i = work[i__3].i + + z__2.i; + work[i__2].r = z__1.r, work[i__2].i = z__1.i; + } + +/* C(1:lastv,1:lastc) := C(...) - tau * v(1:lastv,1) * w(1:lastc,1)**H */ + +/* C(1, 1:lastc) := C(...) - tau * v(1,1) * w(1:lastc,1)**H */ +/* = C(...) - tau * Conj(w(1:lastc,1)) */ +/* This is essentially a zaxpyc */ + i__1 = lastc; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__ * c_dim1 + 1; + i__3 = i__ * c_dim1 + 1; + d_cnjg(&z__3, &work[i__]); + z__2.r = tau->r * z__3.r - tau->i * z__3.i, z__2.i = tau->r * + z__3.i + tau->i * z__3.r; + z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; + c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; + } + +/* C(2:lastv,1:lastc) += - tau * v(2:lastv,1) * w(1:lastc,1)**H */ + + i__1 = lastv - 1; + z__1.r = -tau->r, z__1.i = -tau->i; + zgerc_(&i__1, &lastc, &z__1, &v[*incv + 1], incv, &work[1], &c__1, + &c__[c_dim1 + 2], ldc); + } + } else { + +/* Form C * H */ + +/* Check if n = 1. This means v = 1, so we just need to compu */ +/* C := CH = C(1-\tau). */ + if (lastv == 1) { + z__1.r = 1. - tau->r, z__1.i = 0. - tau->i; + zscal_(&lastc, &z__1, &c__[c_offset], &c__1); + } else { + +/* w(1:lastc,1) := C(1:lastc,1:lastv) * v(1:lastv,1) */ + +/* w(1:lastc,1) := C(1:lastc,2:lastv) * v(2:lastv,1) */ + i__1 = lastv - 1; + zgemv_("No transpose", &lastc, &i__1, &c_b1, &c__[(c_dim1 << 1) + + 1], ldc, &v[*incv + 1], incv, &c_b2, &work[1], &c__1); +/* w(1:lastc,1) += C(1:lastc,1) v(1,1) = C(1:lastc,1) */ + zaxpy_(&lastc, &c_b1, &c__[c_offset], &c__1, &work[1], &c__1); + +/* C(1:lastc,1:lastv) := C(...) - tau * w(1:lastc,1) * v(1:lastv,1)**T */ + +/* C(1:lastc,1) := C(...) - tau * w(1:lastc,1) * v(1,1 */ +/* = C(...) - tau * w(1:lastc,1) */ + z__1.r = -tau->r, z__1.i = -tau->i; + zaxpy_(&lastc, &z__1, &work[1], &c__1, &c__[c_offset], &c__1); +/* C(1:lastc,2:lastv) := C(...) - tau * w(1:lastc,1) * v(2 */ + i__1 = lastv - 1; + z__1.r = -tau->r, z__1.i = -tau->i; + zgerc_(&lastc, &i__1, &z__1, &work[1], &c__1, &v[*incv + 1], incv, + &c__[(c_dim1 << 1) + 1], ldc); + } + } + return 0; + +/* End of ZLARF1F */ + +} /* zlarf1f_ */ + diff --git a/lapack-netlib/SRC/zlarf1f.f b/lapack-netlib/SRC/zlarf1f.f new file mode 100644 index 0000000000..adaca3c9b4 --- /dev/null +++ b/lapack-netlib/SRC/zlarf1f.f @@ -0,0 +1,302 @@ +*> \brief \b ZLARF1F applies an elementary reflector to a general rectangular +* matrix assuming v(1) = 1. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> Download ZLARF1F + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +* +* Definition: +* =========== +* +* SUBROUTINE ZLARF1F( SIDE, M, N, V, INCV, TAU, C, LDC, WORK ) +* +* .. Scalar Arguments .. +* CHARACTER SIDE +* INTEGER INCV, LDC, M, N +* COMPLEX*16 TAU +* .. +* .. Array Arguments .. +* COMPLEX*16 C( LDC, * ), V( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZLARF1F applies a complex elementary reflector H to a real m by n matrix +*> C, from either the left or the right. H is represented in the form +*> +*> H = I - tau * v * v**H +*> +*> where tau is a complex scalar and v is a complex vector. +*> +*> If tau = 0, then H is taken to be the unit matrix. +*> +*> To apply H**H, supply conjg(tau) instead +*> tau. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] SIDE +*> \verbatim +*> SIDE is CHARACTER*1 +*> = 'L': form H * C +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix C. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix C. +*> \endverbatim +*> +*> \param[in] V +*> \verbatim +*> V is COMPLEX*16 array, dimension +*> (1 + (M-1)*abs(INCV)) if SIDE = 'L' +*> or (1 + (N-1)*abs(INCV)) if SIDE = 'R' +*> The vector v in the representation of H. V is not used if +*> TAU = 0. V(1) is not referenced or modified. +*> \endverbatim +*> +*> \param[in] INCV +*> \verbatim +*> INCV is INTEGER +*> The increment between elements of v. INCV <> 0. +*> \endverbatim +*> +*> \param[in] TAU +*> \verbatim +*> TAU is COMPLEX*16 +*> The value tau in the representation of H. +*> \endverbatim +*> +*> \param[in,out] C +*> \verbatim +*> C is COMPLEX*16 array, dimension (LDC,N) +*> On entry, the m by n matrix C. +*> On exit, C is overwritten by the matrix H * C if SIDE = 'L', +*> or C * H if SIDE = 'R'. +*> \endverbatim +*> +*> \param[in] LDC +*> \verbatim +*> LDC is INTEGER +*> The leading dimension of the array C. LDC >= max(1,M). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension +*> (N) if SIDE = 'L' +*> or (M) if SIDE = 'R' +*> \endverbatim +* To take advantage of the fact that v(1) = 1, we do the following +* v = [ 1 v_2 ]**T +* If SIDE='L' +* |-----| +* | C_1 | +* C =| C_2 | +* |-----| +* C_1\in\mathbb{C}^{1\times n}, C_2\in\mathbb{C}^{m-1\times n} +* So we compute: +* C = HC = (I - \tau vv**T)C +* = C - \tau vv**T C +* w = C**T v = [ C_1**T C_2**T ] [ 1 v_2 ]**T +* = C_1**T + C_2**T v ( ZGEMM then ZAXPYC-like ) +* C = C - \tau vv**T C +* = C - \tau vw**T +* Giving us C_1 = C_1 - \tau w**T ( ZAXPYC-like ) +* and +* C_2 = C_2 - \tau v_2w**T ( ZGERC ) +* If SIDE='R' +* +* C = [ C_1 C_2 ] +* C_1\in\mathbb{C}^{m\times 1}, C_2\in\mathbb{C}^{m\times n-1} +* So we compute: +* C = CH = C(I - \tau vv**T) +* = C - \tau Cvv**T +* +* w = Cv = [ C_1 C_2 ] [ 1 v_2 ]**T +* = C_1 + C_2v_2 ( ZGEMM then ZAXPYC-like ) +* C = C - \tau Cvv**T +* = C - \tau wv**T +* Giving us C_1 = C_1 - \tau w ( ZAXPYC-like ) +* and +* C_2 = C_2 - \tau wv_2**T ( ZGERC ) +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \ingroup larf +* +* ===================================================================== + SUBROUTINE ZLARF1F( SIDE, M, N, V, INCV, TAU, C, LDC, WORK ) + IMPLICIT NONE +* +* -- LAPACK auxiliary routine -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* +* .. Scalar Arguments .. + CHARACTER SIDE + INTEGER INCV, LDC, M, N + COMPLEX*16 TAU +* .. +* .. Array Arguments .. + COMPLEX*16 C( LDC, * ), V( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX*16 ONE, ZERO + PARAMETER ( ONE = ( 1.0D+0, 0.0D+0 ), + $ ZERO = ( 0.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL APPLYLEFT + INTEGER I, LASTV, LASTC, J +* .. +* .. External Subroutines .. + EXTERNAL ZAXPY, ZGEMV, ZGERC, ZSCAL +* .. Intrinsic Functions .. + INTRINSIC DCONJG +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAZLR, ILAZLC + EXTERNAL LSAME, ILAZLR, ILAZLC +* .. +* .. Executable Statements .. +* + APPLYLEFT = LSAME( SIDE, 'L' ) + LASTV = 1 + LASTC = 0 + IF( TAU.NE.ZERO ) THEN +! Set up variables for scanning V. LASTV begins pointing to the end +! of V. + IF( APPLYLEFT ) THEN + LASTV = M + ELSE + LASTV = N + END IF + IF( INCV.GT.0 ) THEN + I = 1 + (LASTV-1) * INCV + ELSE + I = 1 + END IF +! Look for the last non-zero row in V. +! Since we are assuming that V(1) = 1, and it is not stored, so we +! shouldn't access it. + DO WHILE( LASTV.GT.1 .AND. V( I ).EQ.ZERO ) + LASTV = LASTV - 1 + I = I - INCV + END DO + IF( APPLYLEFT ) THEN +! Scan for the last non-zero column in C(1:lastv,:). + LASTC = ILAZLC(LASTV, N, C, LDC) + ELSE +! Scan for the last non-zero row in C(:,1:lastv). + LASTC = ILAZLR(M, LASTV, C, LDC) + END IF + END IF + IF( LASTC.EQ.0 ) THEN + RETURN + END IF + IF( APPLYLEFT ) THEN +* +* Form H * C +* + ! Check if m = 1. This means v = 1, So we just need to compute + ! C := HC = (1-\tau)C. + IF( LASTV.EQ.1 ) THEN + CALL ZSCAL(LASTC, ONE - TAU, C, LDC) + ELSE +* +* w(1:lastc,1) := C(1:lastv,1:lastc)**H * v(1:lastv,1) +* + ! (I - tvv**H)C = C - tvv**H C + ! First compute w**H = v**H c -> w = C**H v + ! C = [ C_1 C_2 ]**T, v = [1 v_2]**T + ! w = C_1**H + C_2**Hv_2 + ! w = C_2**Hv_2 + CALL ZGEMV( 'Conjugate transpose', LASTV - 1, + $ LASTC, ONE, C( 1+1, 1 ), LDC, V( 1 + INCV ), + $ INCV, ZERO, WORK, 1 ) +* +* w(1:lastc,1) += v(1,1) * C(1,1:lastc)**H +* + DO I = 1, LASTC + WORK( I ) = WORK( I ) + DCONJG( C( 1, I ) ) + END DO +* +* C(1:lastv,1:lastc) := C(...) - tau * v(1:lastv,1) * w(1:lastc,1)**H +* + ! C(1, 1:lastc) := C(...) - tau * v(1,1) * w(1:lastc,1)**H + ! = C(...) - tau * Conj(w(1:lastc,1)) + ! This is essentially a zaxpyc + DO I = 1, LASTC + C( 1, I ) = C( 1, I ) - TAU * DCONJG( WORK( I ) ) + END DO +* +* C(2:lastv,1:lastc) += - tau * v(2:lastv,1) * w(1:lastc,1)**H +* + CALL ZGERC( LASTV - 1, LASTC, -TAU, V( 1 + INCV ), + $ INCV, WORK, 1, C( 1+1, 1 ), LDC ) + END IF + ELSE +* +* Form C * H +* + ! Check if n = 1. This means v = 1, so we just need to compute + ! C := CH = C(1-\tau). + IF( LASTV.EQ.1 ) THEN + CALL ZSCAL(LASTC, ONE - TAU, C, 1) + ELSE +* +* w(1:lastc,1) := C(1:lastc,1:lastv) * v(1:lastv,1) +* + ! w(1:lastc,1) := C(1:lastc,2:lastv) * v(2:lastv,1) + CALL ZGEMV( 'No transpose', LASTC, LASTV-1, ONE, + $ C(1,1+1), LDC, V(1+INCV), INCV, ZERO, WORK, 1 ) + ! w(1:lastc,1) += C(1:lastc,1) v(1,1) = C(1:lastc,1) + CALL ZAXPY(LASTC, ONE, C, 1, WORK, 1) +* +* C(1:lastc,1:lastv) := C(...) - tau * w(1:lastc,1) * v(1:lastv,1)**T +* + ! C(1:lastc,1) := C(...) - tau * w(1:lastc,1) * v(1,1)**T + ! = C(...) - tau * w(1:lastc,1) + CALL ZAXPY(LASTC, -TAU, WORK, 1, C, 1) + ! C(1:lastc,2:lastv) := C(...) - tau * w(1:lastc,1) * v(2:lastv)**T + CALL ZGERC( LASTC, LASTV-1, -TAU, WORK, 1, V(1+INCV), + $ INCV, C(1,1+1), LDC ) + END IF + END IF + RETURN +* +* End of ZLARF1F +* + END diff --git a/lapack-netlib/SRC/zlarf1l.c b/lapack-netlib/SRC/zlarf1l.c new file mode 100644 index 0000000000..305a77c944 --- /dev/null +++ b/lapack-netlib/SRC/zlarf1l.c @@ -0,0 +1,555 @@ +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +#if defined(_WIN64) +typedef long long BLASLONG; +typedef unsigned long long BLASULONG; +#else +typedef long BLASLONG; +typedef unsigned long BLASULONG; +#endif + +#ifdef LAPACK_ILP64 +typedef BLASLONG blasint; +#if defined(_WIN64) +#define blasabs(x) llabs(x) +#else +#define blasabs(x) labs(x) +#endif +#else +typedef int blasint; +#define blasabs(x) abs(x) +#endif + +typedef blasint integer; + +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +#ifdef _MSC_VER +static inline _Fcomplex Cf(complex *z) {_Fcomplex zz={z->r , z->i}; return zz;} +static inline _Dcomplex Cd(doublecomplex *z) {_Dcomplex zz={z->r , z->i};return zz;} +static inline _Fcomplex * _pCf(complex *z) {return (_Fcomplex*)z;} +static inline _Dcomplex * _pCd(doublecomplex *z) {return (_Dcomplex*)z;} +#else +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#endif +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#ifdef _MSC_VER +#define c_div(c, a, b) {Cf(c)._Val[0] = (Cf(a)._Val[0]/Cf(b)._Val[0]); Cf(c)._Val[1]=(Cf(a)._Val[1]/Cf(b)._Val[1]);} +#define z_div(c, a, b) {Cd(c)._Val[0] = (Cd(a)._Val[0]/Cd(b)._Val[0]); Cd(c)._Val[1]=(Cd(a)._Val[1]/Cd(b)._Val[1]);} +#else +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#endif +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conjf(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimagf(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +/* Table of constant values */ + +static doublecomplex c_b1 = {1.,0.}; +static doublecomplex c_b2 = {0.,0.}; +static integer c__1 = 1; + +/* > \brief \b ZLARF1L applies an elementary reflector to a general rectangular */ +/* matrix assuming v(lastv) = 1, where lastv is the last non-zero */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > Download ZLARF1L + dependencies */ +/* > +*/ +/* > [TGZ] */ +/* > +*/ +/* > [ZIP] */ +/* > +*/ +/* > [TXT] */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE ZLARF1L( SIDE, M, N, V, INCV, TAU, C, LDC, WORK ) */ + +/* CHARACTER SIDE */ +/* INTEGER INCV, LDC, M, N */ +/* COMPLEX*16 TAU */ +/* COMPLEX*16 C( LDC, * ), V( * ), WORK( * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > ZLARF1L applies a complex elementary reflector H to a complex m by n matrix */ +/* > C, from either the left or the right. H is represented in the form */ +/* > */ +/* > H = I - tau * v * v**H */ +/* > */ +/* > where tau is a real scalar and v is a real vector assuming v(lastv) = 1, */ +/* > where lastv is the last non-zero element. */ +/* > */ +/* > If tau = 0, then H is taken to be the unit matrix. */ +/* > */ +/* > To apply H**H (the conjugate transpose of H), supply conjg(tau) instead */ +/* > tau. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] SIDE */ +/* > \verbatim */ +/* > SIDE is CHARACTER*1 */ +/* > = 'L': form H * C */ +/* > = 'R': form C * H */ +/* > \endverbatim */ +/* > */ +/* > \param[in] M */ +/* > \verbatim */ +/* > M is INTEGER */ +/* > The number of rows of the matrix C. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The number of columns of the matrix C. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] V */ +/* > \verbatim */ +/* > V is COMPLEX*16 array, dimension */ +/* > (1 + (M-1)*abs(INCV)) if SIDE = 'L' */ +/* > or (1 + (N-1)*abs(INCV)) if SIDE = 'R' */ +/* > The vector v in the representation of H. V is not used if */ +/* > TAU = 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] INCV */ +/* > \verbatim */ +/* > INCV is INTEGER */ +/* > The increment between elements of v. INCV > 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] TAU */ +/* > \verbatim */ +/* > TAU is COMPLEX*16 */ +/* > The value tau in the representation of H. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] C */ +/* > \verbatim */ +/* > C is COMPLEX*16 array, dimension (LDC,N) */ +/* > On entry, the m by n matrix C. */ +/* > On exit, C is overwritten by the matrix H * C if SIDE = 'L', */ +/* > or C * H if SIDE = 'R'. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDC */ +/* > \verbatim */ +/* > LDC is INTEGER */ +/* > The leading dimension of the array C. LDC >= f2cmax(1,M). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is COMPLEX*16 array, dimension */ +/* > (N) if SIDE = 'L' */ +/* > or (M) if SIDE = 'R' */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \ingroup larf1f */ + +/* ===================================================================== */ +/* Subroutine */ int zlarf1l_(char *side, integer *m, integer *n, + doublecomplex *v, integer *incv, doublecomplex *tau, doublecomplex * + c__, integer *ldc, doublecomplex *work) +{ + /* System generated locals */ + integer c_dim1, c_offset, i__1, i__2, i__3; + doublecomplex z__1, z__2, z__3; + + /* Local variables */ + integer i__, j; + logical applyleft; + extern logical lsame_(char *, char *); + integer lastc; + extern /* Subroutine */ int zgerc_(integer *, integer *, doublecomplex *, + doublecomplex *, integer *, doublecomplex *, integer *, + doublecomplex *, integer *), zscal_(integer *, doublecomplex *, + doublecomplex *, integer *), zgemv_(char *, integer *, integer *, + doublecomplex *, doublecomplex *, integer *, doublecomplex *, + integer *, doublecomplex *, doublecomplex *, integer *); + integer lastv; + extern /* Subroutine */ int zaxpy_(integer *, doublecomplex *, + doublecomplex *, integer *, doublecomplex *, integer *); + extern integer ilazlc_(integer *, integer *, doublecomplex *, integer *), + ilazlr_(integer *, integer *, doublecomplex *, integer *); + integer firstv; + + +/* -- LAPACK auxiliary routine -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + + +/* ===================================================================== */ + + + /* Parameter adjustments */ + --v; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + --work; + + /* Function Body */ + applyleft = lsame_(side, "L"); + firstv = 1; + lastc = 0; + if (tau->r != 0. || tau->i != 0.) { +/* Set up variables for scanning V. LASTV begins pointing to the end */ +/* of V up to V(1). */ + if (applyleft) { + lastv = *m; + } else { + lastv = *n; + } + i__ = 1; +/* Look for the last non-zero row in V. */ + for(;;) { /* while(complicated condition) */ + i__1 = i__; + if (!(lastv > firstv && (v[i__1].r == 0. && v[i__1].i == 0.))) + break; + ++firstv; + i__ += *incv; + } + if (applyleft) { +/* Scan for the last non-zero column in C(1:lastv,:). */ + lastc = ilazlc_(&lastv, n, &c__[c_offset], ldc); + } else { +/* Scan for the last non-zero row in C(:,1:lastv). */ + lastc = ilazlr_(m, &lastv, &c__[c_offset], ldc); + } + } + if (lastc == 0) { + return 0; + } + if (applyleft) { + +/* Form H * C */ + + if (lastv == firstv) { + +/* C(lastv,1:lastc) := ( 1 - tau ) * C(lastv,1:lastc) */ + + z__1.r = 1. - tau->r, z__1.i = 0. - tau->i; + zscal_(&lastc, &z__1, &c__[lastv + c_dim1], ldc); + } else { + +/* w(1:lastc,1) := C(firstv:lastv-1,1:lastc)**T * v(firstv:lastv-1,1) */ + + i__1 = lastv - firstv; + zgemv_("Conjugate transpose", &i__1, &lastc, &c_b1, &c__[firstv + + c_dim1], ldc, &v[i__], incv, &c_b2, &work[1], &c__1); + +/* w(1:lastc,1) += C(lastv,1:lastc)**H * v(lastv,1) */ + + i__1 = lastc; + for (j = 1; j <= i__1; ++j) { + i__2 = j; + i__3 = j; + d_cnjg(&z__2, &c__[lastv + j * c_dim1]); + z__1.r = work[i__3].r + z__2.r, z__1.i = work[i__3].i + + z__2.i; + work[i__2].r = z__1.r, work[i__2].i = z__1.i; + } + +/* C(lastv,1:lastc) += - tau * v(lastv,1) * w(1:lastc,1)**H */ + + i__1 = lastc; + for (j = 1; j <= i__1; ++j) { + i__2 = lastv + j * c_dim1; + i__3 = lastv + j * c_dim1; + d_cnjg(&z__3, &work[j]); + z__2.r = tau->r * z__3.r - tau->i * z__3.i, z__2.i = tau->r * + z__3.i + tau->i * z__3.r; + z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; + c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; + } + +/* C(firstv:lastv-1,1:lastc) += - tau * v(firstv:lastv-1,1) * w(1:lastc,1)**H */ + + i__1 = lastv - firstv; + z__1.r = -tau->r, z__1.i = -tau->i; + zgerc_(&i__1, &lastc, &z__1, &v[i__], incv, &work[1], &c__1, &c__[ + firstv + c_dim1], ldc); + } + } else { + +/* Form C * H */ + + if (lastv == firstv) { + +/* C(1:lastc,lastv) := ( 1 - tau ) * C(1:lastc,lastv) */ + + z__1.r = 1. - tau->r, z__1.i = 0. - tau->i; + zscal_(&lastc, &z__1, &c__[lastv * c_dim1 + 1], &c__1); + } else { + +/* w(1:lastc,1) := C(1:lastc,firstv:lastv-1) * v(firstv:lastv-1,1) */ + + i__1 = lastv - firstv; + zgemv_("No transpose", &lastc, &i__1, &c_b1, &c__[firstv * c_dim1 + + 1], ldc, &v[i__], incv, &c_b2, &work[1], &c__1); + +/* w(1:lastc,1) += C(1:lastc,lastv) * v(lastv,1) */ + + zaxpy_(&lastc, &c_b1, &c__[lastv * c_dim1 + 1], &c__1, &work[1], & + c__1); + +/* C(1:lastc,lastv) += - tau * v(lastv,1) * w(1:lastc,1) */ + + z__1.r = -tau->r, z__1.i = -tau->i; + zaxpy_(&lastc, &z__1, &work[1], &c__1, &c__[lastv * c_dim1 + 1], & + c__1); + +/* C(1:lastc,firstv:lastv-1) += - tau * w(1:lastc,1) * v(firstv:lastv-1)**H */ + + i__1 = lastv - firstv; + z__1.r = -tau->r, z__1.i = -tau->i; + zgerc_(&lastc, &i__1, &z__1, &work[1], &c__1, &v[i__], incv, &c__[ + firstv * c_dim1 + 1], ldc); + } + } + return 0; + +/* End of ZLARF1L */ + +} /* zlarf1l_ */ + diff --git a/lapack-netlib/SRC/zlarf1l.f b/lapack-netlib/SRC/zlarf1l.f new file mode 100644 index 0000000000..2a3bd1d373 --- /dev/null +++ b/lapack-netlib/SRC/zlarf1l.f @@ -0,0 +1,267 @@ +*> \brief \b ZLARF1L applies an elementary reflector to a general rectangular +* matrix assuming v(lastv) = 1, where lastv is the last non-zero +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> Download ZLARF1L + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +* +* Definition: +* =========== +* +* SUBROUTINE ZLARF1L( SIDE, M, N, V, INCV, TAU, C, LDC, WORK ) +* +* .. Scalar Arguments .. +* CHARACTER SIDE +* INTEGER INCV, LDC, M, N +* COMPLEX*16 TAU +* .. +* .. Array Arguments .. +* COMPLEX*16 C( LDC, * ), V( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZLARF1L applies a complex elementary reflector H to a complex m by n matrix +*> C, from either the left or the right. H is represented in the form +*> +*> H = I - tau * v * v**H +*> +*> where tau is a real scalar and v is a real vector assuming v(lastv) = 1, +*> where lastv is the last non-zero element. +*> +*> If tau = 0, then H is taken to be the unit matrix. +*> +*> To apply H**H (the conjugate transpose of H), supply conjg(tau) instead +*> tau. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] SIDE +*> \verbatim +*> SIDE is CHARACTER*1 +*> = 'L': form H * C +*> = 'R': form C * H +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix C. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix C. +*> \endverbatim +*> +*> \param[in] V +*> \verbatim +*> V is COMPLEX*16 array, dimension +*> (1 + (M-1)*abs(INCV)) if SIDE = 'L' +*> or (1 + (N-1)*abs(INCV)) if SIDE = 'R' +*> The vector v in the representation of H. V is not used if +*> TAU = 0. +*> \endverbatim +*> +*> \param[in] INCV +*> \verbatim +*> INCV is INTEGER +*> The increment between elements of v. INCV > 0. +*> \endverbatim +*> +*> \param[in] TAU +*> \verbatim +*> TAU is COMPLEX*16 +*> The value tau in the representation of H. +*> \endverbatim +*> +*> \param[in,out] C +*> \verbatim +*> C is COMPLEX*16 array, dimension (LDC,N) +*> On entry, the m by n matrix C. +*> On exit, C is overwritten by the matrix H * C if SIDE = 'L', +*> or C * H if SIDE = 'R'. +*> \endverbatim +*> +*> \param[in] LDC +*> \verbatim +*> LDC is INTEGER +*> The leading dimension of the array C. LDC >= max(1,M). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension +*> (N) if SIDE = 'L' +*> or (M) if SIDE = 'R' +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \ingroup larf1f +* +* ===================================================================== + SUBROUTINE ZLARF1L( SIDE, M, N, V, INCV, TAU, C, LDC, WORK ) + IMPLICIT NONE +* +* -- LAPACK auxiliary routine -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* +* .. Scalar Arguments .. + CHARACTER SIDE + INTEGER INCV, LDC, M, N + COMPLEX*16 TAU +* .. +* .. Array Arguments .. + COMPLEX*16 C( LDC, * ), V( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX*16 ONE, ZERO + PARAMETER ( ONE = ( 1.0D+0, 0.0D+0 ), + $ ZERO = ( 0.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL APPLYLEFT + INTEGER I, J, LASTV, LASTC, FIRSTV +* .. +* .. External Subroutines .. + EXTERNAL ZAXPY, ZGEMV, ZGERC, ZSCAL +* .. +* .. Intrinsic Functions .. + INTRINSIC DCONJG +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAZLR, ILAZLC + EXTERNAL LSAME, ILAZLR, ILAZLC +* .. +* .. Executable Statements .. +* + APPLYLEFT = LSAME( SIDE, 'L' ) + FIRSTV = 1 + LASTC = 0 + IF( TAU.NE.ZERO ) THEN +! Set up variables for scanning V. LASTV begins pointing to the end +! of V up to V(1). + IF( APPLYLEFT ) THEN + LASTV = M + ELSE + LASTV = N + END IF + I = 1 +! Look for the last non-zero row in V. + DO WHILE( LASTV.GT.FIRSTV .AND. V( I ).EQ.ZERO ) + FIRSTV = FIRSTV + 1 + I = I + INCV + END DO + IF( APPLYLEFT ) THEN +! Scan for the last non-zero column in C(1:lastv,:). + LASTC = ILAZLC(LASTV, N, C, LDC) + ELSE +! Scan for the last non-zero row in C(:,1:lastv). + LASTC = ILAZLR(M, LASTV, C, LDC) + END IF + END IF + IF( LASTC.EQ.0 ) THEN + RETURN + END IF + IF( APPLYLEFT ) THEN +* +* Form H * C +* + IF( LASTV.EQ.FIRSTV ) THEN +* +* C(lastv,1:lastc) := ( 1 - tau ) * C(lastv,1:lastc) +* + CALL ZSCAL( LASTC, ONE - TAU, C( LASTV, 1 ), LDC ) + ELSE +* +* w(1:lastc,1) := C(firstv:lastv-1,1:lastc)**T * v(firstv:lastv-1,1) +* + CALL ZGEMV( 'Conjugate transpose', LASTV - FIRSTV, LASTC, + $ ONE, C( FIRSTV, 1 ), LDC, V( I ), INCV, ZERO, + $ WORK, 1 ) +* +* w(1:lastc,1) += C(lastv,1:lastc)**H * v(lastv,1) +* + DO J = 1, LASTC + WORK( J ) = WORK( J ) + CONJG( C( LASTV, J ) ) + END DO +* +* C(lastv,1:lastc) += - tau * v(lastv,1) * w(1:lastc,1)**H +* + DO J = 1, LASTC + C( LASTV, J ) = C( LASTV, J ) + $ - TAU * CONJG( WORK( J ) ) + END DO +* +* C(firstv:lastv-1,1:lastc) += - tau * v(firstv:lastv-1,1) * w(1:lastc,1)**H +* + CALL ZGERC( LASTV - FIRSTV, LASTC, -TAU, V( I ), INCV, + $ WORK, 1, C( FIRSTV, 1 ), LDC) + END IF + ELSE +* +* Form C * H +* + IF( LASTV.EQ.FIRSTV ) THEN +* +* C(1:lastc,lastv) := ( 1 - tau ) * C(1:lastc,lastv) +* + CALL ZSCAL( LASTC, ONE - TAU, C( 1, LASTV ), 1 ) + ELSE +* +* w(1:lastc,1) := C(1:lastc,firstv:lastv-1) * v(firstv:lastv-1,1) +* + CALL ZGEMV( 'No transpose', LASTC, LASTV - FIRSTV, ONE, + $ C( 1, FIRSTV ), LDC, V( I ), INCV, ZERO, + $ WORK, 1 ) +* +* w(1:lastc,1) += C(1:lastc,lastv) * v(lastv,1) +* + CALL ZAXPY( LASTC, ONE, C( 1, LASTV ), 1, WORK, 1 ) +* +* C(1:lastc,lastv) += - tau * v(lastv,1) * w(1:lastc,1) +* + CALL ZAXPY( LASTC, -TAU, WORK, 1, C( 1, LASTV ), 1 ) +* +* C(1:lastc,firstv:lastv-1) += - tau * w(1:lastc,1) * v(firstv:lastv-1)**H +* + CALL ZGERC( LASTC, LASTV - FIRSTV, -TAU, WORK, 1, V( I ), + $ INCV, C( 1, FIRSTV ), LDC ) + END IF + END IF + RETURN +* +* End of ZLARF1L +* + END diff --git a/lapack-netlib/SRC/zlascl.f b/lapack-netlib/SRC/zlascl.f index 4cce5ff5e0..a1262f1adb 100644 --- a/lapack-netlib/SRC/zlascl.f +++ b/lapack-netlib/SRC/zlascl.f @@ -5,7 +5,6 @@ * Online html documentation available at * http://www.netlib.org/lapack/explore-html/ * -*> \htmlonly *> Download ZLASCL + dependencies *> *> [TGZ] @@ -13,7 +12,6 @@ *> [ZIP] *> *> [TXT] -*> \endhtmlonly * * Definition: * =========== @@ -136,10 +134,12 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup complex16OTHERauxiliary +*> \ingroup lascl * * ===================================================================== - SUBROUTINE ZLASCL( TYPE, KL, KU, CFROM, CTO, M, N, A, LDA, INFO ) + SUBROUTINE ZLASCL( TYPE, KL, KU, CFROM, CTO, M, N, A, LDA, + $ INFO ) + IMPLICIT NONE * * -- LAPACK auxiliary routine -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- @@ -291,7 +291,7 @@ SUBROUTINE ZLASCL( TYPE, KL, KU, CFROM, CTO, M, N, A, LDA, INFO ) * * Lower triangular matrix * - DO 50 J = 1, N + DO 50 J = 1, MIN( M, N ) DO 40 I = J, M A( I, J ) = A( I, J )*MUL 40 CONTINUE diff --git a/lapack-netlib/SRC/zstedc.f b/lapack-netlib/SRC/zstedc.f index e62063a19e..4a5d9fa693 100644 --- a/lapack-netlib/SRC/zstedc.f +++ b/lapack-netlib/SRC/zstedc.f @@ -5,7 +5,6 @@ * Online html documentation available at * http://www.netlib.org/lapack/explore-html/ * -*> \htmlonly *> Download ZSTEDC + dependencies *> *> [TGZ] @@ -13,7 +12,6 @@ *> [ZIP] *> *> [TXT] -*> \endhtmlonly * * Definition: * =========== @@ -192,7 +190,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup complex16OTHERcomputational +*> \ingroup stedc * *> \par Contributors: * ================== @@ -203,6 +201,7 @@ * ===================================================================== SUBROUTINE ZSTEDC( COMPZ, N, D, E, Z, LDZ, WORK, LWORK, RWORK, $ LRWORK, IWORK, LIWORK, INFO ) + IMPLICIT NONE * * -- LAPACK computational routine -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- @@ -237,7 +236,8 @@ SUBROUTINE ZSTEDC( COMPZ, N, D, E, Z, LDZ, WORK, LWORK, RWORK, EXTERNAL LSAME, ILAENV, DLAMCH, DLANST * .. * .. External Subroutines .. - EXTERNAL DLASCL, DLASET, DSTEDC, DSTEQR, DSTERF, XERBLA, + EXTERNAL DLASCL, DLASET, DSTEDC, DSTEQR, DSTERF, + $ XERBLA, $ ZLACPY, ZLACRM, ZLAED0, ZSTEQR, ZSWAP * .. * .. Intrinsic Functions .. @@ -296,7 +296,7 @@ SUBROUTINE ZSTEDC( COMPZ, N, D, E, Z, LDZ, WORK, LWORK, RWORK, LIWMIN = 3 + 5*N END IF WORK( 1 ) = LWMIN - RWORK( 1 ) = LRWMIN + RWORK( 1 ) = DBLE( LRWMIN ) IWORK( 1 ) = LIWMIN * IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN @@ -408,12 +408,15 @@ SUBROUTINE ZSTEDC( COMPZ, N, D, E, Z, LDZ, WORK, LWORK, RWORK, * Scale. * ORGNRM = DLANST( 'M', M, D( START ), E( START ) ) - CALL DLASCL( 'G', 0, 0, ORGNRM, ONE, M, 1, D( START ), M, + CALL DLASCL( 'G', 0, 0, ORGNRM, ONE, M, 1, D( START ), + $ M, $ INFO ) - CALL DLASCL( 'G', 0, 0, ORGNRM, ONE, M-1, 1, E( START ), + CALL DLASCL( 'G', 0, 0, ORGNRM, ONE, M-1, 1, + $ E( START ), $ M-1, INFO ) * - CALL ZLAED0( N, M, D( START ), E( START ), Z( 1, START ), + CALL ZLAED0( N, M, D( START ), E( START ), Z( 1, + $ START ), $ LDZ, WORK, N, RWORK, IWORK, INFO ) IF( INFO.GT.0 ) THEN INFO = ( INFO / ( M+1 )+START-1 )*( N+1 ) + @@ -423,13 +426,15 @@ SUBROUTINE ZSTEDC( COMPZ, N, D, E, Z, LDZ, WORK, LWORK, RWORK, * * Scale back. * - CALL DLASCL( 'G', 0, 0, ONE, ORGNRM, M, 1, D( START ), M, + CALL DLASCL( 'G', 0, 0, ONE, ORGNRM, M, 1, D( START ), + $ M, $ INFO ) * ELSE CALL DSTEQR( 'I', M, D( START ), E( START ), RWORK, M, $ RWORK( M*M+1 ), INFO ) - CALL ZLACRM( N, M, Z( 1, START ), LDZ, RWORK, M, WORK, N, + CALL ZLACRM( N, M, Z( 1, START ), LDZ, RWORK, M, WORK, + $ N, $ RWORK( M*M+1 ) ) CALL ZLACPY( 'A', N, M, WORK, N, Z( 1, START ), LDZ ) IF( INFO.GT.0 ) THEN @@ -467,7 +472,7 @@ SUBROUTINE ZSTEDC( COMPZ, N, D, E, Z, LDZ, WORK, LWORK, RWORK, * 70 CONTINUE WORK( 1 ) = LWMIN - RWORK( 1 ) = LRWMIN + RWORK( 1 ) = DBLE( LRWMIN ) IWORK( 1 ) = LIWMIN * RETURN diff --git a/lapack-netlib/SRC/ztgsen.f b/lapack-netlib/SRC/ztgsen.f index 25a5c6f4ed..84b69c2fe0 100644 --- a/lapack-netlib/SRC/ztgsen.f +++ b/lapack-netlib/SRC/ztgsen.f @@ -5,7 +5,6 @@ * Online html documentation available at * http://www.netlib.org/lapack/explore-html/ * -*> \htmlonly *> Download ZTGSEN + dependencies *> *> [TGZ] @@ -13,7 +12,6 @@ *> [ZIP] *> *> [TXT] -*> \endhtmlonly * * Definition: * =========== @@ -242,7 +240,7 @@ *> \verbatim *> LWORK is INTEGER *> The dimension of the array WORK. LWORK >= 1 -*> If IJOB = 1, 2 or 4, LWORK >= 2*M*(N-M) +*> If IJOB = 1, 2 or 4, LWORK >= 2*M*(N-M) + 1 *> If IJOB = 3 or 5, LWORK >= 4*M*(N-M) *> *> If LWORK = -1, then a workspace query is assumed; the routine @@ -290,7 +288,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup complex16OTHERcomputational +*> \ingroup tgsen * *> \par Further Details: * ===================== @@ -427,9 +425,11 @@ *> 1996. *> * ===================================================================== - SUBROUTINE ZTGSEN( IJOB, WANTQ, WANTZ, SELECT, N, A, LDA, B, LDB, + SUBROUTINE ZTGSEN( IJOB, WANTQ, WANTZ, SELECT, N, A, LDA, B, + $ LDB, $ ALPHA, BETA, Q, LDQ, Z, LDZ, M, PL, PR, DIF, $ WORK, LWORK, IWORK, LIWORK, INFO ) + IMPLICIT NONE * * -- LAPACK computational routine -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- @@ -468,7 +468,8 @@ SUBROUTINE ZTGSEN( IJOB, WANTQ, WANTZ, SELECT, N, A, LDA, B, LDB, INTEGER ISAVE( 3 ) * .. * .. External Subroutines .. - EXTERNAL XERBLA, ZLACN2, ZLACPY, ZLASSQ, ZSCAL, ZTGEXC, + EXTERNAL XERBLA, ZLACN2, ZLACPY, ZLASSQ, ZSCAL, + $ ZTGEXC, $ ZTGSYL * .. * .. Intrinsic Functions .. @@ -530,7 +531,7 @@ SUBROUTINE ZTGSEN( IJOB, WANTQ, WANTZ, SELECT, N, A, LDA, B, LDB, END IF * IF( IJOB.EQ.1 .OR. IJOB.EQ.2 .OR. IJOB.EQ.4 ) THEN - LWMIN = MAX( 1, 2*M*( N-M ) ) + LWMIN = MAX( 1, 2*M*( N-M ) + 1 ) LIWMIN = MAX( 1, N+2 ) ELSE IF( IJOB.EQ.3 .OR. IJOB.EQ.5 ) THEN LWMIN = MAX( 1, 4*M*( N-M ) ) @@ -592,7 +593,8 @@ SUBROUTINE ZTGSEN( IJOB, WANTQ, WANTZ, SELECT, N, A, LDA, B, LDB, * and Z that will swap adjacent diagonal blocks in (A, B). * IF( K.NE.KS ) - $ CALL ZTGEXC( WANTQ, WANTZ, N, A, LDA, B, LDB, Q, LDQ, Z, + $ CALL ZTGEXC( WANTQ, WANTZ, N, A, LDA, B, LDB, Q, LDQ, + $ Z, $ LDZ, K, KS, IERR ) * IF( IERR.GT.0 ) THEN @@ -622,7 +624,8 @@ SUBROUTINE ZTGSEN( IJOB, WANTQ, WANTZ, SELECT, N, A, LDA, B, LDB, N2 = N - M I = N1 + 1 CALL ZLACPY( 'Full', N1, N2, A( 1, I ), LDA, WORK, N1 ) - CALL ZLACPY( 'Full', N1, N2, B( 1, I ), LDB, WORK( N1*N2+1 ), + CALL ZLACPY( 'Full', N1, N2, B( 1, I ), LDB, + $ WORK( N1*N2+1 ), $ N1 ) IJB = 0 CALL ZTGSYL( 'N', IJB, N1, N2, A, LDA, A( I, I ), LDA, WORK, @@ -664,14 +667,16 @@ SUBROUTINE ZTGSEN( IJOB, WANTQ, WANTZ, SELECT, N, A, LDA, B, LDB, * * Frobenius norm-based Difu estimate. * - CALL ZTGSYL( 'N', IJB, N1, N2, A, LDA, A( I, I ), LDA, WORK, + CALL ZTGSYL( 'N', IJB, N1, N2, A, LDA, A( I, I ), LDA, + $ WORK, $ N1, B, LDB, B( I, I ), LDB, WORK( N1*N2+1 ), $ N1, DSCALE, DIF( 1 ), WORK( N1*N2*2+1 ), $ LWORK-2*N1*N2, IWORK, IERR ) * * Frobenius norm-based Difl estimate. * - CALL ZTGSYL( 'N', IJB, N2, N1, A( I, I ), LDA, A, LDA, WORK, + CALL ZTGSYL( 'N', IJB, N2, N1, A( I, I ), LDA, A, LDA, + $ WORK, $ N2, B( I, I ), LDB, B, LDB, WORK( N1*N2+1 ), $ N2, DSCALE, DIF( 2 ), WORK( N1*N2*2+1 ), $ LWORK-2*N1*N2, IWORK, IERR ) @@ -699,7 +704,8 @@ SUBROUTINE ZTGSEN( IJOB, WANTQ, WANTZ, SELECT, N, A, LDA, B, LDB, * * Solve generalized Sylvester equation * - CALL ZTGSYL( 'N', IJB, N1, N2, A, LDA, A( I, I ), LDA, + CALL ZTGSYL( 'N', IJB, N1, N2, A, LDA, A( I, I ), + $ LDA, $ WORK, N1, B, LDB, B( I, I ), LDB, $ WORK( N1*N2+1 ), N1, DSCALE, DIF( 1 ), $ WORK( N1*N2*2+1 ), LWORK-2*N1*N2, IWORK, @@ -708,7 +714,8 @@ SUBROUTINE ZTGSEN( IJOB, WANTQ, WANTZ, SELECT, N, A, LDA, B, LDB, * * Solve the transposed variant. * - CALL ZTGSYL( 'C', IJB, N1, N2, A, LDA, A( I, I ), LDA, + CALL ZTGSYL( 'C', IJB, N1, N2, A, LDA, A( I, I ), + $ LDA, $ WORK, N1, B, LDB, B( I, I ), LDB, $ WORK( N1*N2+1 ), N1, DSCALE, DIF( 1 ), $ WORK( N1*N2*2+1 ), LWORK-2*N1*N2, IWORK, @@ -728,7 +735,8 @@ SUBROUTINE ZTGSEN( IJOB, WANTQ, WANTZ, SELECT, N, A, LDA, B, LDB, * * Solve generalized Sylvester equation * - CALL ZTGSYL( 'N', IJB, N2, N1, A( I, I ), LDA, A, LDA, + CALL ZTGSYL( 'N', IJB, N2, N1, A( I, I ), LDA, A, + $ LDA, $ WORK, N2, B( I, I ), LDB, B, LDB, $ WORK( N1*N2+1 ), N2, DSCALE, DIF( 2 ), $ WORK( N1*N2*2+1 ), LWORK-2*N1*N2, IWORK, @@ -737,7 +745,8 @@ SUBROUTINE ZTGSEN( IJOB, WANTQ, WANTZ, SELECT, N, A, LDA, B, LDB, * * Solve the transposed variant. * - CALL ZTGSYL( 'C', IJB, N2, N1, A( I, I ), LDA, A, LDA, + CALL ZTGSYL( 'C', IJB, N2, N1, A( I, I ), LDA, A, + $ LDA, $ WORK, N2, B, LDB, B( I, I ), LDB, $ WORK( N1*N2+1 ), N2, DSCALE, DIF( 2 ), $ WORK( N1*N2*2+1 ), LWORK-2*N1*N2, IWORK, diff --git a/lapack-netlib/SRC/ztrsyl3.f b/lapack-netlib/SRC/ztrsyl3.f index b5a058da4e..648e4d1d6c 100644 --- a/lapack-netlib/SRC/ztrsyl3.f +++ b/lapack-netlib/SRC/ztrsyl3.f @@ -1,10 +1,23 @@ *> \brief \b ZTRSYL3 * -* Definition: -* =========== +* Definition: +* =========== * +* SUBROUTINE ZTRSYL3( TRANA, TRANB, ISGN, M, N, A, LDA, B, LDB, +* C, LDC, SCALE, SWORK, LDSWORK, INFO ) * -*> \par Purpose +* .. Scalar Arguments .. +* CHARACTER TRANA, TRANB +* INTEGER INFO, ISGN, LDA, LDB, LDC, LDSWORK, M, N +* DOUBLE PRECISION SCALE +* .. +* .. Array Arguments .. +* COMPLEX*16 A( LDA, * ), B( LDB, * ), C( LDC, * ) +* DOUBLE PRECISION SWORK( LDSWORK, * ) +* .. +* +* +*> \par Purpose: * ============= *> *> \verbatim @@ -22,8 +35,8 @@ *> This is the block version of the algorithm. *> \endverbatim * -* Arguments -* ========= +* Arguments: +* ========== * *> \param[in] TRANA *> \verbatim @@ -136,7 +149,7 @@ *> A and B are unchanged). *> \endverbatim * -*> \ingroup complex16SYcomputational +*> \ingroup trsyl3 * * ===================================================================== * References: @@ -152,8 +165,8 @@ * Angelika Schwarz, Umea University, Sweden. * * ===================================================================== - SUBROUTINE ZTRSYL3( TRANA, TRANB, ISGN, M, N, A, LDA, B, LDB, C, - $ LDC, SCALE, SWORK, LDSWORK, INFO ) + SUBROUTINE ZTRSYL3( TRANA, TRANB, ISGN, M, N, A, LDA, B, LDB, + $ C, LDC, SCALE, SWORK, LDSWORK, INFO ) IMPLICIT NONE * * .. Scalar Arguments .. @@ -186,10 +199,12 @@ SUBROUTINE ZTRSYL3( TRANA, TRANB, ISGN, M, N, A, LDA, B, LDB, C, LOGICAL LSAME INTEGER ILAENV DOUBLE PRECISION DLAMCH, DLARMM, ZLANGE - EXTERNAL DLAMCH, DLARMM, ILAENV, LSAME, ZLANGE + EXTERNAL DLAMCH, DLARMM, ILAENV, LSAME, + $ ZLANGE * .. * .. External Subroutines .. - EXTERNAL XERBLA, ZDSCAL, ZGEMM, ZLASCL, ZTRSYL + EXTERNAL XERBLA, ZDSCAL, ZGEMM, ZLASCL, + $ ZTRSYL * .. * .. Intrinsic Functions .. INTRINSIC ABS, DBLE, DIMAG, EXPONENT, MAX, MIN @@ -215,7 +230,6 @@ SUBROUTINE ZTRSYL3( TRANA, TRANB, ISGN, M, N, A, LDA, B, LDB, C, INFO = 0 LQUERY = ( LDSWORK.EQ.-1 ) IF( LQUERY ) THEN - LDSWORK = 2 SWORK(1,1) = MAX( NBA, NBB ) SWORK(2,1) = 2 * NBB + NBA END IF diff --git a/lapack-netlib/SRC/zunbdb.f b/lapack-netlib/SRC/zunbdb.f index 281ec6e45f..e0c8beadec 100644 --- a/lapack-netlib/SRC/zunbdb.f +++ b/lapack-netlib/SRC/zunbdb.f @@ -5,7 +5,6 @@ * Online html documentation available at * http://www.netlib.org/lapack/explore-html/ * -*> \htmlonly *> Download ZUNBDB + dependencies *> *> [TGZ] @@ -13,7 +12,6 @@ *> [ZIP] *> *> [TXT] -*> \endhtmlonly * * Definition: * =========== @@ -255,7 +253,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup complex16OTHERcomputational +*> \ingroup unbdb * *> \par Further Details: * ===================== @@ -281,9 +279,11 @@ *> Algorithms, 50(1):33-65, 2009. *> * ===================================================================== - SUBROUTINE ZUNBDB( TRANS, SIGNS, M, P, Q, X11, LDX11, X12, LDX12, + SUBROUTINE ZUNBDB( TRANS, SIGNS, M, P, Q, X11, LDX11, X12, + $ LDX12, $ X21, LDX21, X22, LDX22, THETA, PHI, TAUP1, $ TAUP2, TAUQ1, TAUQ2, WORK, LWORK, INFO ) + IMPLICIT NONE * * -- LAPACK computational routine -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- @@ -315,7 +315,8 @@ SUBROUTINE ZUNBDB( TRANS, SIGNS, M, P, Q, X11, LDX11, X12, LDX12, DOUBLE PRECISION Z1, Z2, Z3, Z4 * .. * .. External Subroutines .. - EXTERNAL ZAXPY, ZLARF, ZLARFGP, ZSCAL, XERBLA + EXTERNAL ZAXPY, ZLARF1F, ZLARFGP, ZSCAL, + $ XERBLA EXTERNAL ZLACGV * * .. @@ -406,9 +407,11 @@ SUBROUTINE ZUNBDB( TRANS, SIGNS, M, P, Q, X11, LDX11, X12, LDX12, $ 0.0D0 ), X12(I,I-1), 1, X11(I,I), 1 ) END IF IF( I .EQ. 1 ) THEN - CALL ZSCAL( M-P-I+1, DCMPLX( Z2, 0.0D0 ), X21(I,I), 1 ) + CALL ZSCAL( M-P-I+1, DCMPLX( Z2, 0.0D0 ), X21(I,I), + $ 1 ) ELSE - CALL ZSCAL( M-P-I+1, DCMPLX( Z2*COS(PHI(I-1)), 0.0D0 ), + CALL ZSCAL( M-P-I+1, DCMPLX( Z2*COS(PHI(I-1)), + $ 0.0D0 ), $ X21(I,I), 1 ) CALL ZAXPY( M-P-I+1, DCMPLX( -Z2*Z3*Z4*SIN(PHI(I-1)), $ 0.0D0 ), X22(I,I-1), 1, X21(I,I), 1 ) @@ -418,11 +421,11 @@ SUBROUTINE ZUNBDB( TRANS, SIGNS, M, P, Q, X11, LDX11, X12, LDX12, $ DZNRM2( P-I+1, X11(I,I), 1 ) ) * IF( P .GT. I ) THEN - CALL ZLARFGP( P-I+1, X11(I,I), X11(I+1,I), 1, TAUP1(I) ) + CALL ZLARFGP( P-I+1, X11(I,I), X11(I+1,I), 1, + $ TAUP1(I) ) ELSE IF ( P .EQ. I ) THEN CALL ZLARFGP( P-I+1, X11(I,I), X11(I,I), 1, TAUP1(I) ) END IF - X11(I,I) = ONE IF ( M-P .GT. I ) THEN CALL ZLARFGP( M-P-I+1, X21(I,I), X21(I+1,I), 1, $ TAUP2(I) ) @@ -430,30 +433,34 @@ SUBROUTINE ZUNBDB( TRANS, SIGNS, M, P, Q, X11, LDX11, X12, LDX12, CALL ZLARFGP( M-P-I+1, X21(I,I), X21(I,I), 1, $ TAUP2(I) ) END IF - X21(I,I) = ONE * IF ( Q .GT. I ) THEN - CALL ZLARF( 'L', P-I+1, Q-I, X11(I,I), 1, - $ DCONJG(TAUP1(I)), X11(I,I+1), LDX11, WORK ) - CALL ZLARF( 'L', M-P-I+1, Q-I, X21(I,I), 1, - $ DCONJG(TAUP2(I)), X21(I,I+1), LDX21, WORK ) + CALL ZLARF1F( 'L', P-I+1, Q-I, X11(I,I), 1, + $ CONJG(TAUP1(I)), X11(I,I+1), LDX11, + $ WORK ) + CALL ZLARF1F( 'L', M-P-I+1, Q-I, X21(I,I), 1, + $ CONJG(TAUP2(I)), X21(I,I+1), LDX21, + $ WORK ) END IF IF ( M-Q+1 .GT. I ) THEN - CALL ZLARF( 'L', P-I+1, M-Q-I+1, X11(I,I), 1, - $ DCONJG(TAUP1(I)), X12(I,I), LDX12, WORK ) - CALL ZLARF( 'L', M-P-I+1, M-Q-I+1, X21(I,I), 1, - $ DCONJG(TAUP2(I)), X22(I,I), LDX22, WORK ) + CALL ZLARF1F( 'L', P-I+1, M-Q-I+1, X11(I,I), 1, + $ CONJG(TAUP1(I)), X12(I,I), LDX12, WORK ) + CALL ZLARF1F( 'L', M-P-I+1, M-Q-I+1, X21(I,I), 1, + $ CONJG(TAUP2(I)), X22(I,I), LDX22, WORK ) END IF * IF( I .LT. Q ) THEN - CALL ZSCAL( Q-I, DCMPLX( -Z1*Z3*SIN(THETA(I)), 0.0D0 ), + CALL ZSCAL( Q-I, DCMPLX( -Z1*Z3*SIN(THETA(I)), + $ 0.0D0 ), $ X11(I,I+1), LDX11 ) CALL ZAXPY( Q-I, DCMPLX( Z2*Z3*COS(THETA(I)), 0.0D0 ), $ X21(I,I+1), LDX21, X11(I,I+1), LDX11 ) END IF - CALL ZSCAL( M-Q-I+1, DCMPLX( -Z1*Z4*SIN(THETA(I)), 0.0D0 ), + CALL ZSCAL( M-Q-I+1, DCMPLX( -Z1*Z4*SIN(THETA(I)), + $ 0.0D0 ), $ X12(I,I), LDX12 ) - CALL ZAXPY( M-Q-I+1, DCMPLX( Z2*Z4*COS(THETA(I)), 0.0D0 ), + CALL ZAXPY( M-Q-I+1, DCMPLX( Z2*Z4*COS(THETA(I)), + $ 0.0D0 ), $ X22(I,I), LDX22, X12(I,I), LDX12 ) * IF( I .LT. Q ) @@ -469,7 +476,6 @@ SUBROUTINE ZUNBDB( TRANS, SIGNS, M, P, Q, X11, LDX11, X12, LDX12, CALL ZLARFGP( Q-I, X11(I,I+1), X11(I,I+2), LDX11, $ TAUQ1(I) ) END IF - X11(I,I+1) = ONE END IF IF ( M-Q+1 .GT. I ) THEN CALL ZLACGV( M-Q-I+1, X12(I,I), LDX12 ) @@ -481,21 +487,23 @@ SUBROUTINE ZUNBDB( TRANS, SIGNS, M, P, Q, X11, LDX11, X12, LDX12, $ TAUQ2(I) ) END IF END IF - X12(I,I) = ONE * IF( I .LT. Q ) THEN - CALL ZLARF( 'R', P-I, Q-I, X11(I,I+1), LDX11, TAUQ1(I), - $ X11(I+1,I+1), LDX11, WORK ) - CALL ZLARF( 'R', M-P-I, Q-I, X11(I,I+1), LDX11, TAUQ1(I), - $ X21(I+1,I+1), LDX21, WORK ) + CALL ZLARF1F( 'R', P-I, Q-I, X11(I,I+1), LDX11, + $ TAUQ1(I), + $ X11(I+1,I+1), LDX11, WORK ) + CALL ZLARF1F( 'R', M-P-I, Q-I, X11(I,I+1), LDX11, + $ TAUQ1(I), + $ X21(I+1,I+1), LDX21, WORK ) END IF IF ( P .GT. I ) THEN - CALL ZLARF( 'R', P-I, M-Q-I+1, X12(I,I), LDX12, TAUQ2(I), - $ X12(I+1,I), LDX12, WORK ) + CALL ZLARF1F( 'R', P-I, M-Q-I+1, X12(I,I), LDX12, + $ TAUQ2(I), + $ X12(I+1,I), LDX12, WORK ) END IF IF ( M-P .GT. I ) THEN - CALL ZLARF( 'R', M-P-I, M-Q-I+1, X12(I,I), LDX12, - $ TAUQ2(I), X22(I+1,I), LDX22, WORK ) + CALL ZLARF1F( 'R', M-P-I, M-Q-I+1, X12(I,I), LDX12, + $ TAUQ2(I), X22(I+1,I), LDX22, WORK ) END IF * IF( I .LT. Q ) @@ -518,15 +526,15 @@ SUBROUTINE ZUNBDB( TRANS, SIGNS, M, P, Q, X11, LDX11, X12, LDX12, CALL ZLARFGP( M-Q-I+1, X12(I,I), X12(I,I+1), LDX12, $ TAUQ2(I) ) END IF - X12(I,I) = ONE * IF ( P .GT. I ) THEN - CALL ZLARF( 'R', P-I, M-Q-I+1, X12(I,I), LDX12, TAUQ2(I), - $ X12(I+1,I), LDX12, WORK ) + CALL ZLARF1F( 'R', P-I, M-Q-I+1, X12(I,I), LDX12, + $ TAUQ2(I), + $ X12(I+1,I), LDX12, WORK ) END IF IF( M-P-Q .GE. 1 ) - $ CALL ZLARF( 'R', M-P-Q, M-Q-I+1, X12(I,I), LDX12, - $ TAUQ2(I), X22(Q+1,I), LDX22, WORK ) + $ CALL ZLARF1F( 'R', M-P-Q, M-Q-I+1, X12(I,I), LDX12, + $ TAUQ2(I), X22(Q+1,I), LDX22, WORK ) * CALL ZLACGV( M-Q-I+1, X12(I,I), LDX12 ) * @@ -541,9 +549,9 @@ SUBROUTINE ZUNBDB( TRANS, SIGNS, M, P, Q, X11, LDX11, X12, LDX12, CALL ZLACGV( M-P-Q-I+1, X22(Q+I,P+I), LDX22 ) CALL ZLARFGP( M-P-Q-I+1, X22(Q+I,P+I), X22(Q+I,P+I+1), $ LDX22, TAUQ2(P+I) ) - X22(Q+I,P+I) = ONE - CALL ZLARF( 'R', M-P-Q-I, M-P-Q-I+1, X22(Q+I,P+I), LDX22, - $ TAUQ2(P+I), X22(Q+I+1,P+I), LDX22, WORK ) + CALL ZLARF1F( 'R', M-P-Q-I, M-P-Q-I+1, X22(Q+I,P+I), + $ LDX22, TAUQ2(P+I), X22(Q+I+1,P+I), LDX22, + $ WORK ) * CALL ZLACGV( M-P-Q-I+1, X22(Q+I,P+I), LDX22 ) * @@ -568,7 +576,8 @@ SUBROUTINE ZUNBDB( TRANS, SIGNS, M, P, Q, X11, LDX11, X12, LDX12, CALL ZSCAL( M-P-I+1, DCMPLX( Z2, 0.0D0 ), X21(I,I), $ LDX21 ) ELSE - CALL ZSCAL( M-P-I+1, DCMPLX( Z2*COS(PHI(I-1)), 0.0D0 ), + CALL ZSCAL( M-P-I+1, DCMPLX( Z2*COS(PHI(I-1)), + $ 0.0D0 ), $ X21(I,I), LDX21 ) CALL ZAXPY( M-P-I+1, DCMPLX( -Z2*Z3*Z4*SIN(PHI(I-1)), $ 0.0D0 ), X22(I-1,I), LDX22, X21(I,I), LDX21 ) @@ -580,8 +589,8 @@ SUBROUTINE ZUNBDB( TRANS, SIGNS, M, P, Q, X11, LDX11, X12, LDX12, CALL ZLACGV( P-I+1, X11(I,I), LDX11 ) CALL ZLACGV( M-P-I+1, X21(I,I), LDX21 ) * - CALL ZLARFGP( P-I+1, X11(I,I), X11(I,I+1), LDX11, TAUP1(I) ) - X11(I,I) = ONE + CALL ZLARFGP( P-I+1, X11(I,I), X11(I,I+1), LDX11, + $ TAUP1(I) ) IF ( I .EQ. M-P ) THEN CALL ZLARFGP( M-P-I+1, X21(I,I), X21(I,I), LDX21, $ TAUP2(I) ) @@ -589,29 +598,32 @@ SUBROUTINE ZUNBDB( TRANS, SIGNS, M, P, Q, X11, LDX11, X12, LDX12, CALL ZLARFGP( M-P-I+1, X21(I,I), X21(I,I+1), LDX21, $ TAUP2(I) ) END IF - X21(I,I) = ONE * - CALL ZLARF( 'R', Q-I, P-I+1, X11(I,I), LDX11, TAUP1(I), - $ X11(I+1,I), LDX11, WORK ) - CALL ZLARF( 'R', M-Q-I+1, P-I+1, X11(I,I), LDX11, TAUP1(I), - $ X12(I,I), LDX12, WORK ) - CALL ZLARF( 'R', Q-I, M-P-I+1, X21(I,I), LDX21, TAUP2(I), - $ X21(I+1,I), LDX21, WORK ) - CALL ZLARF( 'R', M-Q-I+1, M-P-I+1, X21(I,I), LDX21, - $ TAUP2(I), X22(I,I), LDX22, WORK ) + CALL ZLARF1F( 'R', Q-I, P-I+1, X11(I,I), LDX11, TAUP1(I), + $ X11(I+1,I), LDX11, WORK ) + CALL ZLARF1F( 'R', M-Q-I+1, P-I+1, X11(I,I), LDX11, + $ TAUP1(I), + $ X12(I,I), LDX12, WORK ) + CALL ZLARF1F( 'R', Q-I, M-P-I+1, X21(I,I), LDX21, + $ TAUP2(I), X21(I+1,I), LDX21, WORK ) + CALL ZLARF1F( 'R', M-Q-I+1, M-P-I+1, X21(I,I), LDX21, + $ TAUP2(I), X22(I,I), LDX22, WORK ) * CALL ZLACGV( P-I+1, X11(I,I), LDX11 ) CALL ZLACGV( M-P-I+1, X21(I,I), LDX21 ) * IF( I .LT. Q ) THEN - CALL ZSCAL( Q-I, DCMPLX( -Z1*Z3*SIN(THETA(I)), 0.0D0 ), + CALL ZSCAL( Q-I, DCMPLX( -Z1*Z3*SIN(THETA(I)), + $ 0.0D0 ), $ X11(I+1,I), 1 ) CALL ZAXPY( Q-I, DCMPLX( Z2*Z3*COS(THETA(I)), 0.0D0 ), $ X21(I+1,I), 1, X11(I+1,I), 1 ) END IF - CALL ZSCAL( M-Q-I+1, DCMPLX( -Z1*Z4*SIN(THETA(I)), 0.0D0 ), + CALL ZSCAL( M-Q-I+1, DCMPLX( -Z1*Z4*SIN(THETA(I)), + $ 0.0D0 ), $ X12(I,I), 1 ) - CALL ZAXPY( M-Q-I+1, DCMPLX( Z2*Z4*COS(THETA(I)), 0.0D0 ), + CALL ZAXPY( M-Q-I+1, DCMPLX( Z2*Z4*COS(THETA(I)), + $ 0.0D0 ), $ X22(I,I), 1, X12(I,I), 1 ) * IF( I .LT. Q ) @@ -619,23 +631,28 @@ SUBROUTINE ZUNBDB( TRANS, SIGNS, M, P, Q, X11, LDX11, X12, LDX12, $ DZNRM2( M-Q-I+1, X12(I,I), 1 ) ) * IF( I .LT. Q ) THEN - CALL ZLARFGP( Q-I, X11(I+1,I), X11(I+2,I), 1, TAUQ1(I) ) - X11(I+1,I) = ONE + CALL ZLARFGP( Q-I, X11(I+1,I), X11(I+2,I), 1, + $ TAUQ1(I) ) END IF - CALL ZLARFGP( M-Q-I+1, X12(I,I), X12(I+1,I), 1, TAUQ2(I) ) - X12(I,I) = ONE + CALL ZLARFGP( M-Q-I+1, X12(I,I), X12(I+1,I), 1, + $ TAUQ2(I) ) * IF( I .LT. Q ) THEN - CALL ZLARF( 'L', Q-I, P-I, X11(I+1,I), 1, - $ DCONJG(TAUQ1(I)), X11(I+1,I+1), LDX11, WORK ) - CALL ZLARF( 'L', Q-I, M-P-I, X11(I+1,I), 1, - $ DCONJG(TAUQ1(I)), X21(I+1,I+1), LDX21, WORK ) + CALL ZLARF1F( 'L', Q-I, P-I, X11(I+1,I), 1, + $ CONJG(TAUQ1(I)), X11(I+1,I+1), LDX11, + $ WORK ) + CALL ZLARF1F( 'L', Q-I, M-P-I, X11(I+1,I), 1, + $ CONJG(TAUQ1(I)), X21(I+1,I+1), LDX21, + $ WORK ) END IF - CALL ZLARF( 'L', M-Q-I+1, P-I, X12(I,I), 1, - $ DCONJG(TAUQ2(I)), X12(I,I+1), LDX12, WORK ) + CALL ZLARF1F( 'L', M-Q-I+1, P-I, X12(I,I), 1, + $ CONJG(TAUQ2(I)), + $ X12(I,I+1), LDX12, WORK ) + IF ( M-P .GT. I ) THEN - CALL ZLARF( 'L', M-Q-I+1, M-P-I, X12(I,I), 1, - $ DCONJG(TAUQ2(I)), X22(I,I+1), LDX22, WORK ) + CALL ZLARF1F( 'L', M-Q-I+1, M-P-I, X12(I,I), 1, + $ CONJG(TAUQ2(I)), X22(I,I+1), LDX22, + $ WORK ) END IF * END DO @@ -644,17 +661,20 @@ SUBROUTINE ZUNBDB( TRANS, SIGNS, M, P, Q, X11, LDX11, X12, LDX12, * DO I = Q + 1, P * - CALL ZSCAL( M-Q-I+1, DCMPLX( -Z1*Z4, 0.0D0 ), X12(I,I), 1 ) - CALL ZLARFGP( M-Q-I+1, X12(I,I), X12(I+1,I), 1, TAUQ2(I) ) - X12(I,I) = ONE + CALL ZSCAL( M-Q-I+1, DCMPLX( -Z1*Z4, 0.0D0 ), X12(I,I), + $ 1 ) + CALL ZLARFGP( M-Q-I+1, X12(I,I), X12(I+1,I), 1, + $ TAUQ2(I) ) * IF ( P .GT. I ) THEN - CALL ZLARF( 'L', M-Q-I+1, P-I, X12(I,I), 1, - $ DCONJG(TAUQ2(I)), X12(I,I+1), LDX12, WORK ) + CALL ZLARF1F( 'L', M-Q-I+1, P-I, X12(I,I), 1, + $ CONJG(TAUQ2(I)), X12(I,I+1), LDX12, + $ WORK ) END IF IF( M-P-Q .GE. 1 ) - $ CALL ZLARF( 'L', M-Q-I+1, M-P-Q, X12(I,I), 1, - $ DCONJG(TAUQ2(I)), X22(I,Q+1), LDX22, WORK ) + $ CALL ZLARF1F( 'L', M-Q-I+1, M-P-Q, X12(I,I), 1, + $ CONJG(TAUQ2(I)), X22(I,Q+1), LDX22, + $ WORK ) * END DO * @@ -666,12 +686,10 @@ SUBROUTINE ZUNBDB( TRANS, SIGNS, M, P, Q, X11, LDX11, X12, LDX12, $ X22(P+I,Q+I), 1 ) CALL ZLARFGP( M-P-Q-I+1, X22(P+I,Q+I), X22(P+I+1,Q+I), 1, $ TAUQ2(P+I) ) - X22(P+I,Q+I) = ONE -* IF ( M-P-Q .NE. I ) THEN - CALL ZLARF( 'L', M-P-Q-I+1, M-P-Q-I, X22(P+I,Q+I), 1, - $ DCONJG(TAUQ2(P+I)), X22(P+I,Q+I+1), LDX22, - $ WORK ) + CALL ZLARF1F( 'L', M-P-Q-I+1, M-P-Q-I, X22(P+I,Q+I), + $ 1, CONJG(TAUQ2(P+I)), X22(P+I,Q+I+1), + $ LDX22, WORK ) END IF * END DO diff --git a/lapack-netlib/SRC/zunbdb1.f b/lapack-netlib/SRC/zunbdb1.f index 2fae170de4..d1e1933176 100644 --- a/lapack-netlib/SRC/zunbdb1.f +++ b/lapack-netlib/SRC/zunbdb1.f @@ -5,7 +5,6 @@ * Online html documentation available at * http://www.netlib.org/lapack/explore-html/ * -*> \htmlonly *> Download ZUNBDB1 + dependencies *> *> [TGZ] @@ -13,7 +12,6 @@ *> [ZIP] *> *> [TXT] -*> \endhtmlonly * * Definition: * =========== @@ -174,7 +172,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup complex16OTHERcomputational +*> \ingroup unbdb1 * *> \par Further Details: * ===================== @@ -198,8 +196,10 @@ *> Algorithms, 50(1):33-65, 2009. *> * ===================================================================== - SUBROUTINE ZUNBDB1( M, P, Q, X11, LDX11, X21, LDX21, THETA, PHI, + SUBROUTINE ZUNBDB1( M, P, Q, X11, LDX11, X21, LDX21, THETA, + $ PHI, $ TAUP1, TAUP2, TAUQ1, WORK, LWORK, INFO ) + IMPLICIT NONE * * -- LAPACK computational routine -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- @@ -227,7 +227,8 @@ SUBROUTINE ZUNBDB1( M, P, Q, X11, LDX11, X21, LDX21, THETA, PHI, LOGICAL LQUERY * .. * .. External Subroutines .. - EXTERNAL ZLARF, ZLARFGP, ZUNBDB5, ZDROT, XERBLA + EXTERNAL ZLARF1F, ZLARFGP, ZUNBDB5, ZDROT, + $ XERBLA EXTERNAL ZLACGV * .. * .. External Functions .. @@ -286,27 +287,29 @@ SUBROUTINE ZUNBDB1( M, P, Q, X11, LDX11, X21, LDX21, THETA, PHI, THETA(I) = ATAN2( DBLE( X21(I,I) ), DBLE( X11(I,I) ) ) C = COS( THETA(I) ) S = SIN( THETA(I) ) - X11(I,I) = ONE - X21(I,I) = ONE - CALL ZLARF( 'L', P-I+1, Q-I, X11(I,I), 1, DCONJG(TAUP1(I)), - $ X11(I,I+1), LDX11, WORK(ILARF) ) - CALL ZLARF( 'L', M-P-I+1, Q-I, X21(I,I), 1, DCONJG(TAUP2(I)), - $ X21(I,I+1), LDX21, WORK(ILARF) ) + C = COS( THETA(I) ) + S = SIN( THETA(I) ) + CALL ZLARF1F( 'L', P-I+1, Q-I, X11(I,I), 1, CONJG(TAUP1(I)), + $ X11(I,I+1), LDX11, WORK(ILARF) ) + CALL ZLARF1F( 'L', M-P-I+1, Q-I, X21(I,I), 1, + $ CONJG(TAUP2(I)), X21(I,I+1), LDX21, + $ WORK(ILARF) ) * IF( I .LT. Q ) THEN CALL ZDROT( Q-I, X11(I,I+1), LDX11, X21(I,I+1), LDX21, C, $ S ) CALL ZLACGV( Q-I, X21(I,I+1), LDX21 ) - CALL ZLARFGP( Q-I, X21(I,I+1), X21(I,I+2), LDX21, TAUQ1(I) ) + CALL ZLARFGP( Q-I, X21(I,I+1), X21(I,I+2), LDX21, + $ TAUQ1(I) ) S = DBLE( X21(I,I+1) ) - X21(I,I+1) = ONE - CALL ZLARF( 'R', P-I, Q-I, X21(I,I+1), LDX21, TAUQ1(I), - $ X11(I+1,I+1), LDX11, WORK(ILARF) ) - CALL ZLARF( 'R', M-P-I, Q-I, X21(I,I+1), LDX21, TAUQ1(I), - $ X21(I+1,I+1), LDX21, WORK(ILARF) ) + CALL ZLARF1F( 'R', P-I, Q-I, X21(I,I+1), LDX21, TAUQ1(I), + $ X11(I+1,I+1), LDX11, WORK(ILARF) ) + CALL ZLARF1F( 'R', M-P-I, Q-I, X21(I,I+1), LDX21, + $ TAUQ1(I), X21(I+1,I+1), LDX21, + $ WORK(ILARF) ) CALL ZLACGV( Q-I, X21(I,I+1), LDX21 ) C = SQRT( DZNRM2( P-I, X11(I+1,I+1), 1 )**2 - $ + DZNRM2( M-P-I, X21(I+1,I+1), 1 )**2 ) + $ + DZNRM2( M-P-I, X21(I+1,I+1), 1 )**2 ) PHI(I) = ATAN2( S, C ) CALL ZUNBDB5( P-I, M-P-I, Q-I-1, X11(I+1,I+1), 1, $ X21(I+1,I+1), 1, X11(I+1,I+2), LDX11, diff --git a/lapack-netlib/SRC/zunbdb2.f b/lapack-netlib/SRC/zunbdb2.f index 28e78fc23c..f8bc474f3e 100644 --- a/lapack-netlib/SRC/zunbdb2.f +++ b/lapack-netlib/SRC/zunbdb2.f @@ -5,7 +5,6 @@ * Online html documentation available at * http://www.netlib.org/lapack/explore-html/ * -*> \htmlonly *> Download ZUNBDB2 + dependencies *> *> [TGZ] @@ -13,7 +12,6 @@ *> [ZIP] *> *> [TXT] -*> \endhtmlonly * * Definition: * =========== @@ -172,7 +170,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup complex16OTHERcomputational +*> \ingroup unbdb2 * *> \par Further Details: * ===================== @@ -196,8 +194,10 @@ *> Algorithms, 50(1):33-65, 2009. *> * ===================================================================== - SUBROUTINE ZUNBDB2( M, P, Q, X11, LDX11, X21, LDX21, THETA, PHI, + SUBROUTINE ZUNBDB2( M, P, Q, X11, LDX11, X21, LDX21, THETA, + $ PHI, $ TAUP1, TAUP2, TAUQ1, WORK, LWORK, INFO ) + IMPLICIT NONE * * -- LAPACK computational routine -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- @@ -226,7 +226,8 @@ SUBROUTINE ZUNBDB2( M, P, Q, X11, LDX11, X21, LDX21, THETA, PHI, LOGICAL LQUERY * .. * .. External Subroutines .. - EXTERNAL ZLARF, ZLARFGP, ZUNBDB5, ZDROT, ZSCAL, ZLACGV, + EXTERNAL ZLARF1F, ZLARFGP, ZUNBDB5, ZDROT, ZSCAL, + $ ZLACGV, $ XERBLA * .. * .. External Functions .. @@ -287,11 +288,10 @@ SUBROUTINE ZUNBDB2( M, P, Q, X11, LDX11, X21, LDX21, THETA, PHI, CALL ZLACGV( Q-I+1, X11(I,I), LDX11 ) CALL ZLARFGP( Q-I+1, X11(I,I), X11(I,I+1), LDX11, TAUQ1(I) ) C = DBLE( X11(I,I) ) - X11(I,I) = ONE - CALL ZLARF( 'R', P-I, Q-I+1, X11(I,I), LDX11, TAUQ1(I), - $ X11(I+1,I), LDX11, WORK(ILARF) ) - CALL ZLARF( 'R', M-P-I+1, Q-I+1, X11(I,I), LDX11, TAUQ1(I), - $ X21(I,I), LDX21, WORK(ILARF) ) + CALL ZLARF1F( 'R', P-I, Q-I+1, X11(I,I), LDX11, TAUQ1(I), + $ X11(I+1,I), LDX11, WORK(ILARF) ) + CALL ZLARF1F( 'R', M-P-I+1, Q-I+1, X11(I,I), LDX11, + $ TAUQ1(I), X21(I,I), LDX21, WORK(ILARF) ) CALL ZLACGV( Q-I+1, X11(I,I), LDX11 ) S = SQRT( DZNRM2( P-I, X11(I+1,I), 1 )**2 $ + DZNRM2( M-P-I+1, X21(I,I), 1 )**2 ) @@ -307,13 +307,13 @@ SUBROUTINE ZUNBDB2( M, P, Q, X11, LDX11, X21, LDX21, THETA, PHI, PHI(I) = ATAN2( DBLE( X11(I+1,I) ), DBLE( X21(I,I) ) ) C = COS( PHI(I) ) S = SIN( PHI(I) ) - X11(I+1,I) = ONE - CALL ZLARF( 'L', P-I, Q-I, X11(I+1,I), 1, DCONJG(TAUP1(I)), - $ X11(I+1,I+1), LDX11, WORK(ILARF) ) + CALL ZLARF1F( 'L', P-I, Q-I, X11(I+1,I), 1, + $ CONJG(TAUP1(I)), + $ X11(I+1,I+1), LDX11, WORK(ILARF) ) END IF - X21(I,I) = ONE - CALL ZLARF( 'L', M-P-I+1, Q-I, X21(I,I), 1, DCONJG(TAUP2(I)), - $ X21(I,I+1), LDX21, WORK(ILARF) ) + CALL ZLARF1F( 'L', M-P-I+1, Q-I, X21(I,I), 1, + $ CONJG(TAUP2(I)), X21(I,I+1), LDX21, + $ WORK(ILARF) ) * END DO * @@ -321,9 +321,9 @@ SUBROUTINE ZUNBDB2( M, P, Q, X11, LDX11, X21, LDX21, THETA, PHI, * DO I = P + 1, Q CALL ZLARFGP( M-P-I+1, X21(I,I), X21(I+1,I), 1, TAUP2(I) ) - X21(I,I) = ONE - CALL ZLARF( 'L', M-P-I+1, Q-I, X21(I,I), 1, DCONJG(TAUP2(I)), - $ X21(I,I+1), LDX21, WORK(ILARF) ) + CALL ZLARF1F( 'L', M-P-I+1, Q-I, X21(I,I), 1, + $ CONJG(TAUP2(I)), X21(I,I+1), LDX21, + $ WORK(ILARF) ) END DO * RETURN diff --git a/lapack-netlib/SRC/zunbdb3.f b/lapack-netlib/SRC/zunbdb3.f index 9f32a7a886..057fe46772 100644 --- a/lapack-netlib/SRC/zunbdb3.f +++ b/lapack-netlib/SRC/zunbdb3.f @@ -5,7 +5,6 @@ * Online html documentation available at * http://www.netlib.org/lapack/explore-html/ * -*> \htmlonly *> Download ZUNBDB3 + dependencies *> *> [TGZ] @@ -13,7 +12,6 @@ *> [ZIP] *> *> [TXT] -*> \endhtmlonly * * Definition: * =========== @@ -172,7 +170,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup complex16OTHERcomputational +*> \ingroup unbdb3 * *> \par Further Details: * ===================== @@ -196,8 +194,10 @@ *> Algorithms, 50(1):33-65, 2009. *> * ===================================================================== - SUBROUTINE ZUNBDB3( M, P, Q, X11, LDX11, X21, LDX21, THETA, PHI, + SUBROUTINE ZUNBDB3( M, P, Q, X11, LDX11, X21, LDX21, THETA, + $ PHI, $ TAUP1, TAUP2, TAUQ1, WORK, LWORK, INFO ) + IMPLICIT NONE * * -- LAPACK computational routine -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- @@ -214,10 +214,6 @@ SUBROUTINE ZUNBDB3( M, P, Q, X11, LDX11, X21, LDX21, THETA, PHI, * * ==================================================================== * -* .. Parameters .. - COMPLEX*16 ONE - PARAMETER ( ONE = (1.0D0,0.0D0) ) -* .. * .. Local Scalars .. DOUBLE PRECISION C, S INTEGER CHILDINFO, I, ILARF, IORBDB5, LLARF, LORBDB5, @@ -225,7 +221,8 @@ SUBROUTINE ZUNBDB3( M, P, Q, X11, LDX11, X21, LDX21, THETA, PHI, LOGICAL LQUERY * .. * .. External Subroutines .. - EXTERNAL ZLARF, ZLARFGP, ZUNBDB5, ZDROT, ZLACGV, XERBLA + EXTERNAL ZLARF1F, ZLARFGP, ZUNBDB5, ZDROT, ZLACGV, + $ XERBLA * .. * .. External Functions .. DOUBLE PRECISION DZNRM2 @@ -286,11 +283,10 @@ SUBROUTINE ZUNBDB3( M, P, Q, X11, LDX11, X21, LDX21, THETA, PHI, CALL ZLACGV( Q-I+1, X21(I,I), LDX21 ) CALL ZLARFGP( Q-I+1, X21(I,I), X21(I,I+1), LDX21, TAUQ1(I) ) S = DBLE( X21(I,I) ) - X21(I,I) = ONE - CALL ZLARF( 'R', P-I+1, Q-I+1, X21(I,I), LDX21, TAUQ1(I), - $ X11(I,I), LDX11, WORK(ILARF) ) - CALL ZLARF( 'R', M-P-I, Q-I+1, X21(I,I), LDX21, TAUQ1(I), - $ X21(I+1,I), LDX21, WORK(ILARF) ) + CALL ZLARF1F( 'R', P-I+1, Q-I+1, X21(I,I), LDX21, TAUQ1(I), + $ X11(I,I), LDX11, WORK(ILARF) ) + CALL ZLARF1F( 'R', M-P-I, Q-I+1, X21(I,I), LDX21, TAUQ1(I), + $ X21(I+1,I), LDX21, WORK(ILARF) ) CALL ZLACGV( Q-I+1, X21(I,I), LDX21 ) C = SQRT( DZNRM2( P-I+1, X11(I,I), 1 )**2 $ + DZNRM2( M-P-I, X21(I+1,I), 1 )**2 ) @@ -301,28 +297,25 @@ SUBROUTINE ZUNBDB3( M, P, Q, X11, LDX11, X21, LDX21, THETA, PHI, $ WORK(IORBDB5), LORBDB5, CHILDINFO ) CALL ZLARFGP( P-I+1, X11(I,I), X11(I+1,I), 1, TAUP1(I) ) IF( I .LT. M-P ) THEN - CALL ZLARFGP( M-P-I, X21(I+1,I), X21(I+2,I), 1, TAUP2(I) ) + CALL ZLARFGP( M-P-I, X21(I+1,I), X21(I+2,I), 1, + $ TAUP2(I) ) PHI(I) = ATAN2( DBLE( X21(I+1,I) ), DBLE( X11(I,I) ) ) C = COS( PHI(I) ) S = SIN( PHI(I) ) - X21(I+1,I) = ONE - CALL ZLARF( 'L', M-P-I, Q-I, X21(I+1,I), 1, - $ DCONJG(TAUP2(I)), X21(I+1,I+1), LDX21, - $ WORK(ILARF) ) + CALL ZLARF1F( 'L', M-P-I, Q-I, X21(I+1,I), 1, + $ CONJG(TAUP2(I)), + $ X21(I+1,I+1), LDX21, WORK(ILARF) ) END IF - X11(I,I) = ONE - CALL ZLARF( 'L', P-I+1, Q-I, X11(I,I), 1, DCONJG(TAUP1(I)), - $ X11(I,I+1), LDX11, WORK(ILARF) ) -* + CALL ZLARF1F( 'L', P-I+1, Q-I, X11(I,I), 1, CONJG(TAUP1(I)), + $ X11(I,I+1), LDX11, WORK(ILARF) ) END DO * * Reduce the bottom-right portion of X11 to the identity matrix * DO I = M-P + 1, Q CALL ZLARFGP( P-I+1, X11(I,I), X11(I+1,I), 1, TAUP1(I) ) - X11(I,I) = ONE - CALL ZLARF( 'L', P-I+1, Q-I, X11(I,I), 1, DCONJG(TAUP1(I)), - $ X11(I,I+1), LDX11, WORK(ILARF) ) + CALL ZLARF1F( 'L', P-I+1, Q-I, X11(I,I), 1, CONJG(TAUP1(I)), + $ X11(I,I+1), LDX11, WORK(ILARF) ) END DO * RETURN diff --git a/lapack-netlib/SRC/zunbdb4.f b/lapack-netlib/SRC/zunbdb4.f index a1db5eb793..31d8ec40b7 100644 --- a/lapack-netlib/SRC/zunbdb4.f +++ b/lapack-netlib/SRC/zunbdb4.f @@ -5,7 +5,6 @@ * Online html documentation available at * http://www.netlib.org/lapack/explore-html/ * -*> \htmlonly *> Download ZUNBDB4 + dependencies *> *> [TGZ] @@ -13,7 +12,6 @@ *> [ZIP] *> *> [TXT] -*> \endhtmlonly * * Definition: * =========== @@ -183,7 +181,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup complex16OTHERcomputational +*> \ingroup unbdb4 * *> \par Further Details: * ===================== @@ -207,9 +205,11 @@ *> Algorithms, 50(1):33-65, 2009. *> * ===================================================================== - SUBROUTINE ZUNBDB4( M, P, Q, X11, LDX11, X21, LDX21, THETA, PHI, + SUBROUTINE ZUNBDB4( M, P, Q, X11, LDX11, X21, LDX21, THETA, + $ PHI, $ TAUP1, TAUP2, TAUQ1, PHANTOM, WORK, LWORK, $ INFO ) + IMPLICIT NONE * * -- LAPACK computational routine -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- @@ -238,7 +238,8 @@ SUBROUTINE ZUNBDB4( M, P, Q, X11, LDX11, X21, LDX21, THETA, PHI, LOGICAL LQUERY * .. * .. External Subroutines .. - EXTERNAL ZLARF, ZLARFGP, ZUNBDB5, ZDROT, ZSCAL, ZLACGV, + EXTERNAL ZLARF1F, ZLARFGP, ZUNBDB5, ZDROT, ZSCAL, + $ ZLACGV, $ XERBLA * .. * .. External Functions .. @@ -302,44 +303,45 @@ SUBROUTINE ZUNBDB4( M, P, Q, X11, LDX11, X21, LDX21, THETA, PHI, $ LORBDB5, CHILDINFO ) CALL ZSCAL( P, NEGONE, PHANTOM(1), 1 ) CALL ZLARFGP( P, PHANTOM(1), PHANTOM(2), 1, TAUP1(1) ) - CALL ZLARFGP( M-P, PHANTOM(P+1), PHANTOM(P+2), 1, TAUP2(1) ) + CALL ZLARFGP( M-P, PHANTOM(P+1), PHANTOM(P+2), 1, + $ TAUP2(1) ) THETA(I) = ATAN2( DBLE( PHANTOM(1) ), DBLE( PHANTOM(P+1) ) ) C = COS( THETA(I) ) S = SIN( THETA(I) ) - PHANTOM(1) = ONE - PHANTOM(P+1) = ONE - CALL ZLARF( 'L', P, Q, PHANTOM(1), 1, DCONJG(TAUP1(1)), X11, - $ LDX11, WORK(ILARF) ) - CALL ZLARF( 'L', M-P, Q, PHANTOM(P+1), 1, DCONJG(TAUP2(1)), - $ X21, LDX21, WORK(ILARF) ) + CALL ZLARF1F( 'L', P, Q, PHANTOM(1), 1, CONJG(TAUP1(1)), + $ X11, + $ LDX11, WORK(ILARF) ) + CALL ZLARF1F( 'L', M-P, Q, PHANTOM(P+1), 1, + $ CONJG(TAUP2(1)), + $ X21, LDX21, WORK(ILARF) ) ELSE CALL ZUNBDB5( P-I+1, M-P-I+1, Q-I+1, X11(I,I-1), 1, $ X21(I,I-1), 1, X11(I,I), LDX11, X21(I,I), $ LDX21, WORK(IORBDB5), LORBDB5, CHILDINFO ) CALL ZSCAL( P-I+1, NEGONE, X11(I,I-1), 1 ) - CALL ZLARFGP( P-I+1, X11(I,I-1), X11(I+1,I-1), 1, TAUP1(I) ) + CALL ZLARFGP( P-I+1, X11(I,I-1), X11(I+1,I-1), 1, + $ TAUP1(I) ) CALL ZLARFGP( M-P-I+1, X21(I,I-1), X21(I+1,I-1), 1, $ TAUP2(I) ) THETA(I) = ATAN2( DBLE( X11(I,I-1) ), DBLE( X21(I,I-1) ) ) C = COS( THETA(I) ) S = SIN( THETA(I) ) - X11(I,I-1) = ONE - X21(I,I-1) = ONE - CALL ZLARF( 'L', P-I+1, Q-I+1, X11(I,I-1), 1, - $ DCONJG(TAUP1(I)), X11(I,I), LDX11, WORK(ILARF) ) - CALL ZLARF( 'L', M-P-I+1, Q-I+1, X21(I,I-1), 1, - $ DCONJG(TAUP2(I)), X21(I,I), LDX21, WORK(ILARF) ) + CALL ZLARF1F( 'L', P-I+1, Q-I+1, X11(I,I-1), 1, + $ CONJG(TAUP1(I)), X11(I,I), LDX11, + $ WORK(ILARF) ) + CALL ZLARF1F( 'L', M-P-I+1, Q-I+1, X21(I,I-1), 1, + $ CONJG(TAUP2(I)), X21(I,I), LDX21, + $ WORK(ILARF) ) END IF * CALL ZDROT( Q-I+1, X11(I,I), LDX11, X21(I,I), LDX21, S, -C ) CALL ZLACGV( Q-I+1, X21(I,I), LDX21 ) CALL ZLARFGP( Q-I+1, X21(I,I), X21(I,I+1), LDX21, TAUQ1(I) ) C = DBLE( X21(I,I) ) - X21(I,I) = ONE - CALL ZLARF( 'R', P-I, Q-I+1, X21(I,I), LDX21, TAUQ1(I), - $ X11(I+1,I), LDX11, WORK(ILARF) ) - CALL ZLARF( 'R', M-P-I, Q-I+1, X21(I,I), LDX21, TAUQ1(I), - $ X21(I+1,I), LDX21, WORK(ILARF) ) + CALL ZLARF1F( 'R', P-I, Q-I+1, X21(I,I), LDX21, TAUQ1(I), + $ X11(I+1,I), LDX11, WORK(ILARF) ) + CALL ZLARF1F( 'R', M-P-I, Q-I+1, X21(I,I), LDX21, TAUQ1(I), + $ X21(I+1,I), LDX21, WORK(ILARF) ) CALL ZLACGV( Q-I+1, X21(I,I), LDX21 ) IF( I .LT. M-Q ) THEN S = SQRT( DZNRM2( P-I, X11(I+1,I), 1 )**2 @@ -354,11 +356,10 @@ SUBROUTINE ZUNBDB4( M, P, Q, X11, LDX11, X21, LDX21, THETA, PHI, DO I = M - Q + 1, P CALL ZLACGV( Q-I+1, X11(I,I), LDX11 ) CALL ZLARFGP( Q-I+1, X11(I,I), X11(I,I+1), LDX11, TAUQ1(I) ) - X11(I,I) = ONE - CALL ZLARF( 'R', P-I, Q-I+1, X11(I,I), LDX11, TAUQ1(I), - $ X11(I+1,I), LDX11, WORK(ILARF) ) - CALL ZLARF( 'R', Q-P, Q-I+1, X11(I,I), LDX11, TAUQ1(I), - $ X21(M-Q+1,I), LDX21, WORK(ILARF) ) + CALL ZLARF1F( 'R', P-I, Q-I+1, X11(I,I), LDX11, TAUQ1(I), + $ X11(I+1,I), LDX11, WORK(ILARF) ) + CALL ZLARF1F( 'R', Q-P, Q-I+1, X11(I,I), LDX11, TAUQ1(I), + $ X21(M-Q+1,I), LDX21, WORK(ILARF) ) CALL ZLACGV( Q-I+1, X11(I,I), LDX11 ) END DO * @@ -366,11 +367,12 @@ SUBROUTINE ZUNBDB4( M, P, Q, X11, LDX11, X21, LDX21, THETA, PHI, * DO I = P + 1, Q CALL ZLACGV( Q-I+1, X21(M-Q+I-P,I), LDX21 ) - CALL ZLARFGP( Q-I+1, X21(M-Q+I-P,I), X21(M-Q+I-P,I+1), LDX21, + CALL ZLARFGP( Q-I+1, X21(M-Q+I-P,I), X21(M-Q+I-P,I+1), + $ LDX21, $ TAUQ1(I) ) - X21(M-Q+I-P,I) = ONE - CALL ZLARF( 'R', Q-I, Q-I+1, X21(M-Q+I-P,I), LDX21, TAUQ1(I), - $ X21(M-Q+I-P+1,I), LDX21, WORK(ILARF) ) + CALL ZLARF1F( 'R', Q-I, Q-I+1, X21(M-Q+I-P,I), LDX21, + $ TAUQ1(I), + $ X21(M-Q+I-P+1,I), LDX21, WORK(ILARF) ) CALL ZLACGV( Q-I+1, X21(M-Q+I-P,I), LDX21 ) END DO * diff --git a/lapack-netlib/SRC/zung2l.f b/lapack-netlib/SRC/zung2l.f index add5cb946b..7f5cf64b00 100644 --- a/lapack-netlib/SRC/zung2l.f +++ b/lapack-netlib/SRC/zung2l.f @@ -5,7 +5,6 @@ * Online html documentation available at * http://www.netlib.org/lapack/explore-html/ * -*> \htmlonly *> Download ZUNG2L + dependencies *> *> [TGZ] @@ -13,7 +12,6 @@ *> [ZIP] *> *> [TXT] -*> \endhtmlonly * * Definition: * =========== @@ -107,10 +105,11 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup complex16OTHERcomputational +*> \ingroup ung2l * * ===================================================================== SUBROUTINE ZUNG2L( M, N, K, A, LDA, TAU, WORK, INFO ) + IMPLICIT NONE * * -- LAPACK computational routine -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- @@ -134,7 +133,7 @@ SUBROUTINE ZUNG2L( M, N, K, A, LDA, TAU, WORK, INFO ) INTEGER I, II, J, L * .. * .. External Subroutines .. - EXTERNAL XERBLA, ZLARF, ZSCAL + EXTERNAL XERBLA, ZLARF1L, ZSCAL * .. * .. Intrinsic Functions .. INTRINSIC MAX @@ -178,8 +177,9 @@ SUBROUTINE ZUNG2L( M, N, K, A, LDA, TAU, WORK, INFO ) * Apply H(i) to A(1:m-k+i,1:n-k+i) from the left * A( M-N+II, II ) = ONE - CALL ZLARF( 'Left', M-N+II, II-1, A( 1, II ), 1, TAU( I ), A, - $ LDA, WORK ) + CALL ZLARF1L( 'Left', M-N+II, II-1, A( 1, II ), 1, TAU( I ), + $ A, + $ LDA, WORK ) CALL ZSCAL( M-N+II-1, -TAU( I ), A( 1, II ), 1 ) A( M-N+II, II ) = ONE - TAU( I ) * diff --git a/lapack-netlib/SRC/zung2r.f b/lapack-netlib/SRC/zung2r.f index 2823b7ebdd..56374be425 100644 --- a/lapack-netlib/SRC/zung2r.f +++ b/lapack-netlib/SRC/zung2r.f @@ -5,7 +5,6 @@ * Online html documentation available at * http://www.netlib.org/lapack/explore-html/ * -*> \htmlonly *> Download ZUNG2R + dependencies *> *> [TGZ] @@ -13,7 +12,6 @@ *> [ZIP] *> *> [TXT] -*> \endhtmlonly * * Definition: * =========== @@ -107,10 +105,11 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup complex16OTHERcomputational +*> \ingroup ung2r * * ===================================================================== SUBROUTINE ZUNG2R( M, N, K, A, LDA, TAU, WORK, INFO ) + IMPLICIT NONE * * -- LAPACK computational routine -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- @@ -134,7 +133,7 @@ SUBROUTINE ZUNG2R( M, N, K, A, LDA, TAU, WORK, INFO ) INTEGER I, J, L * .. * .. External Subroutines .. - EXTERNAL XERBLA, ZLARF, ZSCAL + EXTERNAL XERBLA, ZLARF1F, ZSCAL * .. * .. Intrinsic Functions .. INTRINSIC MAX @@ -177,9 +176,8 @@ SUBROUTINE ZUNG2R( M, N, K, A, LDA, TAU, WORK, INFO ) * Apply H(i) to A(i:m,i:n) from the left * IF( I.LT.N ) THEN - A( I, I ) = ONE - CALL ZLARF( 'Left', M-I+1, N-I, A( I, I ), 1, TAU( I ), - $ A( I, I+1 ), LDA, WORK ) + CALL ZLARF1F( 'Left', M-I+1, N-I, A( I, I ), 1, TAU( I ), + $ A( I, I+1 ), LDA, WORK ) END IF IF( I.LT.M ) $ CALL ZSCAL( M-I, -TAU( I ), A( I+1, I ), 1 ) diff --git a/lapack-netlib/SRC/zungl2.f b/lapack-netlib/SRC/zungl2.f index e7a0b59603..24f41b9be8 100644 --- a/lapack-netlib/SRC/zungl2.f +++ b/lapack-netlib/SRC/zungl2.f @@ -5,7 +5,6 @@ * Online html documentation available at * http://www.netlib.org/lapack/explore-html/ * -*> \htmlonly *> Download ZUNGL2 + dependencies *> *> [TGZ] @@ -13,7 +12,6 @@ *> [ZIP] *> *> [TXT] -*> \endhtmlonly * * Definition: * =========== @@ -106,10 +104,11 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup complex16OTHERcomputational +*> \ingroup ungl2 * * ===================================================================== SUBROUTINE ZUNGL2( M, N, K, A, LDA, TAU, WORK, INFO ) + IMPLICIT NONE * * -- LAPACK computational routine -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- @@ -133,7 +132,7 @@ SUBROUTINE ZUNGL2( M, N, K, A, LDA, TAU, WORK, INFO ) INTEGER I, J, L * .. * .. External Subroutines .. - EXTERNAL XERBLA, ZLACGV, ZLARF, ZSCAL + EXTERNAL XERBLA, ZLACGV, ZLARF1F, ZSCAL * .. * .. Intrinsic Functions .. INTRINSIC DCONJG, MAX @@ -182,9 +181,9 @@ SUBROUTINE ZUNGL2( M, N, K, A, LDA, TAU, WORK, INFO ) IF( I.LT.N ) THEN CALL ZLACGV( N-I, A( I, I+1 ), LDA ) IF( I.LT.M ) THEN - A( I, I ) = ONE - CALL ZLARF( 'Right', M-I, N-I+1, A( I, I ), LDA, - $ DCONJG( TAU( I ) ), A( I+1, I ), LDA, WORK ) + CALL ZLARF1F( 'Right', M-I, N-I+1, A( I, I ), LDA, + $ CONJG( TAU( I ) ), A( I+1, I ), LDA, + $ WORK ) END IF CALL ZSCAL( N-I, -TAU( I ), A( I, I+1 ), LDA ) CALL ZLACGV( N-I, A( I, I+1 ), LDA ) diff --git a/lapack-netlib/SRC/zungr2.f b/lapack-netlib/SRC/zungr2.f index 034ff4fe41..f24a455fe6 100644 --- a/lapack-netlib/SRC/zungr2.f +++ b/lapack-netlib/SRC/zungr2.f @@ -5,7 +5,6 @@ * Online html documentation available at * http://www.netlib.org/lapack/explore-html/ * -*> \htmlonly *> Download ZUNGR2 + dependencies *> *> [TGZ] @@ -13,7 +12,6 @@ *> [ZIP] *> *> [TXT] -*> \endhtmlonly * * Definition: * =========== @@ -107,10 +105,11 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup complex16OTHERcomputational +*> \ingroup ungr2 * * ===================================================================== SUBROUTINE ZUNGR2( M, N, K, A, LDA, TAU, WORK, INFO ) + IMPLICIT NONE * * -- LAPACK computational routine -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- @@ -134,7 +133,7 @@ SUBROUTINE ZUNGR2( M, N, K, A, LDA, TAU, WORK, INFO ) INTEGER I, II, J, L * .. * .. External Subroutines .. - EXTERNAL XERBLA, ZLACGV, ZLARF, ZSCAL + EXTERNAL XERBLA, ZLACGV, ZLARF1L, ZSCAL * .. * .. Intrinsic Functions .. INTRINSIC DCONJG, MAX @@ -182,9 +181,8 @@ SUBROUTINE ZUNGR2( M, N, K, A, LDA, TAU, WORK, INFO ) * Apply H(i)**H to A(1:m-k+i,1:n-k+i) from the right * CALL ZLACGV( N-M+II-1, A( II, 1 ), LDA ) - A( II, N-M+II ) = ONE - CALL ZLARF( 'Right', II-1, N-M+II, A( II, 1 ), LDA, - $ DCONJG( TAU( I ) ), A, LDA, WORK ) + CALL ZLARF1L( 'Right', II-1, N-M+II, A( II, 1 ), LDA, + $ CONJG( TAU( I ) ), A, LDA, WORK ) CALL ZSCAL( N-M+II-1, -TAU( I ), A( II, 1 ), LDA ) CALL ZLACGV( N-M+II-1, A( II, 1 ), LDA ) A( II, N-M+II ) = ONE - DCONJG( TAU( I ) ) diff --git a/lapack-netlib/SRC/zunm2l.f b/lapack-netlib/SRC/zunm2l.f index 48c2dbfc0c..2756bf9811 100644 --- a/lapack-netlib/SRC/zunm2l.f +++ b/lapack-netlib/SRC/zunm2l.f @@ -5,7 +5,6 @@ * Online html documentation available at * http://www.netlib.org/lapack/explore-html/ * -*> \htmlonly *> Download ZUNM2L + dependencies *> *> [TGZ] @@ -13,7 +12,6 @@ *> [ZIP] *> *> [TXT] -*> \endhtmlonly * * Definition: * =========== @@ -151,11 +149,12 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup complex16OTHERcomputational +*> \ingroup unm2l * * ===================================================================== SUBROUTINE ZUNM2L( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, $ WORK, INFO ) + IMPLICIT NONE * * -- LAPACK computational routine -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- @@ -178,14 +177,14 @@ SUBROUTINE ZUNM2L( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, * .. Local Scalars .. LOGICAL LEFT, NOTRAN INTEGER I, I1, I2, I3, MI, NI, NQ - COMPLEX*16 AII, TAUI + COMPLEX*16 TAUI * .. * .. External Functions .. LOGICAL LSAME EXTERNAL LSAME * .. * .. External Subroutines .. - EXTERNAL XERBLA, ZLARF + EXTERNAL XERBLA, ZLARF1L * .. * .. Intrinsic Functions .. INTRINSIC DCONJG, MAX @@ -266,10 +265,8 @@ SUBROUTINE ZUNM2L( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, ELSE TAUI = DCONJG( TAU( I ) ) END IF - AII = A( NQ-K+I, I ) - A( NQ-K+I, I ) = ONE - CALL ZLARF( SIDE, MI, NI, A( 1, I ), 1, TAUI, C, LDC, WORK ) - A( NQ-K+I, I ) = AII + CALL ZLARF1L( SIDE, MI, NI, A( 1, I ), 1, TAUI, C, LDC, + $ WORK ) 10 CONTINUE RETURN * diff --git a/lapack-netlib/SRC/zunm2r.f b/lapack-netlib/SRC/zunm2r.f index aec5a8bcae..8e42228a7f 100644 --- a/lapack-netlib/SRC/zunm2r.f +++ b/lapack-netlib/SRC/zunm2r.f @@ -5,7 +5,6 @@ * Online html documentation available at * http://www.netlib.org/lapack/explore-html/ * -*> \htmlonly *> Download ZUNM2R + dependencies *> *> [TGZ] @@ -13,7 +12,6 @@ *> [ZIP] *> *> [TXT] -*> \endhtmlonly * * Definition: * =========== @@ -151,11 +149,12 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup complex16OTHERcomputational +*> \ingroup unm2r * * ===================================================================== SUBROUTINE ZUNM2R( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, $ WORK, INFO ) + IMPLICIT NONE * * -- LAPACK computational routine -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- @@ -178,14 +177,14 @@ SUBROUTINE ZUNM2R( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, * .. Local Scalars .. LOGICAL LEFT, NOTRAN INTEGER I, I1, I2, I3, IC, JC, MI, NI, NQ - COMPLEX*16 AII, TAUI + COMPLEX*16 TAUI * .. * .. External Functions .. LOGICAL LSAME EXTERNAL LSAME * .. * .. External Subroutines .. - EXTERNAL XERBLA, ZLARF + EXTERNAL XERBLA, ZLARF1F * .. * .. Intrinsic Functions .. INTRINSIC DCONJG, MAX @@ -270,11 +269,9 @@ SUBROUTINE ZUNM2R( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, ELSE TAUI = DCONJG( TAU( I ) ) END IF - AII = A( I, I ) - A( I, I ) = ONE - CALL ZLARF( SIDE, MI, NI, A( I, I ), 1, TAUI, C( IC, JC ), LDC, + CALL ZLARF1F( SIDE, MI, NI, A( I, I ), 1, TAUI, C( IC, JC ), + $ LDC, $ WORK ) - A( I, I ) = AII 10 CONTINUE RETURN * diff --git a/lapack-netlib/SRC/zunml2.f b/lapack-netlib/SRC/zunml2.f index f47f768b08..969d586d17 100644 --- a/lapack-netlib/SRC/zunml2.f +++ b/lapack-netlib/SRC/zunml2.f @@ -5,7 +5,6 @@ * Online html documentation available at * http://www.netlib.org/lapack/explore-html/ * -*> \htmlonly *> Download ZUNML2 + dependencies *> *> [TGZ] @@ -13,7 +12,6 @@ *> [ZIP] *> *> [TXT] -*> \endhtmlonly * * Definition: * =========== @@ -151,11 +149,12 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup complex16OTHERcomputational +*> \ingroup unml2 * * ===================================================================== SUBROUTINE ZUNML2( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, $ WORK, INFO ) + IMPLICIT NONE * * -- LAPACK computational routine -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- @@ -178,14 +177,14 @@ SUBROUTINE ZUNML2( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, * .. Local Scalars .. LOGICAL LEFT, NOTRAN INTEGER I, I1, I2, I3, IC, JC, MI, NI, NQ - COMPLEX*16 AII, TAUI + COMPLEX*16 TAUI * .. * .. External Functions .. LOGICAL LSAME EXTERNAL LSAME * .. * .. External Subroutines .. - EXTERNAL XERBLA, ZLACGV, ZLARF + EXTERNAL XERBLA, ZLACGV, ZLARF1F * .. * .. Intrinsic Functions .. INTRINSIC DCONJG, MAX @@ -272,11 +271,8 @@ SUBROUTINE ZUNML2( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, END IF IF( I.LT.NQ ) $ CALL ZLACGV( NQ-I, A( I, I+1 ), LDA ) - AII = A( I, I ) - A( I, I ) = ONE - CALL ZLARF( SIDE, MI, NI, A( I, I ), LDA, TAUI, C( IC, JC ), - $ LDC, WORK ) - A( I, I ) = AII + CALL ZLARF1F( SIDE, MI, NI, A( I, I ), LDA, TAUI, C( IC, + $ JC ), LDC, WORK ) IF( I.LT.NQ ) $ CALL ZLACGV( NQ-I, A( I, I+1 ), LDA ) 10 CONTINUE diff --git a/lapack-netlib/SRC/zunmr2.f b/lapack-netlib/SRC/zunmr2.f index 3685e9c7c6..6696a7f8f2 100644 --- a/lapack-netlib/SRC/zunmr2.f +++ b/lapack-netlib/SRC/zunmr2.f @@ -5,7 +5,6 @@ * Online html documentation available at * http://www.netlib.org/lapack/explore-html/ * -*> \htmlonly *> Download ZUNMR2 + dependencies *> *> [TGZ] @@ -13,7 +12,6 @@ *> [ZIP] *> *> [TXT] -*> \endhtmlonly * * Definition: * =========== @@ -151,11 +149,12 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup complex16OTHERcomputational +*> \ingroup unmr2 * * ===================================================================== SUBROUTINE ZUNMR2( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, $ WORK, INFO ) + IMPLICIT NONE * * -- LAPACK computational routine -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- @@ -178,14 +177,14 @@ SUBROUTINE ZUNMR2( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, * .. Local Scalars .. LOGICAL LEFT, NOTRAN INTEGER I, I1, I2, I3, MI, NI, NQ - COMPLEX*16 AII, TAUI + COMPLEX*16 TAUI * .. * .. External Functions .. LOGICAL LSAME EXTERNAL LSAME * .. * .. External Subroutines .. - EXTERNAL XERBLA, ZLACGV, ZLARF + EXTERNAL XERBLA, ZLACGV, ZLARF1L * .. * .. Intrinsic Functions .. INTRINSIC DCONJG, MAX @@ -267,10 +266,8 @@ SUBROUTINE ZUNMR2( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, TAUI = TAU( I ) END IF CALL ZLACGV( NQ-K+I-1, A( I, 1 ), LDA ) - AII = A( I, NQ-K+I ) - A( I, NQ-K+I ) = ONE - CALL ZLARF( SIDE, MI, NI, A( I, 1 ), LDA, TAUI, C, LDC, WORK ) - A( I, NQ-K+I ) = AII + CALL ZLARF1L( SIDE, MI, NI, A( I, 1 ), LDA, TAUI, C, LDC, + $ WORK ) CALL ZLACGV( NQ-K+I-1, A( I, 1 ), LDA ) 10 CONTINUE RETURN diff --git a/lapack-netlib/SRC/zupmtr.f b/lapack-netlib/SRC/zupmtr.f index c195800261..97b0a67451 100644 --- a/lapack-netlib/SRC/zupmtr.f +++ b/lapack-netlib/SRC/zupmtr.f @@ -5,7 +5,6 @@ * Online html documentation available at * http://www.netlib.org/lapack/explore-html/ * -*> \htmlonly *> Download ZUPMTR + dependencies *> *> [TGZ] @@ -13,7 +12,6 @@ *> [ZIP] *> *> [TXT] -*> \endhtmlonly * * Definition: * =========== @@ -142,11 +140,13 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup complex16OTHERcomputational +*> \ingroup upmtr * * ===================================================================== - SUBROUTINE ZUPMTR( SIDE, UPLO, TRANS, M, N, AP, TAU, C, LDC, WORK, + SUBROUTINE ZUPMTR( SIDE, UPLO, TRANS, M, N, AP, TAU, C, LDC, + $ WORK, $ INFO ) + IMPLICIT NONE * * -- LAPACK computational routine -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- @@ -169,14 +169,14 @@ SUBROUTINE ZUPMTR( SIDE, UPLO, TRANS, M, N, AP, TAU, C, LDC, WORK, * .. Local Scalars .. LOGICAL FORWRD, LEFT, NOTRAN, UPPER INTEGER I, I1, I2, I3, IC, II, JC, MI, NI, NQ - COMPLEX*16 AII, TAUI + COMPLEX*16 TAUI * .. * .. External Functions .. LOGICAL LSAME EXTERNAL LSAME * .. * .. External Subroutines .. - EXTERNAL XERBLA, ZLARF + EXTERNAL XERBLA, ZLARF1L, ZLARF1F * .. * .. Intrinsic Functions .. INTRINSIC DCONJG, MAX @@ -265,11 +265,8 @@ SUBROUTINE ZUPMTR( SIDE, UPLO, TRANS, M, N, AP, TAU, C, LDC, WORK, ELSE TAUI = DCONJG( TAU( I ) ) END IF - AII = AP( II ) - AP( II ) = ONE - CALL ZLARF( SIDE, MI, NI, AP( II-I+1 ), 1, TAUI, C, LDC, - $ WORK ) - AP( II ) = AII + CALL ZLARF1L( SIDE, MI, NI, AP( II-I+1 ), 1, TAUI, C, + $ LDC, WORK ) * IF( FORWRD ) THEN II = II + I + 2 @@ -305,8 +302,6 @@ SUBROUTINE ZUPMTR( SIDE, UPLO, TRANS, M, N, AP, TAU, C, LDC, WORK, END IF * DO 20 I = I1, I2, I3 - AII = AP( II ) - AP( II ) = ONE IF( LEFT ) THEN * * H(i) or H(i)**H is applied to C(i+1:m,1:n) @@ -328,9 +323,8 @@ SUBROUTINE ZUPMTR( SIDE, UPLO, TRANS, M, N, AP, TAU, C, LDC, WORK, ELSE TAUI = DCONJG( TAU( I ) ) END IF - CALL ZLARF( SIDE, MI, NI, AP( II ), 1, TAUI, C( IC, JC ), - $ LDC, WORK ) - AP( II ) = AII + CALL ZLARF1F( SIDE, MI, NI, AP( II ), 1, TAUI, C( IC, + $ JC ), LDC, WORK ) * IF( FORWRD ) THEN II = II + NQ - I + 1 diff --git a/lapack-netlib/TESTING/EIG/CMakeLists.txt b/lapack-netlib/TESTING/EIG/CMakeLists.txt index d9c34fe985..324288ce9e 100644 --- a/lapack-netlib/TESTING/EIG/CMakeLists.txt +++ b/lapack-netlib/TESTING/EIG/CMakeLists.txt @@ -112,20 +112,20 @@ endmacro() if(BUILD_SINGLE) add_eig_executable(xeigtsts ${SEIGTST} ${SCIGTST} ${AEIGTST}) -add_eig_executable(xdmdeigtsts ${SDMDEIGTST}) +add_eig_executable(xdmdeigtsts ${SDMDEIGTST} ${AEIGTST}) endif() if(BUILD_COMPLEX) add_eig_executable(xeigtstc ${CEIGTST} ${SCIGTST} ${AEIGTST}) -add_eig_executable(xdmdeigtstc ${CDMDEIGTST}) +add_eig_executable(xdmdeigtstc ${CDMDEIGTST} ${AEIGTST}) endif() if(BUILD_DOUBLE) add_eig_executable(xeigtstd ${DEIGTST} ${DZIGTST} ${AEIGTST}) -add_eig_executable(xdmdeigtstd ${DDMDEIGTST}) +add_eig_executable(xdmdeigtstd ${DDMDEIGTST} ${AEIGTST}) endif() if(BUILD_COMPLEX16) add_eig_executable(xeigtstz ${ZEIGTST} ${DZIGTST} ${AEIGTST}) -add_eig_executable(xdmdeigtstz ${ZDMDEIGTST}) +add_eig_executable(xdmdeigtstz ${ZDMDEIGTST} ${AEIGTST}) endif() diff --git a/lapack-netlib/TESTING/EIG/Makefile b/lapack-netlib/TESTING/EIG/Makefile index 9cf0fc95e5..a5d5d45617 100644 --- a/lapack-netlib/TESTING/EIG/Makefile +++ b/lapack-netlib/TESTING/EIG/Makefile @@ -135,16 +135,16 @@ complex: xeigtstc xdmdeigtstc double: xeigtstd xdmdeigtstd complex16: xeigtstz xdmdeigtstz -xdmdeigtsts: $(SDMDEIGTST) $(TMGLIB) ../$(LAPACKLIB) $(BLASLIB) +xdmdeigtsts: $(SDMDEIGTST) $(AEIGTST) $(TMGLIB) ../$(LAPACKLIB) $(BLASLIB) $(LOADER) $(FFLAGS) $(LDFLAGS) -o $@ $^ -xdmdeigtstc: $(CDMDEIGTST) $(TMGLIB) ../$(LAPACKLIB) $(BLASLIB) +xdmdeigtstc: $(CDMDEIGTST) $(AEIGTST) $(TMGLIB) ../$(LAPACKLIB) $(BLASLIB) $(LOADER) $(FFLAGS) $(LDFLAGS) -o $@ $^ -xdmdeigtstd: $(DDMDEIGTST) $(TMGLIB) ../$(LAPACKLIB) $(BLASLIB) +xdmdeigtstd: $(DDMDEIGTST) $(AEIGTST) $(TMGLIB) ../$(LAPACKLIB) $(BLASLIB) $(LOADER) $(FFLAGS) $(LDFLAGS) -o $@ $^ -xdmdeigtstz: $(ZDMDEIGTST) $(TMGLIB) ../$(LAPACKLIB) $(BLASLIB) +xdmdeigtstz: $(ZDMDEIGTST) $(AEIGTST) $(TMGLIB) ../$(LAPACKLIB) $(BLASLIB) $(LOADER) $(FFLAGS) $(LDFLAGS) -o $@ $^ xeigtsts: $(SEIGTST) $(SCIGTST) $(AEIGTST) $(TMGLIB) ../$(LAPACKLIB) $(BLASLIB) diff --git a/lapack-netlib/TESTING/EIG/cchkdmd.f90 b/lapack-netlib/TESTING/EIG/cchkdmd.f90 index a9c181da9b..f8aa19a30a 100644 --- a/lapack-netlib/TESTING/EIG/cchkdmd.f90 +++ b/lapack-netlib/TESTING/EIG/cchkdmd.f90 @@ -1,721 +1,723 @@ -! This is a test program for checking the implementations of -! the implementations of the following subroutines -! -! CGEDMD, for computation of the -! Dynamic Mode Decomposition (DMD) -! CGEDMDQ, for computation of a -! QR factorization based compressed DMD -! -! Developed and supported by: -! =========================== -! Developed and coded by Zlatko Drmac, Faculty of Science, -! University of Zagreb; drmac@math.hr -! In cooperation with -! AIMdyn Inc., Santa Barbara, CA. -! ======================================================== -! How to run the code (compiler, link info) -! ======================================================== -! Compile as FORTRAN 90 (or later) and link with BLAS and -! LAPACK libraries. -! NOTE: The code is developed and tested on top of the -! Intel MKL library (versions 2022.0.3 and 2022.2.0), -! using the Intel Fortran compiler. -! -! For developers of the C++ implementation -! ======================================================== -! See the LAPACK++ and Template Numerical Toolkit (TNT) -! -! Note on a development of the GPU HP implementation -! ======================================================== -! Work in progress. See CUDA, MAGMA, SLATE. -! NOTE: The four SVD subroutines used in this code are -! included as a part of R&D and for the completeness. -! This was also an opportunity to test those SVD codes. -! If the scaling option is used all four are essentially -! equally good. For implementations on HP platforms, -! one can use whichever SVD is available. -!............................................................ - -!............................................................ -!............................................................ -! - PROGRAM DMD_TEST - - use iso_fortran_env - IMPLICIT NONE - integer, parameter :: WP = real32 -!............................................................ - REAL(KIND=WP), PARAMETER :: ONE = 1.0_WP - REAL(KIND=WP), PARAMETER :: ZERO = 0.0_WP - - COMPLEX(KIND=WP), PARAMETER :: CONE = ( 1.0_WP, 0.0_WP ) - COMPLEX(KIND=WP), PARAMETER :: CZERO = ( 0.0_WP, 0.0_WP ) -!............................................................ - REAL(KIND=WP), ALLOCATABLE, DIMENSION(:) :: RES, & - RES1, RESEX, SINGVX, SINGVQX, WORK - INTEGER , ALLOCATABLE, DIMENSION(:) :: IWORK - REAL(KIND=WP) :: WDUMMY(2) - INTEGER :: IDUMMY(4), ISEED(4) - REAL(KIND=WP) :: ANORM, COND, CONDL, CONDR, EPS, & - TOL, TOL2, SVDIFF, TMP, TMP_AU, & - TMP_FQR, TMP_REZ, TMP_REZQ, TMP_XW, & - TMP_EX -!............................................................ - COMPLEX(KIND=WP) :: CMAX - INTEGER :: LCWORK - COMPLEX(KIND=WP), ALLOCATABLE, DIMENSION(:,:) :: A, AC, & - AU, F, F0, F1, S, W, & - X, X0, Y, Y0, Y1, Z, Z1 - COMPLEX(KIND=WP), ALLOCATABLE, DIMENSION(:) :: CDA, CDR, & - CDL, CEIGS, CEIGSA, CWORK - COMPLEX(KIND=WP) :: CDUMMY(22), CDUM2X2(2,2) -!............................................................ - INTEGER :: K, KQ, LDF, LDS, LDA, LDAU, LDW, LDX, LDY, & - LDZ, LIWORK, LWORK, M, N, LLOOP, NRNK - INTEGER :: i, iJOBREF, iJOBZ, iSCALE, INFO, j, & - NFAIL, NFAIL_AU, NFAIL_F_QR, NFAIL_REZ, & - NFAIL_REZQ, NFAIL_SVDIFF, NFAIL_TOTAL, NFAILQ_TOTAL, & - NFAIL_Z_XV, MODE, MODEL, MODER, WHTSVD - INTEGER :: iNRNK, iWHTSVD, K_traj, LWMINOPT - CHARACTER :: GRADE, JOBREF, JOBZ, PIVTNG, RSIGN, & - SCALE, RESIDS, WANTQ, WANTR - LOGICAL :: TEST_QRDMD - -!..... external subroutines (BLAS and LAPACK) - EXTERNAL CAXPY, CGEEV, CGEMM, CGEMV, CLASCL -!.....external subroutines DMD package -! subroutines under test - EXTERNAL CGEDMD, CGEDMDQ -!..... external functions (BLAS and LAPACK) - EXTERNAL SCNRM2, SLAMCH - REAL(KIND=WP) :: SCNRM2, SLAMCH - EXTERNAL CLANGE - REAL(KIND=WP) :: CLANGE - EXTERNAL ICAMAX - INTEGER ICAMAX - EXTERNAL LSAME - LOGICAL LSAME - - INTRINSIC ABS, INT, MIN, MAX, SIGN -!............................................................ - - - WRITE(*,*) 'COMPLEX CODE TESTING' - - ! The test is always in pairs : ( CGEDMD and CGEDMDQ) - ! because the test includes comparing the results (in pairs). -!..................................................................................... - ! This code by default performs tests on CGEDMDQ - ! Since the QR factorizations based algorithm is designed for - ! single trajectory data, only single trajectory tests will - ! be performed with xGEDMDQ. - - WANTQ = 'Q' - WANTR = 'R' -!................................................................................. - - EPS = SLAMCH( 'P' ) ! machine precision WP - - ! Global counters of failures of some particular tests - NFAIL = 0 - NFAIL_REZ = 0 - NFAIL_REZQ = 0 - NFAIL_Z_XV = 0 - NFAIL_F_QR = 0 - NFAIL_AU = 0 - NFAIL_SVDIFF = 0 - NFAIL_TOTAL = 0 - NFAILQ_TOTAL = 0 - - DO LLOOP = 1, 4 - - WRITE(*,*) 'L Loop Index = ', LLOOP - - ! Set the dimensions of the problem ... - READ(*,*) M - WRITE(*,*) 'M = ', M - ! ... and the number of snapshots. - READ(*,*) N - WRITE(*,*) 'N = ', N - - ! Test the dimensions - IF ( ( MIN(M,N) == 0 ) .OR. ( M < N ) ) THEN - WRITE(*,*) 'Bad dimensions. Required: M >= N > 0.' - STOP - END IF -!............. - ! The seed inside the LLOOP so that each pass can be reproduced easily. - ISEED(1) = 4 - ISEED(2) = 3 - ISEED(3) = 2 - ISEED(4) = 1 - - LDA = M - LDF = M - LDX = M - LDY = M - LDW = N - LDZ = M - LDAU = M - LDS = N - - TMP_XW = ZERO - TMP_AU = ZERO - TMP_REZ = ZERO - TMP_REZQ = ZERO - SVDIFF = ZERO - TMP_EX = ZERO - - ALLOCATE( A(LDA,M) ) - ALLOCATE( AC(LDA,M) ) - ALLOCATE( F(LDF,N+1) ) - ALLOCATE( F0(LDF,N+1) ) - ALLOCATE( F1(LDF,N+1) ) - ALLOCATE( X(LDX,N) ) - ALLOCATE( X0(LDX,N) ) - ALLOCATE( Y(LDY,N+1) ) - ALLOCATE( Y0(LDY,N+1) ) - ALLOCATE( Y1(LDY,N+1) ) - ALLOCATE( AU(LDAU,N) ) - ALLOCATE( W(LDW,N) ) - ALLOCATE( S(LDS,N) ) - ALLOCATE( Z(LDZ,N) ) - ALLOCATE( Z1(LDZ,N) ) - ALLOCATE( RES(N) ) - ALLOCATE( RES1(N) ) - ALLOCATE( RESEX(N) ) - ALLOCATE( CEIGS(N) ) - ALLOCATE( SINGVX(N) ) - ALLOCATE( SINGVQX(N) ) - - TOL = 10*M*EPS - TOL2 = 10*M*N*EPS - -!............. - - DO K_traj = 1, 2 - ! Number of intial conditions in the simulation/trajectories (1 or 2) - - COND = 1.0D4 - CMAX = (1.0D1,1.0D1) - RSIGN = 'F' - GRADE = 'N' - MODEL = 6 - CONDL = 1.0D1 - MODER = 6 - CONDR = 1.0D1 - PIVTNG = 'N' - ! Loop over all parameter MODE values for CLATMR (+-1,..,+-6) - - DO MODE = 1, 6 - - ALLOCATE( IWORK(2*M) ) - ALLOCATE( CDA(M) ) - ALLOCATE( CDL(M) ) - ALLOCATE( CDR(M) ) - - CALL CLATMR( M, M, 'N', ISEED, 'N', CDA, MODE, COND, & - CMAX, RSIGN, GRADE, CDL, MODEL, CONDL, & - CDR, MODER, CONDR, PIVTNG, IWORK, M, M, & - ZERO, -ONE, 'N', A, LDA, IWORK(M+1), INFO ) - DEALLOCATE( CDR ) - DEALLOCATE( CDL ) - DEALLOCATE( CDA ) - DEALLOCATE( IWORK ) - - LCWORK = MAX(1,2*M) - ALLOCATE( CEIGSA(M) ) - ALLOCATE( CWORK(LCWORK) ) - ALLOCATE( WORK(2*M) ) - AC(1:M,1:M) = A(1:M,1:M) - CALL CGEEV( 'N','N', M, AC, LDA, CEIGSA, CDUM2X2, 2, & - CDUM2X2, 2, CWORK, LCWORK, WORK, INFO ) ! LAPACK CALL - DEALLOCATE(WORK) - DEALLOCATE(CWORK) - - TMP = ABS(CEIGSA(ICAMAX(M, CEIGSA, 1))) ! The spectral radius of A - ! Scale the matrix A to have unit spectral radius. - CALL CLASCL( 'G',0, 0, TMP, ONE, M, M, & - A, LDA, INFO ) - CALL CLASCL( 'G',0, 0, TMP, ONE, M, 1, & - CEIGSA, M, INFO ) - ANORM = CLANGE( 'F', M, M, A, LDA, WDUMMY ) - - IF ( K_traj == 2 ) THEN - ! generate data as two trajectories - ! with two inital conditions - CALL CLARNV(2, ISEED, M, F(1,1) ) - DO i = 1, N/2 - CALL CGEMV( 'N', M, M, CONE, A, LDA, F(1,i), 1, & - CZERO, F(1,i+1), 1 ) - END DO - X0(1:M,1:N/2) = F(1:M,1:N/2) - Y0(1:M,1:N/2) = F(1:M,2:N/2+1) - - CALL CLARNV(2, ISEED, M, F(1,1) ) - DO i = 1, N-N/2 - CALL CGEMV( 'N', M, M, CONE, A, LDA, F(1,i), 1, & - CZERO, F(1,i+1), 1 ) - END DO - X0(1:M,N/2+1:N) = F(1:M,1:N-N/2) - Y0(1:M,N/2+1:N) = F(1:M,2:N-N/2+1) - ELSE - CALL CLARNV(2, ISEED, M, F(1,1) ) - DO i = 1, N - CALL CGEMV( 'N', M, M, CONE, A, M, F(1,i), 1, & - CZERO, F(1,i+1), 1 ) - END DO - F0(1:M,1:N+1) = F(1:M,1:N+1) - X0(1:M,1:N) = F0(1:M,1:N) - Y0(1:M,1:N) = F0(1:M,2:N+1) - END IF - - DEALLOCATE( CEIGSA ) -!........................................................................ - - DO iJOBZ = 1, 4 - - SELECT CASE ( iJOBZ ) - CASE(1) - JOBZ = 'V' - RESIDS = 'R' - CASE(2) - JOBZ = 'V' - RESIDS = 'N' - CASE(3) - JOBZ = 'F' - RESIDS = 'N' - CASE(4) - JOBZ = 'N' - RESIDS = 'N' - END SELECT - - DO iJOBREF = 1, 3 - - SELECT CASE ( iJOBREF ) - CASE(1) - JOBREF = 'R' - CASE(2) - JOBREF = 'E' - CASE(3) - JOBREF = 'N' - END SELECT - - DO iSCALE = 1, 4 - - SELECT CASE ( iSCALE ) - CASE(1) - SCALE = 'S' - CASE(2) - SCALE = 'C' - CASE(3) - SCALE = 'Y' - CASE(4) - SCALE = 'N' - END SELECT - - DO iNRNK = -1, -2, -1 - NRNK = iNRNK - - DO iWHTSVD = 1, 3 - ! Check all four options to compute the POD basis - ! via the SVD. - WHTSVD = iWHTSVD - - DO LWMINOPT = 1, 2 - ! Workspace query for the minimal (1) and for the optimal - ! (2) workspace lengths determined by workspace query. - - ! CGEDMD is always tested and its results are also used for - ! comparisons with CGEDMDQ. - - X(1:M,1:N) = X0(1:M,1:N) - Y(1:M,1:N) = Y0(1:M,1:N) - - CALL CGEDMD( SCALE, JOBZ, RESIDS, JOBREF, WHTSVD, & - M, N, X, LDX, Y, LDY, NRNK, TOL, & - K, CEIGS, Z, LDZ, RES, & - AU, LDAU, W, LDW, S, LDS, & - CDUMMY, -1, WDUMMY, -1, IDUMMY, -1, INFO ) - - IF ( (INFO .EQ. 2) .OR. ( INFO .EQ. 3 ) & - .OR. ( INFO < 0 ) ) THEN - WRITE(*,*) 'Call to CGEDMD workspace query failed. & - &Check the calling sequence and the code.' - WRITE(*,*) 'The error code is ', INFO - WRITE(*,*) 'The input parameters were ', & - SCALE, JOBZ, RESIDS, JOBREF, WHTSVD, & - M, N, LDX, LDY, NRNK, TOL, LDZ, LDAU, LDW, LDS - STOP - ELSE - !WRITE(*,*) '... done. Workspace length computed.' - END IF - - LCWORK = INT(CDUMMY(LWMINOPT)) - ALLOCATE(CWORK(LCWORK)) - LIWORK = IDUMMY(1) - ALLOCATE(IWORK(LIWORK)) - LWORK = INT(WDUMMY(1)) - ALLOCATE(WORK(LWORK)) - - CALL CGEDMD( SCALE, JOBZ, RESIDS, JOBREF, WHTSVD, & - M, N, X, LDX, Y, LDY, NRNK, TOL, & - K, CEIGS, Z, LDZ, RES, & - AU, LDAU, W, LDW, S, LDS, & - CWORK, LCWORK, WORK, LWORK, IWORK, LIWORK, INFO ) - IF ( INFO /= 0 ) THEN - WRITE(*,*) 'Call to CGEDMD failed. & - &Check the calling sequence and the code.' - WRITE(*,*) 'The error code is ', INFO - WRITE(*,*) 'The input parameters were ',& - SCALE, JOBZ, RESIDS, JOBREF, WHTSVD, & - M, N, LDX, LDY, NRNK, TOL - STOP - END IF - SINGVX(1:N) = WORK(1:N) - - !...... CGEDMD check point - IF ( LSAME(JOBZ,'V') ) THEN - ! Check that Z = X*W, on return from CGEDMD - ! This checks that the returned eigenvectors in Z are - ! the product of the SVD'POD basis returned in X - ! and the eigenvectors of the Rayleigh quotient - ! returned in W - CALL CGEMM( 'N', 'N', M, K, K, CONE, X, LDX, W, LDW, & - CZERO, Z1, LDZ ) - TMP = ZERO - DO i = 1, K - CALL CAXPY( M, -CONE, Z(1,i), 1, Z1(1,i), 1) - TMP = MAX(TMP, SCNRM2( M, Z1(1,i), 1 ) ) - END DO - TMP_XW = MAX(TMP_XW, TMP ) - IF ( TMP_XW <= TOL ) THEN - !WRITE(*,*) ' :) .... OK .........CGEDMD PASSED.' - ELSE - NFAIL_Z_XV = NFAIL_Z_XV + 1 - WRITE(*,*) ':( .................CGEDMD FAILED!', & - 'Check the code for implementation errors.' - WRITE(*,*) 'The input parameters were ',& - SCALE, JOBZ, RESIDS, JOBREF, WHTSVD, & - M, N, LDX, LDY, NRNK, TOL - END IF - END IF - !...... CGEDMD check point - - IF ( LSAME(JOBREF,'R') ) THEN - ! The matrix A*U is returned for computing refined Ritz vectors. - ! Check that A*U is computed correctly using the formula - ! A*U = Y * V * inv(SIGMA). This depends on the - ! accuracy in the computed singular values and vectors of X. - ! See the paper for an error analysis. - ! Note that the left singular vectors of the input matrix X - ! are returned in the array X. - CALL CGEMM( 'N', 'N', M, K, M, CONE, A, LDA, X, LDX, & - CZERO, Z1, LDZ ) - TMP = ZERO - DO i = 1, K - CALL CAXPY( M, -CONE, AU(1,i), 1, Z1(1,i), 1) - TMP = MAX( TMP, SCNRM2( M, Z1(1,i),1 ) * & - SINGVX(K)/(ANORM*SINGVX(1)) ) - END DO - TMP_AU = MAX( TMP_AU, TMP ) - IF ( TMP <= TOL2 ) THEN - !WRITE(*,*) ':) .... OK .........CGEDMD PASSED.' - ELSE - NFAIL_AU = NFAIL_AU + 1 - WRITE(*,*) ':( .................CGEDMD FAILED!', & - 'Check the code for implementation errors.' - WRITE(*,*) 'The input parameters were ',& - SCALE, JOBZ, RESIDS, JOBREF, WHTSVD, & - M, N, LDX, LDY, NRNK, TOL2 - END IF - ELSEIF ( LSAME(JOBREF,'E') ) THEN - ! The unscaled vectors of the Exact DMD are computed. - ! This option is included for the sake of completeness, - ! for users who prefer the Exact DMD vectors. The - ! returned vectors are in the real form, in the same way - ! as the Ritz vectors. Here we just save the vectors - ! and test them separately using a Matlab script. - CALL CGEMM( 'N', 'N', M, K, M, CONE, A, LDA, AU, LDAU, CZERO, Y1, LDY ) - - DO i=1, K - CALL CAXPY( M, -CEIGS(i), AU(1,i), 1, Y1(1,i), 1 ) - RESEX(i) = SCNRM2( M, Y1(1,i), 1) / SCNRM2(M,AU(1,i),1) - END DO - END IF - !...... CGEDMD check point - - IF ( LSAME(RESIDS, 'R') ) THEN - ! Compare the residuals returned by CGEDMD with the - ! explicitly computed residuals using the matrix A. - ! Compute explicitly Y1 = A*Z - CALL CGEMM( 'N', 'N', M, K, M, CONE, A, LDA, Z, LDZ, CZERO, Y1, LDY ) - ! ... and then A*Z(:,i) - LAMBDA(i)*Z(:,i), using the real forms - ! of the invariant subspaces that correspond to complex conjugate - ! pairs of eigencalues. (See the description of Z in CGEDMD,) - - DO i=1, K - ! have a real eigenvalue with real eigenvector - CALL CAXPY( M, -CEIGS(i), Z(1,i), 1, Y1(1,i), 1 ) - RES1(i) = SCNRM2( M, Y1(1,i), 1) - END DO - TMP = ZERO - DO i = 1, K - TMP = MAX( TMP, ABS(RES(i) - RES1(i)) * & - SINGVX(K)/(ANORM*SINGVX(1)) ) - END DO - TMP_REZ = MAX( TMP_REZ, TMP ) - IF ( TMP <= TOL2 ) THEN - !WRITE(*,*) ':) .... OK ..........CGEDMD PASSED.' - ELSE - NFAIL_REZ = NFAIL_REZ + 1 - WRITE(*,*) ':( ..................CGEDMD FAILED!', & - 'Check the code for implementation errors.' - WRITE(*,*) 'The input parameters were ',& - SCALE, JOBZ, RESIDS, JOBREF, WHTSVD, & - M, N, LDX, LDY, NRNK, TOL - END IF - - - IF ( LSAME(JOBREF,'E') ) THEN - TMP = ZERO - DO i = 1, K - TMP = MAX( TMP, ABS(RES1(i) - RESEX(i))/(RES1(i)+RESEX(i)) ) - END DO - TMP_EX = MAX(TMP_EX,TMP) - END IF - - END IF - - DEALLOCATE(CWORK) - DEALLOCATE(WORK) - DEALLOCATE(IWORK) - -!....................................................................................................... - - IF ( K_traj == 1 ) THEN - - F(1:M,1:N+1) = F0(1:M,1:N+1) - CALL CGEDMDQ( SCALE, JOBZ, RESIDS, WANTQ, WANTR, JOBREF, & - WHTSVD, M, N+1, F, LDF, X, LDX, Y, LDY, & - NRNK, TOL, K, CEIGS, Z, LDZ, RES, AU, & - LDAU, W, LDW, S, LDS, CDUMMY, -1, & - WDUMMY, -1, IDUMMY, -1, INFO ) - - LCWORK = INT(CDUMMY(LWMINOPT)) - ALLOCATE(CWORK(LCWORK)) - LIWORK = IDUMMY(1) - ALLOCATE(IWORK(LIWORK)) - LWORK = INT(WDUMMY(1)) - ALLOCATE(WORK(LWORK)) - - CALL CGEDMDQ( SCALE, JOBZ, RESIDS, WANTQ, WANTR, JOBREF, & - WHTSVD, M, N+1, F, LDF, X, LDX, Y, LDY, & - NRNK, TOL, KQ, CEIGS, Z, LDZ, RES, AU, & - LDAU, W, LDW, S, LDS, CWORK, LCWORK, & - WORK, LWORK, IWORK, LIWORK, INFO ) - IF ( INFO /= 0 ) THEN - WRITE(*,*) 'Call to CGEDMDQ failed. & - &Check the calling sequence and the code.' - WRITE(*,*) 'The error code is ', INFO - WRITE(*,*) 'The input parameters were ',& - SCALE, JOBZ, RESIDS, WANTQ, WANTR, WHTSVD, & - M, N, LDX, LDY, NRNK, TOL - STOP - END IF - SINGVQX(1:N) =WORK(1:N) - - !..... ZGEDMDQ check point - - TMP = ZERO - DO i = 1, MIN(K, KQ) - TMP = MAX(TMP, ABS(SINGVX(i)-SINGVQX(i)) / & - SINGVX(1) ) - END DO - SVDIFF = MAX( SVDIFF, TMP ) - IF ( TMP > TOL2 ) THEN - WRITE(*,*) 'FAILED! Something was wrong with the run.' - NFAIL_SVDIFF = NFAIL_SVDIFF + 1 - END IF - !..... CGEDMDQ check point - - !..... CGEDMDQ check point - IF ( LSAME(WANTQ,'Q') .AND. LSAME(WANTR,'R') ) THEN - ! Check that the QR factors are computed and returned - ! as requested. The residual ||F-Q*R||_F / ||F||_F - ! is compared to M*N*EPS. - F1(1:M,1:N+1) = F0(1:M,1:N+1) - CALL CGEMM( 'N', 'N', M, N+1, MIN(M,N+1), -CONE, F, & - LDF, Y, LDY, CONE, F1, LDF ) - TMP_FQR = CLANGE( 'F', M, N+1, F1, LDF, WORK ) / & - CLANGE( 'F', M, N+1, F0, LDF, WORK ) - IF ( TMP_FQR <= TOL2 ) THEN - !WRITE(*,*) ':) CGEDMDQ ........ PASSED.' - ELSE - WRITE(*,*) ':( CGEDMDQ ........ FAILED.' - NFAIL_F_QR = NFAIL_F_QR + 1 - END IF - END IF - !..... ZGEDMDQ checkpoint - !..... ZGEDMDQ checkpoint - IF ( LSAME(RESIDS, 'R') ) THEN - ! Compare the residuals returned by ZGEDMDQ with the - ! explicitly computed residuals using the matrix A. - ! Compute explicitly Y1 = A*Z - CALL CGEMM( 'N', 'N', M, KQ, M, CONE, A, LDA, Z, LDZ, CZERO, Y1, LDY ) - ! ... and then A*Z(:,i) - LAMBDA(i)*Z(:,i), using the real forms - ! of the invariant subspaces that correspond to complex conjugate - ! pairs of eigencalues. (See the description of Z in ZGEDMDQ) - DO i = 1, KQ - ! have a real eigenvalue with real eigenvector - CALL CAXPY( M, -CEIGS(i), Z(1,i), 1, Y1(1,i), 1 ) - ! Y(1:M,i) = Y(1:M,i) - REIG(i)*Z(1:M,i) - RES1(i) = SCNRM2( M, Y1(1,i), 1) - END DO - TMP = ZERO - DO i = 1, KQ - TMP = MAX( TMP, ABS(RES(i) - RES1(i)) * & - SINGVQX(KQ)/(ANORM*SINGVQX(1)) ) - END DO - TMP_REZQ = MAX( TMP_REZQ, TMP ) - IF ( TMP <= TOL2 ) THEN - !WRITE(*,*) '.... OK ........ CGEDMDQ PASSED.' - ELSE - NFAIL_REZQ = NFAIL_REZQ + 1 - WRITE(*,*) '................ CGEDMDQ FAILED!', & - 'Check the code for implementation errors.' - END IF - END IF - - DEALLOCATE(CWORK) - DEALLOCATE(WORK) - DEALLOCATE(IWORK) - - END IF - - END DO ! LWMINOPT - !write(*,*) 'LWMINOPT loop completed' - END DO ! iWHTSVD - !write(*,*) 'WHTSVD loop completed' - END DO ! iNRNK -2:-1 - !write(*,*) 'NRNK loop completed' - END DO ! iSCALE 1:4 - !write(*,*) 'SCALE loop completed' - END DO - !write(*,*) 'JOBREF loop completed' - END DO ! iJOBZ - !write(*,*) 'JOBZ loop completed' - - END DO ! MODE -6:6 - !write(*,*) 'MODE loop completed' - END DO ! 1 or 2 trajectories - !write(*,*) 'trajectories loop completed' - - DEALLOCATE( A ) - DEALLOCATE( AC ) - DEALLOCATE( Z ) - DEALLOCATE( F ) - DEALLOCATE( F0 ) - DEALLOCATE( F1 ) - DEALLOCATE( X ) - DEALLOCATE( X0 ) - DEALLOCATE( Y ) - DEALLOCATE( Y0 ) - DEALLOCATE( Y1 ) - DEALLOCATE( AU ) - DEALLOCATE( W ) - DEALLOCATE( S ) - DEALLOCATE( Z1 ) - DEALLOCATE( RES ) - DEALLOCATE( RES1 ) - DEALLOCATE( RESEX ) - DEALLOCATE( CEIGS ) - DEALLOCATE( SINGVX ) - DEALLOCATE( SINGVQX ) - - END DO ! LLOOP - - WRITE(*,*) - WRITE(*,*) '>>>>>>>>>>>>>>>>>>>>>>>>>>' - WRITE(*,*) ' Test summary for CGEDMD :' - WRITE(*,*) '>>>>>>>>>>>>>>>>>>>>>>>>>>' - WRITE(*,*) - IF ( NFAIL_Z_XV == 0 ) THEN - WRITE(*,*) '>>>> Z - U*V test PASSED.' - ELSE - WRITE(*,*) 'Z - U*V test FAILED ', NFAIL_Z_XV, ' time(s)' - WRITE(*,*) 'Max error ||Z-U*V||_F was ', TMP_XW - NFAIL_TOTAL = NFAIL_TOTAL + NFAIL_z_XV - END IF - - IF ( NFAIL_AU == 0 ) THEN - WRITE(*,*) '>>>> A*U test PASSED. ' - ELSE - WRITE(*,*) 'A*U test FAILED ', NFAIL_AU, ' time(s)' - WRITE(*,*) 'Max A*U test adjusted error measure was ', TMP_AU - WRITE(*,*) 'It should be up to O(M*N) times EPS, EPS = ', EPS - NFAIL_TOTAL = NFAIL_TOTAL + NFAIL_AU - END IF - - - IF ( NFAIL_REZ == 0 ) THEN - WRITE(*,*) '>>>> Rezidual computation test PASSED.' - ELSE - WRITE(*,*) 'Rezidual computation test FAILED ', NFAIL_REZ, 'time(s)' - WRITE(*,*) 'Max residual computing test adjusted error measure was ', TMP_REZ - WRITE(*,*) 'It should be up to O(M*N) times EPS, EPS = ', EPS - NFAIL_TOTAL = NFAIL_TOTAL + NFAIL_REZ - END IF - IF ( NFAIL_TOTAL == 0 ) THEN - WRITE(*,*) '>>>> CGEDMD :: ALL TESTS PASSED.' - ELSE - WRITE(*,*) NFAIL_TOTAL, 'FAILURES!' - WRITE(*,*) '>>>>>>>>>>>>>> CGEDMD :: TESTS FAILED. CHECK THE IMPLEMENTATION.' - END IF - - WRITE(*,*) - WRITE(*,*) '>>>>>>>>>>>>>>>>>>>>>>>>>>' - WRITE(*,*) ' Test summary for CGEDMDQ :' - WRITE(*,*) '>>>>>>>>>>>>>>>>>>>>>>>>>>' - WRITE(*,*) - - IF ( NFAIL_SVDIFF == 0 ) THEN - WRITE(*,*) '>>>> CGEDMD and CGEDMDQ computed singular & - &values test PASSED.' - ELSE - WRITE(*,*) 'ZGEDMD and ZGEDMDQ discrepancies in & - &the singular values unacceptable ', & - NFAIL_SVDIFF, ' times. Test FAILED.' - WRITE(*,*) 'The maximal discrepancy in the singular values (relative to the norm) was ', SVDIFF - WRITE(*,*) 'It should be up to O(M*N) times EPS, EPS = ', EPS - NFAILQ_TOTAL = NFAILQ_TOTAL + NFAIL_SVDIFF - END IF - IF ( NFAIL_F_QR == 0 ) THEN - WRITE(*,*) '>>>> F - Q*R test PASSED.' - ELSE - WRITE(*,*) 'F - Q*R test FAILED ', NFAIL_F_QR, ' time(s)' - WRITE(*,*) 'The largest relative residual was ', TMP_FQR - WRITE(*,*) 'It should be up to O(M*N) times EPS, EPS = ', EPS - NFAILQ_TOTAL = NFAILQ_TOTAL + NFAIL_F_QR - END IF - - IF ( NFAIL_REZQ == 0 ) THEN - WRITE(*,*) '>>>> Rezidual computation test PASSED.' - ELSE - WRITE(*,*) 'Rezidual computation test FAILED ', NFAIL_REZQ, 'time(s)' - WRITE(*,*) 'Max residual computing test adjusted error measure was ', TMP_REZQ - WRITE(*,*) 'It should be up to O(M*N) times EPS, EPS = ', EPS - NFAILQ_TOTAL = NFAILQ_TOTAL + NFAIL_REZQ - END IF - - IF ( NFAILQ_TOTAL == 0 ) THEN - WRITE(*,*) '>>>>>>> CGEDMDQ :: ALL TESTS PASSED.' - ELSE - WRITE(*,*) NFAILQ_TOTAL, 'FAILURES!' - WRITE(*,*) '>>>>>>> CGEDMDQ :: TESTS FAILED. CHECK THE IMPLEMENTATION.' - END IF - - WRITE(*,*) - WRITE(*,*) 'Test completed.' - STOP - END +! This is a test program for checking the implementations of +! the implementations of the following subroutines +! +! CGEDMD, for computation of the +! Dynamic Mode Decomposition (DMD) +! CGEDMDQ, for computation of a +! QR factorization based compressed DMD +! +! Developed and supported by: +! =========================== +! Developed and coded by Zlatko Drmac, Faculty of Science, +! University of Zagreb; drmac@math.hr +! In cooperation with +! AIMdyn Inc., Santa Barbara, CA. +! ======================================================== +! How to run the code (compiler, link info) +! ======================================================== +! Compile as FORTRAN 90 (or later) and link with BLAS and +! LAPACK libraries. +! NOTE: The code is developed and tested on top of the +! Intel MKL library (versions 2022.0.3 and 2022.2.0), +! using the Intel Fortran compiler. +! +! For developers of the C++ implementation +! ======================================================== +! See the LAPACK++ and Template Numerical Toolkit (TNT) +! +! Note on a development of the GPU HP implementation +! ======================================================== +! Work in progress. See CUDA, MAGMA, SLATE. +! NOTE: The four SVD subroutines used in this code are +! included as a part of R&D and for the completeness. +! This was also an opportunity to test those SVD codes. +! If the scaling option is used all four are essentially +! equally good. For implementations on HP platforms, +! one can use whichever SVD is available. +!............................................................ + +!............................................................ +!............................................................ +! + PROGRAM DMD_TEST + + use iso_fortran_env + IMPLICIT NONE + integer, parameter :: WP = real32 +!............................................................ + REAL(KIND=WP), PARAMETER :: ONE = 1.0_WP + REAL(KIND=WP), PARAMETER :: ZERO = 0.0_WP + + COMPLEX(KIND=WP), PARAMETER :: CONE = ( 1.0_WP, 0.0_WP ) + COMPLEX(KIND=WP), PARAMETER :: CZERO = ( 0.0_WP, 0.0_WP ) +!............................................................ + REAL(KIND=WP), ALLOCATABLE, DIMENSION(:) :: RES, & + RES1, RESEX, SINGVX, SINGVQX, WORK + INTEGER , ALLOCATABLE, DIMENSION(:) :: IWORK + REAL(KIND=WP) :: WDUMMY(2) + INTEGER :: IDUMMY(4), ISEED(4) + REAL(KIND=WP) :: ANORM, COND, CONDL, CONDR, EPS, & + TOL, TOL2, SVDIFF, TMP, TMP_AU, & + TMP_FQR, TMP_REZ, TMP_REZQ, TMP_XW, & + TMP_EX +!............................................................ + COMPLEX(KIND=WP) :: CMAX + INTEGER :: LCWORK + COMPLEX(KIND=WP), ALLOCATABLE, DIMENSION(:,:) :: A, AC, & + AU, F, F0, F1, S, W, & + X, X0, Y, Y0, Y1, Z, Z1 + COMPLEX(KIND=WP), ALLOCATABLE, DIMENSION(:) :: CDA, CDR, & + CDL, CEIGS, CEIGSA, CWORK + COMPLEX(KIND=WP) :: CDUMMY(22), CDUM2X2(2,2) +!............................................................ + INTEGER :: K, KQ, LDF, LDS, LDA, LDAU, LDW, LDX, LDY, & + LDZ, LIWORK, LWORK, M, N, LLOOP, NRNK + INTEGER :: i, iJOBREF, iJOBZ, iSCALE, INFO, j, & + NFAIL, NFAIL_AU, NFAIL_F_QR, NFAIL_REZ, & + NFAIL_REZQ, NFAIL_SVDIFF, NFAIL_TOTAL, NFAILQ_TOTAL, & + NFAIL_Z_XV, MODE, MODEL, MODER, WHTSVD + INTEGER :: iNRNK, iWHTSVD, K_traj, LWMINOPT + CHARACTER :: GRADE, JOBREF, JOBZ, PIVTNG, RSIGN, & + SCALE, RESIDS, WANTQ, WANTR + LOGICAL :: TEST_QRDMD + +!..... external subroutines (BLAS and LAPACK) + EXTERNAL CAXPY, CGEEV, CGEMM, CGEMV, CLASCL, XLAENV +!.....external subroutines DMD package +! subroutines under test + EXTERNAL CGEDMD, CGEDMDQ +!..... external functions (BLAS and LAPACK) + EXTERNAL SCNRM2, SLAMCH + REAL(KIND=WP) :: SCNRM2, SLAMCH + EXTERNAL CLANGE + REAL(KIND=WP) :: CLANGE + EXTERNAL ICAMAX + INTEGER ICAMAX + EXTERNAL LSAME + LOGICAL LSAME + + INTRINSIC ABS, INT, MIN, MAX, SIGN +!............................................................ + + + WRITE(*,*) 'COMPLEX CODE TESTING' + + ! The test is always in pairs : ( CGEDMD and CGEDMDQ) + ! because the test includes comparing the results (in pairs). +!..................................................................................... + ! This code by default performs tests on CGEDMDQ + ! Since the QR factorizations based algorithm is designed for + ! single trajectory data, only single trajectory tests will + ! be performed with xGEDMDQ. + + WANTQ = 'Q' + WANTR = 'R' + ! Initialize the divide-and-conquer cutoff used by xGESDD/xBDSDC. + CALL XLAENV( 9, 25 ) +!................................................................................. + + EPS = SLAMCH( 'P' ) ! machine precision WP + + ! Global counters of failures of some particular tests + NFAIL = 0 + NFAIL_REZ = 0 + NFAIL_REZQ = 0 + NFAIL_Z_XV = 0 + NFAIL_F_QR = 0 + NFAIL_AU = 0 + NFAIL_SVDIFF = 0 + NFAIL_TOTAL = 0 + NFAILQ_TOTAL = 0 + + DO LLOOP = 1, 4 + + WRITE(*,*) 'L Loop Index = ', LLOOP + + ! Set the dimensions of the problem ... + READ(*,*) M + WRITE(*,*) 'M = ', M + ! ... and the number of snapshots. + READ(*,*) N + WRITE(*,*) 'N = ', N + + ! Test the dimensions + IF ( ( MIN(M,N) == 0 ) .OR. ( M < N ) ) THEN + WRITE(*,*) 'Bad dimensions. Required: M >= N > 0.' + STOP + END IF +!............. + ! The seed inside the LLOOP so that each pass can be reproduced easily. + ISEED(1) = 4 + ISEED(2) = 3 + ISEED(3) = 2 + ISEED(4) = 1 + + LDA = M + LDF = M + LDX = M + LDY = M + LDW = N + LDZ = M + LDAU = M + LDS = N + + TMP_XW = ZERO + TMP_AU = ZERO + TMP_REZ = ZERO + TMP_REZQ = ZERO + SVDIFF = ZERO + TMP_EX = ZERO + + ALLOCATE( A(LDA,M) ) + ALLOCATE( AC(LDA,M) ) + ALLOCATE( F(LDF,N+1) ) + ALLOCATE( F0(LDF,N+1) ) + ALLOCATE( F1(LDF,N+1) ) + ALLOCATE( X(LDX,N) ) + ALLOCATE( X0(LDX,N) ) + ALLOCATE( Y(LDY,N+1) ) + ALLOCATE( Y0(LDY,N+1) ) + ALLOCATE( Y1(LDY,N+1) ) + ALLOCATE( AU(LDAU,N) ) + ALLOCATE( W(LDW,N) ) + ALLOCATE( S(LDS,N) ) + ALLOCATE( Z(LDZ,N) ) + ALLOCATE( Z1(LDZ,N) ) + ALLOCATE( RES(N) ) + ALLOCATE( RES1(N) ) + ALLOCATE( RESEX(N) ) + ALLOCATE( CEIGS(N) ) + ALLOCATE( SINGVX(N) ) + ALLOCATE( SINGVQX(N) ) + + TOL = 10*M*EPS + TOL2 = 10*M*N*EPS + +!............. + + DO K_traj = 1, 2 + ! Number of initial conditions in the simulation/trajectories (1 or 2) + + COND = 1.0D4 + CMAX = (1.0D1,1.0D1) + RSIGN = 'F' + GRADE = 'N' + MODEL = 6 + CONDL = 1.0D1 + MODER = 6 + CONDR = 1.0D1 + PIVTNG = 'N' + ! Loop over all parameter MODE values for CLATMR (+-1,..,+-6) + + DO MODE = 1, 6 + + ALLOCATE( IWORK(2*M) ) + ALLOCATE( CDA(M) ) + ALLOCATE( CDL(M) ) + ALLOCATE( CDR(M) ) + + CALL CLATMR( M, M, 'N', ISEED, 'N', CDA, MODE, COND, & + CMAX, RSIGN, GRADE, CDL, MODEL, CONDL, & + CDR, MODER, CONDR, PIVTNG, IWORK, M, M, & + ZERO, -ONE, 'N', A, LDA, IWORK(M+1), INFO ) + DEALLOCATE( CDR ) + DEALLOCATE( CDL ) + DEALLOCATE( CDA ) + DEALLOCATE( IWORK ) + + LCWORK = MAX(1,2*M) + ALLOCATE( CEIGSA(M) ) + ALLOCATE( CWORK(LCWORK) ) + ALLOCATE( WORK(2*M) ) + AC(1:M,1:M) = A(1:M,1:M) + CALL CGEEV( 'N','N', M, AC, LDA, CEIGSA, CDUM2X2, 2, & + CDUM2X2, 2, CWORK, LCWORK, WORK, INFO ) ! LAPACK CALL + DEALLOCATE(WORK) + DEALLOCATE(CWORK) + + TMP = ABS(CEIGSA(ICAMAX(M, CEIGSA, 1))) ! The spectral radius of A + ! Scale the matrix A to have unit spectral radius. + CALL CLASCL( 'G',0, 0, TMP, ONE, M, M, & + A, LDA, INFO ) + CALL CLASCL( 'G',0, 0, TMP, ONE, M, 1, & + CEIGSA, M, INFO ) + ANORM = CLANGE( 'F', M, M, A, LDA, WDUMMY ) + + IF ( K_traj == 2 ) THEN + ! generate data as two trajectories + ! with two initial conditions + CALL CLARNV(2, ISEED, M, F(1,1) ) + DO i = 1, N/2 + CALL CGEMV( 'N', M, M, CONE, A, LDA, F(1,i), 1, & + CZERO, F(1,i+1), 1 ) + END DO + X0(1:M,1:N/2) = F(1:M,1:N/2) + Y0(1:M,1:N/2) = F(1:M,2:N/2+1) + + CALL CLARNV(2, ISEED, M, F(1,1) ) + DO i = 1, N-N/2 + CALL CGEMV( 'N', M, M, CONE, A, LDA, F(1,i), 1, & + CZERO, F(1,i+1), 1 ) + END DO + X0(1:M,N/2+1:N) = F(1:M,1:N-N/2) + Y0(1:M,N/2+1:N) = F(1:M,2:N-N/2+1) + ELSE + CALL CLARNV(2, ISEED, M, F(1,1) ) + DO i = 1, N + CALL CGEMV( 'N', M, M, CONE, A, M, F(1,i), 1, & + CZERO, F(1,i+1), 1 ) + END DO + F0(1:M,1:N+1) = F(1:M,1:N+1) + X0(1:M,1:N) = F0(1:M,1:N) + Y0(1:M,1:N) = F0(1:M,2:N+1) + END IF + + DEALLOCATE( CEIGSA ) +!........................................................................ + + DO iJOBZ = 1, 4 + + SELECT CASE ( iJOBZ ) + CASE(1) + JOBZ = 'V' + RESIDS = 'R' + CASE(2) + JOBZ = 'V' + RESIDS = 'N' + CASE(3) + JOBZ = 'F' + RESIDS = 'N' + CASE(4) + JOBZ = 'N' + RESIDS = 'N' + END SELECT + + DO iJOBREF = 1, 3 + + SELECT CASE ( iJOBREF ) + CASE(1) + JOBREF = 'R' + CASE(2) + JOBREF = 'E' + CASE(3) + JOBREF = 'N' + END SELECT + + DO iSCALE = 1, 4 + + SELECT CASE ( iSCALE ) + CASE(1) + SCALE = 'S' + CASE(2) + SCALE = 'C' + CASE(3) + SCALE = 'Y' + CASE(4) + SCALE = 'N' + END SELECT + + DO iNRNK = -1, -2, -1 + NRNK = iNRNK + + DO iWHTSVD = 1, 3 + ! Check all four options to compute the POD basis + ! via the SVD. + WHTSVD = iWHTSVD + + DO LWMINOPT = 1, 2 + ! Workspace query for the minimal (1) and for the optimal + ! (2) workspace lengths determined by workspace query. + + ! CGEDMD is always tested and its results are also used for + ! comparisons with CGEDMDQ. + + X(1:M,1:N) = X0(1:M,1:N) + Y(1:M,1:N) = Y0(1:M,1:N) + + CALL CGEDMD( SCALE, JOBZ, RESIDS, JOBREF, WHTSVD, & + M, N, X, LDX, Y, LDY, NRNK, TOL, & + K, CEIGS, Z, LDZ, RES, & + AU, LDAU, W, LDW, S, LDS, & + CDUMMY, -1, WDUMMY, -1, IDUMMY, -1, INFO ) + + IF ( (INFO .EQ. 2) .OR. ( INFO .EQ. 3 ) & + .OR. ( INFO < 0 ) ) THEN + WRITE(*,*) 'Call to CGEDMD workspace query failed. & + &Check the calling sequence and the code.' + WRITE(*,*) 'The error code is ', INFO + WRITE(*,*) 'The input parameters were ', & + SCALE, JOBZ, RESIDS, JOBREF, WHTSVD, & + M, N, LDX, LDY, NRNK, TOL, LDZ, LDAU, LDW, LDS + STOP + ELSE + !WRITE(*,*) '... done. Workspace length computed.' + END IF + + LCWORK = INT(CDUMMY(LWMINOPT)) + ALLOCATE(CWORK(LCWORK)) + LIWORK = IDUMMY(1) + ALLOCATE(IWORK(LIWORK)) + LWORK = INT(WDUMMY(1)) + ALLOCATE(WORK(LWORK)) + + CALL CGEDMD( SCALE, JOBZ, RESIDS, JOBREF, WHTSVD, & + M, N, X, LDX, Y, LDY, NRNK, TOL, & + K, CEIGS, Z, LDZ, RES, & + AU, LDAU, W, LDW, S, LDS, & + CWORK, LCWORK, WORK, LWORK, IWORK, LIWORK, INFO ) + IF ( INFO /= 0 ) THEN + WRITE(*,*) 'Call to CGEDMD failed. & + &Check the calling sequence and the code.' + WRITE(*,*) 'The error code is ', INFO + WRITE(*,*) 'The input parameters were ',& + SCALE, JOBZ, RESIDS, JOBREF, WHTSVD, & + M, N, LDX, LDY, NRNK, TOL + STOP + END IF + SINGVX(1:N) = WORK(1:N) + + !...... CGEDMD check point + IF ( LSAME(JOBZ,'V') ) THEN + ! Check that Z = X*W, on return from CGEDMD + ! This checks that the returned eigenvectors in Z are + ! the product of the SVD'POD basis returned in X + ! and the eigenvectors of the Rayleigh quotient + ! returned in W + CALL CGEMM( 'N', 'N', M, K, K, CONE, X, LDX, W, LDW, & + CZERO, Z1, LDZ ) + TMP = ZERO + DO i = 1, K + CALL CAXPY( M, -CONE, Z(1,i), 1, Z1(1,i), 1) + TMP = MAX(TMP, SCNRM2( M, Z1(1,i), 1 ) ) + END DO + TMP_XW = MAX(TMP_XW, TMP ) + IF ( TMP_XW <= TOL ) THEN + !WRITE(*,*) ' :) .... OK .........CGEDMD PASSED.' + ELSE + NFAIL_Z_XV = NFAIL_Z_XV + 1 + WRITE(*,*) ':( .................CGEDMD FAILED!', & + 'Check the code for implementation errors.' + WRITE(*,*) 'The input parameters were ',& + SCALE, JOBZ, RESIDS, JOBREF, WHTSVD, & + M, N, LDX, LDY, NRNK, TOL + END IF + END IF + !...... CGEDMD check point + + IF ( LSAME(JOBREF,'R') ) THEN + ! The matrix A*U is returned for computing refined Ritz vectors. + ! Check that A*U is computed correctly using the formula + ! A*U = Y * V * inv(SIGMA). This depends on the + ! accuracy in the computed singular values and vectors of X. + ! See the paper for an error analysis. + ! Note that the left singular vectors of the input matrix X + ! are returned in the array X. + CALL CGEMM( 'N', 'N', M, K, M, CONE, A, LDA, X, LDX, & + CZERO, Z1, LDZ ) + TMP = ZERO + DO i = 1, K + CALL CAXPY( M, -CONE, AU(1,i), 1, Z1(1,i), 1) + TMP = MAX( TMP, SCNRM2( M, Z1(1,i),1 ) * & + SINGVX(K)/(ANORM*SINGVX(1)) ) + END DO + TMP_AU = MAX( TMP_AU, TMP ) + IF ( TMP <= TOL2 ) THEN + !WRITE(*,*) ':) .... OK .........CGEDMD PASSED.' + ELSE + NFAIL_AU = NFAIL_AU + 1 + WRITE(*,*) ':( .................CGEDMD FAILED!', & + 'Check the code for implementation errors.' + WRITE(*,*) 'The input parameters were ',& + SCALE, JOBZ, RESIDS, JOBREF, WHTSVD, & + M, N, LDX, LDY, NRNK, TOL2 + END IF + ELSEIF ( LSAME(JOBREF,'E') ) THEN + ! The unscaled vectors of the Exact DMD are computed. + ! This option is included for the sake of completeness, + ! for users who prefer the Exact DMD vectors. The + ! returned vectors are in the real form, in the same way + ! as the Ritz vectors. Here we just save the vectors + ! and test them separately using a Matlab script. + CALL CGEMM( 'N', 'N', M, K, M, CONE, A, LDA, AU, LDAU, CZERO, Y1, LDY ) + + DO i=1, K + CALL CAXPY( M, -CEIGS(i), AU(1,i), 1, Y1(1,i), 1 ) + RESEX(i) = SCNRM2( M, Y1(1,i), 1) / SCNRM2(M,AU(1,i),1) + END DO + END IF + !...... CGEDMD check point + + IF ( LSAME(RESIDS, 'R') ) THEN + ! Compare the residuals returned by CGEDMD with the + ! explicitly computed residuals using the matrix A. + ! Compute explicitly Y1 = A*Z + CALL CGEMM( 'N', 'N', M, K, M, CONE, A, LDA, Z, LDZ, CZERO, Y1, LDY ) + ! ... and then A*Z(:,i) - LAMBDA(i)*Z(:,i), using the real forms + ! of the invariant subspaces that correspond to complex conjugate + ! pairs of eigenvalues. (See the description of Z in CGEDMD,) + + DO i=1, K + ! have a real eigenvalue with real eigenvector + CALL CAXPY( M, -CEIGS(i), Z(1,i), 1, Y1(1,i), 1 ) + RES1(i) = SCNRM2( M, Y1(1,i), 1) + END DO + TMP = ZERO + DO i = 1, K + TMP = MAX( TMP, ABS(RES(i) - RES1(i)) * & + SINGVX(K)/(ANORM*SINGVX(1)) ) + END DO + TMP_REZ = MAX( TMP_REZ, TMP ) + IF ( TMP <= TOL2 ) THEN + !WRITE(*,*) ':) .... OK ..........CGEDMD PASSED.' + ELSE + NFAIL_REZ = NFAIL_REZ + 1 + WRITE(*,*) ':( ..................CGEDMD FAILED!', & + 'Check the code for implementation errors.' + WRITE(*,*) 'The input parameters were ',& + SCALE, JOBZ, RESIDS, JOBREF, WHTSVD, & + M, N, LDX, LDY, NRNK, TOL + END IF + + + IF ( LSAME(JOBREF,'E') ) THEN + TMP = ZERO + DO i = 1, K + TMP = MAX( TMP, ABS(RES1(i) - RESEX(i))/(RES1(i)+RESEX(i)) ) + END DO + TMP_EX = MAX(TMP_EX,TMP) + END IF + + END IF + + DEALLOCATE(CWORK) + DEALLOCATE(WORK) + DEALLOCATE(IWORK) + +!....................................................................................................... + + IF ( K_traj == 1 ) THEN + + F(1:M,1:N+1) = F0(1:M,1:N+1) + CALL CGEDMDQ( SCALE, JOBZ, RESIDS, WANTQ, WANTR, JOBREF, & + WHTSVD, M, N+1, F, LDF, X, LDX, Y, LDY, & + NRNK, TOL, K, CEIGS, Z, LDZ, RES, AU, & + LDAU, W, LDW, S, LDS, CDUMMY, -1, & + WDUMMY, -1, IDUMMY, -1, INFO ) + + LCWORK = INT(CDUMMY(LWMINOPT)) + ALLOCATE(CWORK(LCWORK)) + LIWORK = IDUMMY(1) + ALLOCATE(IWORK(LIWORK)) + LWORK = INT(WDUMMY(1)) + ALLOCATE(WORK(LWORK)) + + CALL CGEDMDQ( SCALE, JOBZ, RESIDS, WANTQ, WANTR, JOBREF, & + WHTSVD, M, N+1, F, LDF, X, LDX, Y, LDY, & + NRNK, TOL, KQ, CEIGS, Z, LDZ, RES, AU, & + LDAU, W, LDW, S, LDS, CWORK, LCWORK, & + WORK, LWORK, IWORK, LIWORK, INFO ) + IF ( INFO /= 0 ) THEN + WRITE(*,*) 'Call to CGEDMDQ failed. & + &Check the calling sequence and the code.' + WRITE(*,*) 'The error code is ', INFO + WRITE(*,*) 'The input parameters were ',& + SCALE, JOBZ, RESIDS, WANTQ, WANTR, WHTSVD, & + M, N, LDX, LDY, NRNK, TOL + STOP + END IF + SINGVQX(1:N) =WORK(1:N) + + !..... CGEDMDQ check point + + TMP = ZERO + DO i = 1, MIN(K, KQ) + TMP = MAX(TMP, ABS(SINGVX(i)-SINGVQX(i)) / & + SINGVX(1) ) + END DO + SVDIFF = MAX( SVDIFF, TMP ) + IF ( TMP > TOL2 ) THEN + WRITE(*,*) 'FAILED! Something was wrong with the run.' + NFAIL_SVDIFF = NFAIL_SVDIFF + 1 + END IF + !..... CGEDMDQ check point + + !..... CGEDMDQ check point + IF ( LSAME(WANTQ,'Q') .AND. LSAME(WANTR,'R') ) THEN + ! Check that the QR factors are computed and returned + ! as requested. The residual ||F-Q*R||_F / ||F||_F + ! is compared to M*N*EPS. + F1(1:M,1:N+1) = F0(1:M,1:N+1) + CALL CGEMM( 'N', 'N', M, N+1, MIN(M,N+1), -CONE, F, & + LDF, Y, LDY, CONE, F1, LDF ) + TMP_FQR = CLANGE( 'F', M, N+1, F1, LDF, WORK ) / & + CLANGE( 'F', M, N+1, F0, LDF, WORK ) + IF ( TMP_FQR <= TOL2 ) THEN + !WRITE(*,*) ':) CGEDMDQ ........ PASSED.' + ELSE + WRITE(*,*) ':( CGEDMDQ ........ FAILED.' + NFAIL_F_QR = NFAIL_F_QR + 1 + END IF + END IF + !..... CGEDMDQ checkpoint + !..... CGEDMDQ checkpoint + IF ( LSAME(RESIDS, 'R') ) THEN + ! Compare the residuals returned by CGEDMDQ with the + ! explicitly computed residuals using the matrix A. + ! Compute explicitly Y1 = A*Z + CALL CGEMM( 'N', 'N', M, KQ, M, CONE, A, LDA, Z, LDZ, CZERO, Y1, LDY ) + ! ... and then A*Z(:,i) - LAMBDA(i)*Z(:,i), using the real forms + ! of the invariant subspaces that correspond to complex conjugate + ! pairs of eigenvalues. (See the description of Z in CGEDMDQ) + DO i = 1, KQ + ! have a real eigenvalue with real eigenvector + CALL CAXPY( M, -CEIGS(i), Z(1,i), 1, Y1(1,i), 1 ) + ! Y(1:M,i) = Y(1:M,i) - REIG(i)*Z(1:M,i) + RES1(i) = SCNRM2( M, Y1(1,i), 1) + END DO + TMP = ZERO + DO i = 1, KQ + TMP = MAX( TMP, ABS(RES(i) - RES1(i)) * & + SINGVQX(KQ)/(ANORM*SINGVQX(1)) ) + END DO + TMP_REZQ = MAX( TMP_REZQ, TMP ) + IF ( TMP <= TOL2 ) THEN + !WRITE(*,*) '.... OK ........ CGEDMDQ PASSED.' + ELSE + NFAIL_REZQ = NFAIL_REZQ + 1 + WRITE(*,*) '................ CGEDMDQ FAILED!', & + 'Check the code for implementation errors.' + END IF + END IF + + DEALLOCATE(CWORK) + DEALLOCATE(WORK) + DEALLOCATE(IWORK) + + END IF + + END DO ! LWMINOPT + !write(*,*) 'LWMINOPT loop completed' + END DO ! iWHTSVD + !write(*,*) 'WHTSVD loop completed' + END DO ! iNRNK -2:-1 + !write(*,*) 'NRNK loop completed' + END DO ! iSCALE 1:4 + !write(*,*) 'SCALE loop completed' + END DO + !write(*,*) 'JOBREF loop completed' + END DO ! iJOBZ + !write(*,*) 'JOBZ loop completed' + + END DO ! MODE -6:6 + !write(*,*) 'MODE loop completed' + END DO ! 1 or 2 trajectories + !write(*,*) 'trajectories loop completed' + + DEALLOCATE( A ) + DEALLOCATE( AC ) + DEALLOCATE( Z ) + DEALLOCATE( F ) + DEALLOCATE( F0 ) + DEALLOCATE( F1 ) + DEALLOCATE( X ) + DEALLOCATE( X0 ) + DEALLOCATE( Y ) + DEALLOCATE( Y0 ) + DEALLOCATE( Y1 ) + DEALLOCATE( AU ) + DEALLOCATE( W ) + DEALLOCATE( S ) + DEALLOCATE( Z1 ) + DEALLOCATE( RES ) + DEALLOCATE( RES1 ) + DEALLOCATE( RESEX ) + DEALLOCATE( CEIGS ) + DEALLOCATE( SINGVX ) + DEALLOCATE( SINGVQX ) + + END DO ! LLOOP + + WRITE(*,*) + WRITE(*,*) '>>>>>>>>>>>>>>>>>>>>>>>>>>' + WRITE(*,*) ' Test summary for CGEDMD :' + WRITE(*,*) '>>>>>>>>>>>>>>>>>>>>>>>>>>' + WRITE(*,*) + IF ( NFAIL_Z_XV == 0 ) THEN + WRITE(*,*) '>>>> Z - U*V test PASSED.' + ELSE + WRITE(*,*) 'Z - U*V test FAILED ', NFAIL_Z_XV, ' time(s)' + WRITE(*,*) 'Max error ||Z-U*V||_F was ', TMP_XW + NFAIL_TOTAL = NFAIL_TOTAL + NFAIL_z_XV + END IF + + IF ( NFAIL_AU == 0 ) THEN + WRITE(*,*) '>>>> A*U test PASSED. ' + ELSE + WRITE(*,*) 'A*U test FAILED ', NFAIL_AU, ' time(s)' + WRITE(*,*) 'Max A*U test adjusted error measure was ', TMP_AU + WRITE(*,*) 'It should be up to O(M*N) times EPS, EPS = ', EPS + NFAIL_TOTAL = NFAIL_TOTAL + NFAIL_AU + END IF + + + IF ( NFAIL_REZ == 0 ) THEN + WRITE(*,*) '>>>> Residual computation test PASSED.' + ELSE + WRITE(*,*) 'Residual computation test FAILED ', NFAIL_REZ, 'time(s)' + WRITE(*,*) 'Max residual computing test adjusted error measure was ', TMP_REZ + WRITE(*,*) 'It should be up to O(M*N) times EPS, EPS = ', EPS + NFAIL_TOTAL = NFAIL_TOTAL + NFAIL_REZ + END IF + IF ( NFAIL_TOTAL == 0 ) THEN + WRITE(*,*) '>>>> CGEDMD :: ALL TESTS PASSED.' + ELSE + WRITE(*,*) NFAIL_TOTAL, 'FAILURES!' + WRITE(*,*) '>>>>>>>>>>>>>> CGEDMD :: TESTS FAILED. CHECK THE IMPLEMENTATION.' + END IF + + WRITE(*,*) + WRITE(*,*) '>>>>>>>>>>>>>>>>>>>>>>>>>>' + WRITE(*,*) ' Test summary for CGEDMDQ :' + WRITE(*,*) '>>>>>>>>>>>>>>>>>>>>>>>>>>' + WRITE(*,*) + + IF ( NFAIL_SVDIFF == 0 ) THEN + WRITE(*,*) '>>>> CGEDMD and CGEDMDQ computed singular & + &values test PASSED.' + ELSE + WRITE(*,*) 'CGEDMD and CGEDMDQ discrepancies in & + &the singular values unacceptable ', & + NFAIL_SVDIFF, ' times. Test FAILED.' + WRITE(*,*) 'The maximal discrepancy in the singular values (relative to the norm) was ', SVDIFF + WRITE(*,*) 'It should be up to O(M*N) times EPS, EPS = ', EPS + NFAILQ_TOTAL = NFAILQ_TOTAL + NFAIL_SVDIFF + END IF + IF ( NFAIL_F_QR == 0 ) THEN + WRITE(*,*) '>>>> F - Q*R test PASSED.' + ELSE + WRITE(*,*) 'F - Q*R test FAILED ', NFAIL_F_QR, ' time(s)' + WRITE(*,*) 'The largest relative residual was ', TMP_FQR + WRITE(*,*) 'It should be up to O(M*N) times EPS, EPS = ', EPS + NFAILQ_TOTAL = NFAILQ_TOTAL + NFAIL_F_QR + END IF + + IF ( NFAIL_REZQ == 0 ) THEN + WRITE(*,*) '>>>> Residual computation test PASSED.' + ELSE + WRITE(*,*) 'Residual computation test FAILED ', NFAIL_REZQ, 'time(s)' + WRITE(*,*) 'Max residual computing test adjusted error measure was ', TMP_REZQ + WRITE(*,*) 'It should be up to O(M*N) times EPS, EPS = ', EPS + NFAILQ_TOTAL = NFAILQ_TOTAL + NFAIL_REZQ + END IF + + IF ( NFAILQ_TOTAL == 0 ) THEN + WRITE(*,*) '>>>>>>> CGEDMDQ :: ALL TESTS PASSED.' + ELSE + WRITE(*,*) NFAILQ_TOTAL, 'FAILURES!' + WRITE(*,*) '>>>>>>> CGEDMDQ :: TESTS FAILED. CHECK THE IMPLEMENTATION.' + END IF + + WRITE(*,*) + WRITE(*,*) 'Test completed.' + STOP + END diff --git a/lapack-netlib/TESTING/EIG/cchkee.F b/lapack-netlib/TESTING/EIG/cchkee.F index 7c6f580870..638966afe1 100644 --- a/lapack-netlib/TESTING/EIG/cchkee.F +++ b/lapack-netlib/TESTING/EIG/cchkee.F @@ -2524,7 +2524,7 @@ PROGRAM CCHKEE 9974 FORMAT( ' Tests of CHBTRD', / ' (reduction of a Hermitian band ', $ 'matrix to real tridiagonal form)' ) 9973 FORMAT( / 1X, 71( '-' ) ) - 9972 FORMAT( / ' LAPACK VERSION ', I1, '.', I1, '.', I1 ) + 9972 FORMAT( / ' LAPACK VERSION ', I1, '.', I2, '.', I1 ) 9971 FORMAT( / ' Tests of the Generalized Linear Regression Model ', $ 'routines' ) 9970 FORMAT( / ' Tests of the Generalized QR and RQ routines' ) diff --git a/lapack-netlib/TESTING/EIG/cdrvsg2stg.f b/lapack-netlib/TESTING/EIG/cdrvsg2stg.f index f7d323247d..77f3865e01 100644 --- a/lapack-netlib/TESTING/EIG/cdrvsg2stg.f +++ b/lapack-netlib/TESTING/EIG/cdrvsg2stg.f @@ -1009,7 +1009,7 @@ SUBROUTINE CDRVSG2STG( NSIZES, NN, NTYPES, DOTYPE, ISEED, THRESH, * CALL CHPGVX( IBTYPE, 'V', 'A', UPLO, N, AP, BP, VL, $ VU, IL, IU, ABSTOL, M, D, Z, LDZ, WORK, - $ RWORK, IWORK( N+1 ), IWORK, INFO ) + $ RWORK, IWORK( N+1 ), IWORK, IINFO ) IF( IINFO.NE.0 ) THEN WRITE( NOUNIT, FMT = 9999 )'CHPGVX(V,A' // UPLO // $ ')', IINFO, N, JTYPE, IOLDSD @@ -1055,7 +1055,7 @@ SUBROUTINE CDRVSG2STG( NSIZES, NN, NTYPES, DOTYPE, ISEED, THRESH, VU = ANORM CALL CHPGVX( IBTYPE, 'V', 'V', UPLO, N, AP, BP, VL, $ VU, IL, IU, ABSTOL, M, D, Z, LDZ, WORK, - $ RWORK, IWORK( N+1 ), IWORK, INFO ) + $ RWORK, IWORK( N+1 ), IWORK, IINFO ) IF( IINFO.NE.0 ) THEN WRITE( NOUNIT, FMT = 9999 )'CHPGVX(V,V' // UPLO // $ ')', IINFO, N, JTYPE, IOLDSD @@ -1099,7 +1099,7 @@ SUBROUTINE CDRVSG2STG( NSIZES, NN, NTYPES, DOTYPE, ISEED, THRESH, * CALL CHPGVX( IBTYPE, 'V', 'I', UPLO, N, AP, BP, VL, $ VU, IL, IU, ABSTOL, M, D, Z, LDZ, WORK, - $ RWORK, IWORK( N+1 ), IWORK, INFO ) + $ RWORK, IWORK( N+1 ), IWORK, IINFO ) IF( IINFO.NE.0 ) THEN WRITE( NOUNIT, FMT = 9999 )'CHPGVX(V,I' // UPLO // $ ')', IINFO, N, JTYPE, IOLDSD diff --git a/lapack-netlib/TESTING/EIG/dchkdmd.f90 b/lapack-netlib/TESTING/EIG/dchkdmd.f90 index 4fbf7531b3..a2b9966288 100644 --- a/lapack-netlib/TESTING/EIG/dchkdmd.f90 +++ b/lapack-netlib/TESTING/EIG/dchkdmd.f90 @@ -1,813 +1,815 @@ -! This is a test program for checking the implementations of -! the implementations of the following subroutines -! -! DGEDMD for computation of the -! Dynamic Mode Decomposition (DMD) -! DGEDMDQ for computation of a -! QR factorization based compressed DMD -! -! Developed and supported by: -! =========================== -! Developed and coded by Zlatko Drmac, Faculty of Science, -! University of Zagreb; drmac@math.hr -! In cooperation with -! AIMdyn Inc., Santa Barbara, CA. -! ======================================================== -! How to run the code (compiler, link info) -! ======================================================== -! Compile as FORTRAN 90 (or later) and link with BLAS and -! LAPACK libraries. -! NOTE: The code is developed and tested on top of the -! Intel MKL library (versions 2022.0.3 and 2022.2.0), -! using the Intel Fortran compiler. -! -! For developers of the C++ implementation -! ======================================================== -! See the LAPACK++ and Template Numerical Toolkit (TNT) -! -! Note on a development of the GPU HP implementation -! ======================================================== -! Work in progress. See CUDA, MAGMA, SLATE. -! NOTE: The four SVD subroutines used in this code are -! included as a part of R&D and for the completeness. -! This was also an opportunity to test those SVD codes. -! If the scaling option is used all four are essentially -! equally good. For implementations on HP platforms, -! one can use whichever SVD is available. -!... ......................................................... -! NOTE: -! When using the Intel MKL 2022.0.3 the subroutine xGESVDQ -! (optionally used in xGEDMD) may cause access violation -! error for x = S, D, C, Z, but only if called with the -! work space query. (At least in our Windows 10 MSVS 2019.) -! The problem can be mitigated by downloading the source -! code of xGESVDQ from the LAPACK repository and use it -! localy instead of the one in the MKL. This seems to -! indicate that the problem is indeed in the MKL. -! This problem did not appear whith Intel MKL 2022.2.0. -! -! NOTE: -! xGESDD seems to have a problem with workspace. In some -! cases the length of the optimal workspace is returned -! smaller than the minimal workspace, as specified in the -! code. As a precaution, all optimal workspaces are -! set as MAX(minimal, optimal). -! Latest implementations of complex xGESDD have different -! length of the real worksapce. We use max value over -! two versions. -!............................................................ -!............................................................ -! - PROGRAM DMD_TEST - use iso_fortran_env, only: real64 - IMPLICIT NONE - integer, parameter :: WP = real64 - -!............................................................ - REAL(KIND=WP), PARAMETER :: ONE = 1.0_WP - REAL(KIND=WP), PARAMETER :: ZERO = 0.0_WP -!............................................................ - REAL(KIND=WP), ALLOCATABLE, DIMENSION(:,:) :: & - A, AC, EIGA, LAMBDA, LAMBDAQ, F, F1, F2,& - Z, Z1, S, AU, W, VA, X, X0, Y, Y0, Y1 - REAL(KIND=WP), ALLOCATABLE, DIMENSION(:) :: & - DA, DL, DR, REIG, REIGA, REIGQ, IEIG, & - IEIGA, IEIGQ, RES, RES1, RESEX, SINGVX,& - SINGVQX, WORK - INTEGER , ALLOCATABLE, DIMENSION(:) :: IWORK - REAL(KIND=WP) :: AB(2,2), WDUMMY(2) - INTEGER :: IDUMMY(2), ISEED(4), RJOBDATA(8) - REAL(KIND=WP) :: ANORM, COND, CONDL, CONDR, DMAX, EPS, & - TOL, TOL2, SVDIFF, TMP, TMP_AU, & - TMP_FQR, TMP_REZ, TMP_REZQ, TMP_ZXW, & - TMP_EX, XNORM, YNORM -!............................................................ - INTEGER :: K, KQ, LDF, LDS, LDA, LDAU, LDW, LDX, LDY, & - LDZ, LIWORK, LWORK, M, N, L, LLOOP, NRNK - INTEGER :: i, iJOBREF, iJOBZ, iSCALE, INFO, j, KDIFF, & - NFAIL, NFAIL_AU, NFAIL_F_QR, NFAIL_REZ, & - NFAIL_REZQ, NFAIL_SVDIFF, NFAIL_TOTAL, NFAILQ_TOTAL, & - NFAIL_Z_XV, MODE, MODEL, MODER, WHTSVD - INTEGER iNRNK, iWHTSVD, K_TRAJ, LWMINOPT - CHARACTER(LEN=1) GRADE, JOBREF, JOBZ, PIVTNG, RSIGN, & - SCALE, RESIDS, WANTQ, WANTR - - LOGICAL TEST_QRDMD -!..... external subroutines (BLAS and LAPACK) - EXTERNAL DAXPY, DGEEV, DGEMM, DGEMV, DLACPY, DLASCL - EXTERNAL DLARNV, DLATMR -!.....external subroutines DMD package, part 1 -! subroutines under test - EXTERNAL DGEDMD, DGEDMDQ - -!..... external functions (BLAS and LAPACK) - EXTERNAL DLAMCH, DLANGE, DNRM2 - REAL(KIND=WP) :: DLAMCH, DLANGE, DNRM2 - EXTERNAL LSAME - LOGICAL LSAME - - INTRINSIC ABS, INT, MIN, MAX -!............................................................ - - ! The test is always in pairs : ( DGEDMD and DGEDMDQ ) - ! because the test includes comparing the results (in pairs). -!..................................................................................... - TEST_QRDMD = .TRUE. ! This code by default performs tests on DGEDMDQ - ! Since the QR factorizations based algorithm is designed for - ! single trajectory data, only single trajectory tests will - ! be performed with xGEDMDQ. - WANTQ = 'Q' - WANTR = 'R' -!................................................................................. - - EPS = DLAMCH( 'P' ) ! machine precision DP - - ! Global counters of failures of some particular tests - NFAIL = 0 - NFAIL_REZ = 0 - NFAIL_REZQ = 0 - NFAIL_Z_XV = 0 - NFAIL_F_QR = 0 - NFAIL_AU = 0 - KDIFF = 0 - NFAIL_SVDIFF = 0 - NFAIL_TOTAL = 0 - NFAILQ_TOTAL = 0 - - - DO LLOOP = 1, 4 - - WRITE(*,*) 'L Loop Index = ', LLOOP - - ! Set the dimensions of the problem ... - WRITE(*,*) 'M = ' - READ(*,*) M - WRITE(*,*) M - ! ... and the number of snapshots. - WRITE(*,*) 'N = ' - READ(*,*) N - WRITE(*,*) N - - ! ... Test the dimensions - IF ( ( MIN(M,N) == 0 ) .OR. ( M < N ) ) THEN - WRITE(*,*) 'Bad dimensions. Required: M >= N > 0.' - STOP - END IF -!............. - ! The seed inside the LLOOP so that each pass can be reproduced easily. - - ISEED(1) = 4 - ISEED(2) = 3 - ISEED(3) = 2 - ISEED(4) = 1 - - LDA = M - LDF = M - LDX = MAX(M,N+1) - LDY = MAX(M,N+1) - LDW = N - LDZ = M - LDAU = MAX(M,N+1) - LDS = N - - TMP_ZXW = ZERO - TMP_AU = ZERO - TMP_REZ = ZERO - TMP_REZQ = ZERO - SVDIFF = ZERO - TMP_EX = ZERO - - ! - ! Test the subroutines on real data snapshots. All - ! computation is done in real arithmetic, even when - ! Koopman eigenvalues and modes are real. - ! - ! Allocate memory space - ALLOCATE( A(LDA,M) ) - ALLOCATE( AC(LDA,M) ) - ALLOCATE( DA(M) ) - ALLOCATE( DL(M) ) - ALLOCATE( F(LDF,N+1) ) - ALLOCATE( F1(LDF,N+1) ) - ALLOCATE( F2(LDF,N+1) ) - ALLOCATE( X(LDX,N) ) - ALLOCATE( X0(LDX,N) ) - ALLOCATE( SINGVX(N) ) - ALLOCATE( SINGVQX(N) ) - ALLOCATE( Y(LDY,N+1) ) - ALLOCATE( Y0(LDY,N+1) ) - ALLOCATE( Y1(M,N+1) ) - ALLOCATE( Z(LDZ,N) ) - ALLOCATE( Z1(LDZ,N) ) - ALLOCATE( RES(N) ) - ALLOCATE( RES1(N) ) - ALLOCATE( RESEX(N) ) - ALLOCATE( REIG(N) ) - ALLOCATE( IEIG(N) ) - ALLOCATE( REIGQ(N) ) - ALLOCATE( IEIGQ(N) ) - ALLOCATE( REIGA(M) ) - ALLOCATE( IEIGA(M) ) - ALLOCATE( VA(LDA,M) ) - ALLOCATE( LAMBDA(N,2) ) - ALLOCATE( LAMBDAQ(N,2) ) - ALLOCATE( EIGA(M,2) ) - ALLOCATE( W(LDW,N) ) - ALLOCATE( AU(LDAU,N) ) - ALLOCATE( S(N,N) ) - - TOL = M*EPS - ! This mimics O(M*N)*EPS bound for accumulated roundoff error. - ! The factor 10 is somewhat arbitrary. - TOL2 = 10*M*N*EPS - -!............. - - DO K_TRAJ = 1, 2 - ! Number of intial conditions in the simulation/trajectories (1 or 2) - - COND = 1.0D8 - DMAX = 1.0D2 - RSIGN = 'F' - GRADE = 'N' - MODEL = 6 - CONDL = 1.0D2 - MODER = 6 - CONDR = 1.0D2 - PIVTNG = 'N' - - ! Loop over all parameter MODE values for ZLATMR (+1,..,+6) - DO MODE = 1, 6 - - ALLOCATE( IWORK(2*M) ) - ALLOCATE(DR(N)) - CALL DLATMR( M, M, 'S', ISEED, 'N', DA, MODE, COND, & - DMAX, RSIGN, GRADE, DL, MODEL, CONDL, & - DR, MODER, CONDR, PIVTNG, IWORK, M, M, & - ZERO, -ONE, 'N', A, LDA, IWORK(M+1), INFO ) - DEALLOCATE(IWORK) - DEALLOCATE(DR) - - LWORK = 4*M+1 - ALLOCATE(WORK(LWORK)) - AC = A - CALL DGEEV( 'N','V', M, AC, M, REIGA, IEIGA, VA, M, & - VA, M, WORK, LWORK, INFO ) ! LAPACK CALL - DEALLOCATE(WORK) - TMP = ZERO - DO i = 1, M - EIGA(i,1) = REIGA(i) - EIGA(i,2) = IEIGA(i) - TMP = MAX( TMP, SQRT(REIGA(i)**2+IEIGA(i)**2)) - END DO - - ! Scale A to have the desirable spectral radius. - CALL DLASCL( 'G', 0, 0, TMP, ONE, M, M, A, M, INFO ) - CALL DLASCL( 'G', 0, 0, TMP, ONE, M, 2, EIGA, M, INFO ) - - ! Compute the norm of A - ANORM = DLANGE( 'F', N, N, A, M, WDUMMY ) - - IF ( K_TRAJ == 2 ) THEN - ! generate data with two inital conditions - CALL DLARNV(2, ISEED, M, F1(1,1) ) - F1(1:M,1) = 1.0E-10*F1(1:M,1) - DO i = 1, N/2 - CALL DGEMV( 'N', M, M, ONE, A, M, F1(1,i), 1, ZERO, & - F1(1,i+1), 1 ) - END DO - X0(1:M,1:N/2) = F1(1:M,1:N/2) - Y0(1:M,1:N/2) = F1(1:M,2:N/2+1) - - CALL DLARNV(2, ISEED, M, F1(1,1) ) - DO i = 1, N-N/2 - CALL DGEMV( 'N', M, M, ONE, A, M, F1(1,i), 1, ZERO, & - F1(1,i+1), 1 ) - END DO - X0(1:M,N/2+1:N) = F1(1:M,1:N-N/2) - Y0(1:M,N/2+1:N) = F1(1:M,2:N-N/2+1) - ELSE - CALL DLARNV(2, ISEED, M, F(1,1) ) - DO i = 1, N - CALL DGEMV( 'N', M, M, ONE, A, M, F(1,i), 1, ZERO, & - F(1,i+1), 1 ) - END DO - X0(1:M,1:N) = F(1:M,1:N) - Y0(1:M,1:N) = F(1:M,2:N+1) - END IF - - XNORM = DLANGE( 'F', M, N, X0, LDX, WDUMMY ) - YNORM = DLANGE( 'F', M, N, Y0, LDX, WDUMMY ) -!............................................................ - - DO iJOBZ = 1, 4 - - SELECT CASE ( iJOBZ ) - CASE(1) - JOBZ = 'V' ! Ritz vectors will be computed - RESIDS = 'R' ! Residuals will be computed - CASE(2) - JOBZ = 'V' - RESIDS = 'N' - CASE(3) - JOBZ = 'F' ! Ritz vectors in factored form - RESIDS = 'N' - CASE(4) - JOBZ = 'N' - RESIDS = 'N' - END SELECT - - DO iJOBREF = 1, 3 - - SELECT CASE ( iJOBREF ) - CASE(1) - JOBREF = 'R' ! Data for refined Ritz vectors - CASE(2) - JOBREF = 'E' ! Exact DMD vectors - CASE(3) - JOBREF = 'N' - END SELECT - - DO iSCALE = 1, 4 - - SELECT CASE ( iSCALE ) - CASE(1) - SCALE = 'S' ! X data normalized - CASE(2) - SCALE = 'C' ! X normalized, consist. check - CASE(3) - SCALE = 'Y' ! Y data normalized - CASE(4) - SCALE = 'N' - END SELECT - - DO iNRNK = -1, -2, -1 - ! Two truncation strategies. The "-2" case for R&D - ! purposes only - it uses possibly low accuracy small - ! singular values, in which case the formulas used in - ! the DMD are highly sensitive. - NRNK = iNRNK - - DO iWHTSVD = 1, 4 - ! Check all four options to compute the POD basis - ! via the SVD. - WHTSVD = iWHTSVD - - DO LWMINOPT = 1, 2 - ! Workspace query for the minimal (1) and for the optimal - ! (2) workspace lengths determined by workspace query. - - X(1:M,1:N) = X0(1:M,1:N) - Y(1:M,1:N) = Y0(1:M,1:N) - - ! DGEDMD: Workspace query and workspace allocation - CALL DGEDMD( SCALE, JOBZ, RESIDS, JOBREF, WHTSVD, M, & - N, X, LDX, Y, LDY, NRNK, TOL, K, REIG, IEIG, Z, & - LDZ, RES, AU, LDAU, W, LDW, S, LDS, WDUMMY, -1, & - IDUMMY, -1, INFO ) - - LIWORK = IDUMMY(1) - ALLOCATE( IWORK(LIWORK) ) - LWORK = INT(WDUMMY(LWMINOPT)) - ALLOCATE( WORK(LWORK) ) - - ! DGEDMD test: CALL DGEDMD - CALL DGEDMD( SCALE, JOBZ, RESIDS, JOBREF, WHTSVD, M, & - N, X, LDX, Y, LDY, NRNK, TOL, K, REIG, IEIG, Z, & - LDZ, RES, AU, LDAU, W, LDW, S, LDS, WORK, LWORK,& - IWORK, LIWORK, INFO ) - - SINGVX(1:N) = WORK(1:N) - - !...... DGEDMD check point - IF ( LSAME(JOBZ,'V') ) THEN - ! Check that Z = X*W, on return from DGEDMD - ! This checks that the returned aigenvectors in Z are - ! the product of the SVD'POD basis returned in X - ! and the eigenvectors of the rayleigh quotient - ! returned in W - CALL DGEMM( 'N', 'N', M, K, K, ONE, X, LDX, W, LDW, & - ZERO, Z1, LDZ ) - TMP = ZERO - DO i = 1, K - CALL DAXPY( M, -ONE, Z(1,i), 1, Z1(1,i), 1) - TMP = MAX(TMP, DNRM2( M, Z1(1,i), 1 ) ) - END DO - TMP_ZXW = MAX(TMP_ZXW, TMP ) - - IF ( TMP_ZXW > 10*M*EPS ) THEN - NFAIL_Z_XV = NFAIL_Z_XV + 1 - WRITE(*,*) ':( .................DGEDMD FAILED!', & - 'Check the code for implementation errors.' - WRITE(*,*) 'The input parameters were ',& - SCALE, JOBZ, RESIDS, JOBREF, WHTSVD, & - M, N, LDX, LDY, NRNK, TOL - END IF - - END IF - - !...... DGEDMD check point - IF ( LSAME(JOBREF,'R') ) THEN - ! The matrix A*U is returned for computing refined Ritz vectors. - ! Check that A*U is computed correctly using the formula - ! A*U = Y * V * inv(SIGMA). This depends on the - ! accuracy in the computed singular values and vectors of X. - ! See the paper for an error analysis. - ! Note that the left singular vectors of the input matrix X - ! are returned in the array X. - CALL DGEMM( 'N', 'N', M, K, M, ONE, A, LDA, X, LDX, & - ZERO, Z1, LDZ ) - TMP = ZERO - DO i = 1, K - CALL DAXPY( M, -ONE, AU(1,i), 1, Z1(1,i), 1) - TMP = MAX( TMP, DNRM2( M, Z1(1,i),1 ) * & - SINGVX(K)/(ANORM*SINGVX(1)) ) - END DO - TMP_AU = MAX( TMP_AU, TMP ) - - IF ( TMP > TOL2 ) THEN - NFAIL_AU = NFAIL_AU + 1 - WRITE(*,*) ':( .................DGEDMD FAILED!', & - 'Check the code for implementation errors.' - WRITE(*,*) 'The input parameters were ',& - SCALE, JOBZ, RESIDS, JOBREF, WHTSVD, & - M, N, LDX, LDY, NRNK, TOL - END IF - - ELSEIF ( LSAME(JOBREF,'E') ) THEN - ! The unscaled vectors of the Exact DMD are computed. - ! This option is included for the sake of completeness, - ! for users who prefer the Exact DMD vectors. The - ! returned vectors are in the real form, in the same way - ! as the Ritz vectors. Here we just save the vectors - ! and test them separately using a Matlab script. - - CALL DGEMM( 'N', 'N', M, K, M, ONE, A, LDA, AU, LDAU, ZERO, Y1, M ) - i=1 - DO WHILE ( i <= K ) - IF ( IEIG(i) == ZERO ) THEN - ! have a real eigenvalue with real eigenvector - CALL DAXPY( M, -REIG(i), AU(1,i), 1, Y1(1,i), 1 ) - RESEX(i) = DNRM2( M, Y1(1,i), 1) / DNRM2(M,AU(1,i),1) - i = i + 1 - ELSE - ! Have a complex conjugate pair - ! REIG(i) +- sqrt(-1)*IMEIG(i). - ! Since all computation is done in real - ! arithmetic, the formula for the residual - ! is recast for real representation of the - ! complex conjugate eigenpair. See the - ! description of RES. - AB(1,1) = REIG(i) - AB(2,1) = -IEIG(i) - AB(1,2) = IEIG(i) - AB(2,2) = REIG(i) - CALL DGEMM( 'N', 'N', M, 2, 2, -ONE, AU(1,i), & - M, AB, 2, ONE, Y1(1,i), M ) - RESEX(i) = DLANGE( 'F', M, 2, Y1(1,i), M, & - WORK )/ DLANGE( 'F', M, 2, AU(1,i), M, & - WORK ) - RESEX(i+1) = RESEX(i) - i = i + 2 - END IF - END DO - - END IF - - !...... DGEDMD check point - IF ( LSAME(RESIDS, 'R') ) THEN - ! Compare the residuals returned by DGEDMD with the - ! explicitly computed residuals using the matrix A. - ! Compute explicitly Y1 = A*Z - CALL DGEMM( 'N', 'N', M, K, M, ONE, A, LDA, Z, LDZ, ZERO, Y1, M ) - ! ... and then A*Z(:,i) - LAMBDA(i)*Z(:,i), using the real forms - ! of the invariant subspaces that correspond to complex conjugate - ! pairs of eigencalues. (See the description of Z in DGEDMD,) - i = 1 - DO WHILE ( i <= K ) - IF ( IEIG(i) == ZERO ) THEN - ! have a real eigenvalue with real eigenvector - CALL DAXPY( M, -REIG(i), Z(1,i), 1, Y1(1,i), 1 ) - RES1(i) = DNRM2( M, Y1(1,i), 1) - i = i + 1 - ELSE - ! Have a complex conjugate pair - ! REIG(i) +- sqrt(-1)*IMEIG(i). - ! Since all computation is done in real - ! arithmetic, the formula for the residual - ! is recast for real representation of the - ! complex conjugate eigenpair. See the - ! description of RES. - AB(1,1) = REIG(i) - AB(2,1) = -IEIG(i) - AB(1,2) = IEIG(i) - AB(2,2) = REIG(i) - CALL DGEMM( 'N', 'N', M, 2, 2, -ONE, Z(1,i), & - M, AB, 2, ONE, Y1(1,i), M ) - RES1(i) = DLANGE( 'F', M, 2, Y1(1,i), M, & - WORK ) - RES1(i+1) = RES1(i) - i = i + 2 - END IF - END DO - TMP = ZERO - DO i = 1, K - TMP = MAX( TMP, ABS(RES(i) - RES1(i)) * & - SINGVX(K)/(ANORM*SINGVX(1)) ) - END DO - TMP_REZ = MAX( TMP_REZ, TMP ) - - IF ( TMP > TOL2 ) THEN - NFAIL_REZ = NFAIL_REZ + 1 - WRITE(*,*) ':( ..................DGEDMD FAILED!', & - 'Check the code for implementation errors.' - WRITE(*,*) 'The input parameters were ',& - SCALE, JOBZ, RESIDS, JOBREF, WHTSVD, & - M, N, LDX, LDY, NRNK, TOL - END IF - - IF ( LSAME(JOBREF,'E') ) THEN - TMP = ZERO - DO i = 1, K - TMP = MAX( TMP, ABS(RES1(i) - RESEX(i))/(RES1(i)+RESEX(i)) ) - END DO - TMP_EX = MAX(TMP_EX,TMP) - END IF - - END IF - - !..... store the results for inspection - DO i = 1, K - LAMBDA(i,1) = REIG(i) - LAMBDA(i,2) = IEIG(i) - END DO - - DEALLOCATE(IWORK) - DEALLOCATE(WORK) - - !====================================================================== - ! Now test the DGEDMDQ - !====================================================================== - IF ( TEST_QRDMD .AND. (K_TRAJ == 1) ) THEN - RJOBDATA(2) = 1 - F1 = F - - ! DGEDMDQ test: Workspace query and workspace allocation - CALL DGEDMDQ( SCALE, JOBZ, RESIDS, WANTQ, WANTR, & - JOBREF, WHTSVD, M, N+1, F1, LDF, X, LDX, Y, & - LDY, NRNK, TOL, KQ, REIGQ, IEIGQ, Z, LDZ, & - RES, AU, LDAU, W, LDW, S, LDS, WDUMMY, & - -1, IDUMMY, -1, INFO ) - LIWORK = IDUMMY(1) - ALLOCATE( IWORK(LIWORK) ) - LWORK = INT(WDUMMY(LWMINOPT)) - ALLOCATE(WORK(LWORK)) - ! DGEDMDQ test: CALL DGEDMDQ - CALL DGEDMDQ( SCALE, JOBZ, RESIDS, WANTQ, WANTR, & - JOBREF, WHTSVD, M, N+1, F1, LDF, X, LDX, Y, & - LDY, NRNK, TOL, KQ, REIGQ, IEIGQ, Z, LDZ, & - RES, AU, LDAU, W, LDW, S, LDS, & - WORK, LWORK, IWORK, LIWORK, INFO ) - - SINGVQX(1:KQ) = WORK(MIN(M,N+1)+1: MIN(M,N+1)+KQ) - - !..... DGEDMDQ check point - IF ( KQ /= K ) THEN - KDIFF = KDIFF+1 - END IF - - TMP = ZERO - DO i = 1, MIN(K, KQ) - TMP = MAX(TMP, ABS(SINGVX(i)-SINGVQX(i)) / & - SINGVX(1) ) - END DO - SVDIFF = MAX( SVDIFF, TMP ) - IF ( TMP > M*N*EPS ) THEN - WRITE(*,*) 'FAILED! Something was wrong with the run.' - NFAIL_SVDIFF = NFAIL_SVDIFF + 1 - DO j =1, 3 - write(*,*) j, SINGVX(j), SINGVQX(j) - read(*,*) - END DO - END IF - - !..... DGEDMDQ check point - IF ( LSAME(WANTQ,'Q') .AND. LSAME(WANTR,'R') ) THEN - ! Check that the QR factors are computed and returned - ! as requested. The residual ||F-Q*R||_F / ||F||_F - ! is compared to M*N*EPS. - F2 = F - CALL DGEMM( 'N', 'N', M, N+1, MIN(M,N+1), -ONE, F1, & - LDF, Y, LDY, ONE, F2, LDF ) - TMP_FQR = DLANGE( 'F', M, N+1, F2, LDF, WORK ) / & - DLANGE( 'F', M, N+1, F, LDF, WORK ) - IF ( TMP_FQR > TOL2 ) THEN - WRITE(*,*) 'FAILED! Something was wrong with the run.' - NFAIL_F_QR = NFAIL_F_QR + 1 - END IF - END IF - - !..... DGEDMDQ check point - IF ( LSAME(RESIDS, 'R') ) THEN - ! Compare the residuals returned by DGEDMDQ with the - ! explicitly computed residuals using the matrix A. - ! Compute explicitly Y1 = A*Z - CALL DGEMM( 'N', 'N', M, KQ, M, ONE, A, M, Z, M, ZERO, Y1, M ) - ! ... and then A*Z(:,i) - LAMBDA(i)*Z(:,i), using the real forms - ! of the invariant subspaces that correspond to complex conjugate - ! pairs of eigencalues. (See the description of Z in DGEDMDQ) - i = 1 - DO WHILE ( i <= KQ ) - IF ( IEIGQ(i) == ZERO ) THEN - ! have a real eigenvalue with real eigenvector - CALL DAXPY( M, -REIGQ(i), Z(1,i), 1, Y1(1,i), 1 ) - ! Y(1:M,i) = Y(1:M,i) - REIG(i)*Z(1:M,i) - RES1(i) = DNRM2( M, Y1(1,i), 1) - i = i + 1 - ELSE - ! Have a complex conjugate pair - ! REIG(i) +- sqrt(-1)*IMEIG(i). - ! Since all computation is done in real - ! arithmetic, the formula for the residual - ! is recast for real representation of the - ! complex conjugate eigenpair. See the - ! description of RES. - AB(1,1) = REIGQ(i) - AB(2,1) = -IEIGQ(i) - AB(1,2) = IEIGQ(i) - AB(2,2) = REIGQ(i) - CALL DGEMM( 'N', 'N', M, 2, 2, -ONE, Z(1,i), & - M, AB, 2, ONE, Y1(1,i), M ) ! BLAS CALL - ! Y(1:M,i:i+1) = Y(1:M,i:i+1) - Z(1:M,i:i+1) * AB ! INTRINSIC - RES1(i) = DLANGE( 'F', M, 2, Y1(1,i), M, & - WORK ) ! LAPACK CALL - RES1(i+1) = RES1(i) - i = i + 2 - END IF - END DO - TMP = ZERO - DO i = 1, KQ - TMP = MAX( TMP, ABS(RES(i) - RES1(i)) * & - SINGVQX(K)/(ANORM*SINGVQX(1)) ) - END DO - TMP_REZQ = MAX( TMP_REZQ, TMP ) - IF ( TMP > TOL2 ) THEN - NFAIL_REZQ = NFAIL_REZQ + 1 - WRITE(*,*) '................ DGEDMDQ FAILED!', & - 'Check the code for implementation errors.' - STOP - END IF - - END IF - - DO i = 1, KQ - LAMBDAQ(i,1) = REIGQ(i) - LAMBDAQ(i,2) = IEIGQ(i) - END DO - - DEALLOCATE(WORK) - DEALLOCATE(IWORK) - END IF ! TEST_QRDMD -!====================================================================== - - END DO ! LWMINOPT - !write(*,*) 'LWMINOPT loop completed' - END DO ! WHTSVD LOOP - !write(*,*) 'WHTSVD loop completed' - END DO ! NRNK LOOP - !write(*,*) 'NRNK loop completed' - END DO ! SCALE LOOP - !write(*,*) 'SCALE loop completed' - END DO ! JOBF LOOP - !write(*,*) 'JOBREF loop completed' - END DO ! JOBZ LOOP - !write(*,*) 'JOBZ loop completed' - - END DO ! MODE -6:6 - !write(*,*) 'MODE loop completed' - END DO ! 1 or 2 trajectories - !write(*,*) 'trajectories loop completed' - - DEALLOCATE(A) - DEALLOCATE(AC) - DEALLOCATE(DA) - DEALLOCATE(DL) - DEALLOCATE(F) - DEALLOCATE(F1) - DEALLOCATE(F2) - DEALLOCATE(X) - DEALLOCATE(X0) - DEALLOCATE(SINGVX) - DEALLOCATE(SINGVQX) - DEALLOCATE(Y) - DEALLOCATE(Y0) - DEALLOCATE(Y1) - DEALLOCATE(Z) - DEALLOCATE(Z1) - DEALLOCATE(RES) - DEALLOCATE(RES1) - DEALLOCATE(RESEX) - DEALLOCATE(REIG) - DEALLOCATE(IEIG) - DEALLOCATE(REIGQ) - DEALLOCATE(IEIGQ) - DEALLOCATE(REIGA) - DEALLOCATE(IEIGA) - DEALLOCATE(VA) - DEALLOCATE(LAMBDA) - DEALLOCATE(LAMBDAQ) - DEALLOCATE(EIGA) - DEALLOCATE(W) - DEALLOCATE(AU) - DEALLOCATE(S) - -!............................................................ - ! Generate random M-by-M matrix A. Use DLATMR from - END DO ! LLOOP - - WRITE(*,*) '>>>>>>>>>>>>>>>>>>>>>>>>>>' - WRITE(*,*) ' Test summary for DGEDMD :' - WRITE(*,*) '>>>>>>>>>>>>>>>>>>>>>>>>>>' - WRITE(*,*) - IF ( NFAIL_Z_XV == 0 ) THEN - WRITE(*,*) '>>>> Z - U*V test PASSED.' - ELSE - WRITE(*,*) 'Z - U*V test FAILED ', NFAIL_Z_XV, ' time(s)' - WRITE(*,*) 'Max error ||Z-U*V||_F was ', TMP_ZXW - NFAIL_TOTAL = NFAIL_TOTAL + NFAIL_Z_XV - END IF - IF ( NFAIL_AU == 0 ) THEN - WRITE(*,*) '>>>> A*U test PASSED. ' - ELSE - WRITE(*,*) 'A*U test FAILED ', NFAIL_AU, ' time(s)' - WRITE(*,*) 'Max A*U test adjusted error measure was ', TMP_AU - WRITE(*,*) 'It should be up to O(M*N) times EPS, EPS = ', EPS - NFAIL_TOTAL = NFAIL_TOTAL + NFAIL_AU - END IF - - IF ( NFAIL_REZ == 0 ) THEN - WRITE(*,*) '>>>> Rezidual computation test PASSED.' - ELSE - WRITE(*,*) 'Rezidual computation test FAILED ', NFAIL_REZ, 'time(s)' - WRITE(*,*) 'Max residual computing test adjusted error measure was ', TMP_REZ - WRITE(*,*) 'It should be up to O(M*N) times EPS, EPS = ', EPS - NFAIL_TOTAL = NFAIL_TOTAL + NFAIL_REZ - END IF - - IF ( NFAIL_TOTAL == 0 ) THEN - WRITE(*,*) '>>>> DGEDMD :: ALL TESTS PASSED.' - ELSE - WRITE(*,*) NFAIL_TOTAL, 'FAILURES!' - WRITE(*,*) '>>>>>>>>>>>>>> DGEDMD :: TESTS FAILED. CHECK THE IMPLEMENTATION.' - END IF - - IF ( TEST_QRDMD ) THEN - WRITE(*,*) - WRITE(*,*) '>>>>>>>>>>>>>>>>>>>>>>>>>>' - WRITE(*,*) ' Test summary for DGEDMDQ :' - WRITE(*,*) '>>>>>>>>>>>>>>>>>>>>>>>>>>' - WRITE(*,*) - - IF ( NFAIL_SVDIFF == 0 ) THEN - WRITE(*,*) '>>>> DGEDMD and DGEDMDQ computed singular & - &values test PASSED.' - ELSE - WRITE(*,*) 'DGEDMD and DGEDMDQ discrepancies in & - &the singular values unacceptable ', & - NFAIL_SVDIFF, ' times. Test FAILED.' - WRITE(*,*) 'The maximal discrepancy in the singular values (relative to the norm) was ', SVDIFF - WRITE(*,*) 'It should be up to O(M*N) times EPS, EPS = ', EPS - NFAILQ_TOTAL = NFAILQ_TOTAL + NFAIL_SVDIFF - END IF - - IF ( NFAIL_F_QR == 0 ) THEN - WRITE(*,*) '>>>> F - Q*R test PASSED.' - ELSE - WRITE(*,*) 'F - Q*R test FAILED ', NFAIL_F_QR, ' time(s)' - WRITE(*,*) 'The largest relative residual was ', TMP_FQR - WRITE(*,*) 'It should be up to O(M*N) times EPS, EPS = ', EPS - NFAILQ_TOTAL = NFAILQ_TOTAL + NFAIL_F_QR - END IF - - IF ( NFAIL_REZQ == 0 ) THEN - WRITE(*,*) '>>>> Rezidual computation test PASSED.' - ELSE - WRITE(*,*) 'Rezidual computation test FAILED ', NFAIL_REZQ, 'time(s)' - WRITE(*,*) 'Max residual computing test adjusted error measure was ', TMP_REZQ - WRITE(*,*) 'It should be up to O(M*N) times EPS, EPS = ', EPS - NFAILQ_TOTAL = NFAILQ_TOTAL + NFAIL_REZQ - END IF - - IF ( NFAILQ_TOTAL == 0 ) THEN - WRITE(*,*) '>>>>>>> DGEDMDQ :: ALL TESTS PASSED.' - ELSE - WRITE(*,*) NFAILQ_TOTAL, 'FAILURES!' - WRITE(*,*) '>>>>>>> DGEDMDQ :: TESTS FAILED. CHECK THE IMPLEMENTATION.' - END IF - - END IF - - WRITE(*,*) - WRITE(*,*) 'Test completed.' - STOP - END +! This is a test program for checking the implementations of +! the implementations of the following subroutines +! +! DGEDMD for computation of the +! Dynamic Mode Decomposition (DMD) +! DGEDMDQ for computation of a +! QR factorization based compressed DMD +! +! Developed and supported by: +! =========================== +! Developed and coded by Zlatko Drmac, Faculty of Science, +! University of Zagreb; drmac@math.hr +! In cooperation with +! AIMdyn Inc., Santa Barbara, CA. +! ======================================================== +! How to run the code (compiler, link info) +! ======================================================== +! Compile as FORTRAN 90 (or later) and link with BLAS and +! LAPACK libraries. +! NOTE: The code is developed and tested on top of the +! Intel MKL library (versions 2022.0.3 and 2022.2.0), +! using the Intel Fortran compiler. +! +! For developers of the C++ implementation +! ======================================================== +! See the LAPACK++ and Template Numerical Toolkit (TNT) +! +! Note on a development of the GPU HP implementation +! ======================================================== +! Work in progress. See CUDA, MAGMA, SLATE. +! NOTE: The four SVD subroutines used in this code are +! included as a part of R&D and for the completeness. +! This was also an opportunity to test those SVD codes. +! If the scaling option is used all four are essentially +! equally good. For implementations on HP platforms, +! one can use whichever SVD is available. +!... ......................................................... +! NOTE: +! When using the Intel MKL 2022.0.3 the subroutine xGESVDQ +! (optionally used in xGEDMD) may cause access violation +! error for x = S, D, C, Z, but only if called with the +! work space query. (At least in our Windows 10 MSVS 2019.) +! The problem can be mitigated by downloading the source +! code of xGESVDQ from the LAPACK repository and use it +! locally instead of the one in the MKL. This seems to +! indicate that the problem is indeed in the MKL. +! This problem did not appear with Intel MKL 2022.2.0. +! +! NOTE: +! xGESDD seems to have a problem with workspace. In some +! cases the length of the optimal workspace is returned +! smaller than the minimal workspace, as specified in the +! code. As a precaution, all optimal workspaces are +! set as MAX(minimal, optimal). +! Latest implementations of complex xGESDD have different +! length of the real workspace. We use max value over +! two versions. +!............................................................ +!............................................................ +! + PROGRAM DMD_TEST + use iso_fortran_env, only: real64 + IMPLICIT NONE + integer, parameter :: WP = real64 + +!............................................................ + REAL(KIND=WP), PARAMETER :: ONE = 1.0_WP + REAL(KIND=WP), PARAMETER :: ZERO = 0.0_WP +!............................................................ + REAL(KIND=WP), ALLOCATABLE, DIMENSION(:,:) :: & + A, AC, EIGA, LAMBDA, LAMBDAQ, F, F1, F2,& + Z, Z1, S, AU, W, VA, X, X0, Y, Y0, Y1 + REAL(KIND=WP), ALLOCATABLE, DIMENSION(:) :: & + DA, DL, DR, REIG, REIGA, REIGQ, IEIG, & + IEIGA, IEIGQ, RES, RES1, RESEX, SINGVX,& + SINGVQX, WORK + INTEGER , ALLOCATABLE, DIMENSION(:) :: IWORK + REAL(KIND=WP) :: AB(2,2), WDUMMY(2) + INTEGER :: IDUMMY(2), ISEED(4), RJOBDATA(8) + REAL(KIND=WP) :: ANORM, COND, CONDL, CONDR, DMAX, EPS, & + TOL, TOL2, SVDIFF, TMP, TMP_AU, & + TMP_FQR, TMP_REZ, TMP_REZQ, TMP_ZXW, & + TMP_EX, XNORM, YNORM +!............................................................ + INTEGER :: K, KQ, LDF, LDS, LDA, LDAU, LDW, LDX, LDY, & + LDZ, LIWORK, LWORK, M, N, L, LLOOP, NRNK + INTEGER :: i, iJOBREF, iJOBZ, iSCALE, INFO, j, KDIFF, & + NFAIL, NFAIL_AU, NFAIL_F_QR, NFAIL_REZ, & + NFAIL_REZQ, NFAIL_SVDIFF, NFAIL_TOTAL, NFAILQ_TOTAL, & + NFAIL_Z_XV, MODE, MODEL, MODER, WHTSVD + INTEGER iNRNK, iWHTSVD, K_TRAJ, LWMINOPT + CHARACTER(LEN=1) GRADE, JOBREF, JOBZ, PIVTNG, RSIGN, & + SCALE, RESIDS, WANTQ, WANTR + + LOGICAL TEST_QRDMD +!..... external subroutines (BLAS and LAPACK) + EXTERNAL DAXPY, DGEEV, DGEMM, DGEMV, DLACPY, DLASCL + EXTERNAL DLARNV, DLATMR, XLAENV +!.....external subroutines DMD package, part 1 +! subroutines under test + EXTERNAL DGEDMD, DGEDMDQ + +!..... external functions (BLAS and LAPACK) + EXTERNAL DLAMCH, DLANGE, DNRM2 + REAL(KIND=WP) :: DLAMCH, DLANGE, DNRM2 + EXTERNAL LSAME + LOGICAL LSAME + + INTRINSIC ABS, INT, MIN, MAX +!............................................................ + + ! The test is always in pairs : ( DGEDMD and DGEDMDQ ) + ! because the test includes comparing the results (in pairs). +!..................................................................................... + TEST_QRDMD = .TRUE. ! This code by default performs tests on DGEDMDQ + ! Since the QR factorizations based algorithm is designed for + ! single trajectory data, only single trajectory tests will + ! be performed with xGEDMDQ. + WANTQ = 'Q' + WANTR = 'R' + ! Initialize the divide-and-conquer cutoff used by xGESDD/xBDSDC. + CALL XLAENV( 9, 25 ) +!................................................................................. + + EPS = DLAMCH( 'P' ) ! machine precision DP + + ! Global counters of failures of some particular tests + NFAIL = 0 + NFAIL_REZ = 0 + NFAIL_REZQ = 0 + NFAIL_Z_XV = 0 + NFAIL_F_QR = 0 + NFAIL_AU = 0 + KDIFF = 0 + NFAIL_SVDIFF = 0 + NFAIL_TOTAL = 0 + NFAILQ_TOTAL = 0 + + + DO LLOOP = 1, 4 + + WRITE(*,*) 'L Loop Index = ', LLOOP + + ! Set the dimensions of the problem ... + WRITE(*,*) 'M = ' + READ(*,*) M + WRITE(*,*) M + ! ... and the number of snapshots. + WRITE(*,*) 'N = ' + READ(*,*) N + WRITE(*,*) N + + ! ... Test the dimensions + IF ( ( MIN(M,N) == 0 ) .OR. ( M < N ) ) THEN + WRITE(*,*) 'Bad dimensions. Required: M >= N > 0.' + STOP + END IF +!............. + ! The seed inside the LLOOP so that each pass can be reproduced easily. + + ISEED(1) = 4 + ISEED(2) = 3 + ISEED(3) = 2 + ISEED(4) = 1 + + LDA = M + LDF = M + LDX = MAX(M,N+1) + LDY = MAX(M,N+1) + LDW = N + LDZ = M + LDAU = MAX(M,N+1) + LDS = N + + TMP_ZXW = ZERO + TMP_AU = ZERO + TMP_REZ = ZERO + TMP_REZQ = ZERO + SVDIFF = ZERO + TMP_EX = ZERO + + ! + ! Test the subroutines on real data snapshots. All + ! computation is done in real arithmetic, even when + ! Koopman eigenvalues and modes are real. + ! + ! Allocate memory space + ALLOCATE( A(LDA,M) ) + ALLOCATE( AC(LDA,M) ) + ALLOCATE( DA(M) ) + ALLOCATE( DL(M) ) + ALLOCATE( F(LDF,N+1) ) + ALLOCATE( F1(LDF,N+1) ) + ALLOCATE( F2(LDF,N+1) ) + ALLOCATE( X(LDX,N) ) + ALLOCATE( X0(LDX,N) ) + ALLOCATE( SINGVX(N) ) + ALLOCATE( SINGVQX(N) ) + ALLOCATE( Y(LDY,N+1) ) + ALLOCATE( Y0(LDY,N+1) ) + ALLOCATE( Y1(M,N+1) ) + ALLOCATE( Z(LDZ,N) ) + ALLOCATE( Z1(LDZ,N) ) + ALLOCATE( RES(N) ) + ALLOCATE( RES1(N) ) + ALLOCATE( RESEX(N) ) + ALLOCATE( REIG(N) ) + ALLOCATE( IEIG(N) ) + ALLOCATE( REIGQ(N) ) + ALLOCATE( IEIGQ(N) ) + ALLOCATE( REIGA(M) ) + ALLOCATE( IEIGA(M) ) + ALLOCATE( VA(LDA,M) ) + ALLOCATE( LAMBDA(N,2) ) + ALLOCATE( LAMBDAQ(N,2) ) + ALLOCATE( EIGA(M,2) ) + ALLOCATE( W(LDW,N) ) + ALLOCATE( AU(LDAU,N) ) + ALLOCATE( S(N,N) ) + + TOL = M*EPS + ! This mimics O(M*N)*EPS bound for accumulated roundoff error. + ! The factor 10 is somewhat arbitrary. + TOL2 = 10*M*N*EPS + +!............. + + DO K_TRAJ = 1, 2 + ! Number of initial conditions in the simulation/trajectories (1 or 2) + + COND = 1.0D8 + DMAX = 1.0D2 + RSIGN = 'F' + GRADE = 'N' + MODEL = 6 + CONDL = 1.0D2 + MODER = 6 + CONDR = 1.0D2 + PIVTNG = 'N' + + ! Loop over all parameter MODE values for ZLATMR (+1,..,+6) + DO MODE = 1, 6 + + ALLOCATE( IWORK(2*M) ) + ALLOCATE(DR(N)) + CALL DLATMR( M, M, 'S', ISEED, 'N', DA, MODE, COND, & + DMAX, RSIGN, GRADE, DL, MODEL, CONDL, & + DR, MODER, CONDR, PIVTNG, IWORK, M, M, & + ZERO, -ONE, 'N', A, LDA, IWORK(M+1), INFO ) + DEALLOCATE(IWORK) + DEALLOCATE(DR) + + LWORK = 4*M+1 + ALLOCATE(WORK(LWORK)) + AC = A + CALL DGEEV( 'N','V', M, AC, M, REIGA, IEIGA, VA, M, & + VA, M, WORK, LWORK, INFO ) ! LAPACK CALL + DEALLOCATE(WORK) + TMP = ZERO + DO i = 1, M + EIGA(i,1) = REIGA(i) + EIGA(i,2) = IEIGA(i) + TMP = MAX( TMP, SQRT(REIGA(i)**2+IEIGA(i)**2)) + END DO + + ! Scale A to have the desirable spectral radius. + CALL DLASCL( 'G', 0, 0, TMP, ONE, M, M, A, M, INFO ) + CALL DLASCL( 'G', 0, 0, TMP, ONE, M, 2, EIGA, M, INFO ) + + ! Compute the norm of A + ANORM = DLANGE( 'F', N, N, A, M, WDUMMY ) + + IF ( K_TRAJ == 2 ) THEN + ! generate data with two initial conditions + CALL DLARNV(2, ISEED, M, F1(1,1) ) + F1(1:M,1) = 1.0E-10*F1(1:M,1) + DO i = 1, N/2 + CALL DGEMV( 'N', M, M, ONE, A, M, F1(1,i), 1, ZERO, & + F1(1,i+1), 1 ) + END DO + X0(1:M,1:N/2) = F1(1:M,1:N/2) + Y0(1:M,1:N/2) = F1(1:M,2:N/2+1) + + CALL DLARNV(2, ISEED, M, F1(1,1) ) + DO i = 1, N-N/2 + CALL DGEMV( 'N', M, M, ONE, A, M, F1(1,i), 1, ZERO, & + F1(1,i+1), 1 ) + END DO + X0(1:M,N/2+1:N) = F1(1:M,1:N-N/2) + Y0(1:M,N/2+1:N) = F1(1:M,2:N-N/2+1) + ELSE + CALL DLARNV(2, ISEED, M, F(1,1) ) + DO i = 1, N + CALL DGEMV( 'N', M, M, ONE, A, M, F(1,i), 1, ZERO, & + F(1,i+1), 1 ) + END DO + X0(1:M,1:N) = F(1:M,1:N) + Y0(1:M,1:N) = F(1:M,2:N+1) + END IF + + XNORM = DLANGE( 'F', M, N, X0, LDX, WDUMMY ) + YNORM = DLANGE( 'F', M, N, Y0, LDX, WDUMMY ) +!............................................................ + + DO iJOBZ = 1, 4 + + SELECT CASE ( iJOBZ ) + CASE(1) + JOBZ = 'V' ! Ritz vectors will be computed + RESIDS = 'R' ! Residuals will be computed + CASE(2) + JOBZ = 'V' + RESIDS = 'N' + CASE(3) + JOBZ = 'F' ! Ritz vectors in factored form + RESIDS = 'N' + CASE(4) + JOBZ = 'N' + RESIDS = 'N' + END SELECT + + DO iJOBREF = 1, 3 + + SELECT CASE ( iJOBREF ) + CASE(1) + JOBREF = 'R' ! Data for refined Ritz vectors + CASE(2) + JOBREF = 'E' ! Exact DMD vectors + CASE(3) + JOBREF = 'N' + END SELECT + + DO iSCALE = 1, 4 + + SELECT CASE ( iSCALE ) + CASE(1) + SCALE = 'S' ! X data normalized + CASE(2) + SCALE = 'C' ! X normalized, consist. check + CASE(3) + SCALE = 'Y' ! Y data normalized + CASE(4) + SCALE = 'N' + END SELECT + + DO iNRNK = -1, -2, -1 + ! Two truncation strategies. The "-2" case for R&D + ! purposes only - it uses possibly low accuracy small + ! singular values, in which case the formulas used in + ! the DMD are highly sensitive. + NRNK = iNRNK + + DO iWHTSVD = 1, 4 + ! Check all four options to compute the POD basis + ! via the SVD. + WHTSVD = iWHTSVD + + DO LWMINOPT = 1, 2 + ! Workspace query for the minimal (1) and for the optimal + ! (2) workspace lengths determined by workspace query. + + X(1:M,1:N) = X0(1:M,1:N) + Y(1:M,1:N) = Y0(1:M,1:N) + + ! DGEDMD: Workspace query and workspace allocation + CALL DGEDMD( SCALE, JOBZ, RESIDS, JOBREF, WHTSVD, M, & + N, X, LDX, Y, LDY, NRNK, TOL, K, REIG, IEIG, Z, & + LDZ, RES, AU, LDAU, W, LDW, S, LDS, WDUMMY, -1, & + IDUMMY, -1, INFO ) + + LIWORK = IDUMMY(1) + ALLOCATE( IWORK(LIWORK) ) + LWORK = INT(WDUMMY(LWMINOPT)) + ALLOCATE( WORK(LWORK) ) + + ! DGEDMD test: CALL DGEDMD + CALL DGEDMD( SCALE, JOBZ, RESIDS, JOBREF, WHTSVD, M, & + N, X, LDX, Y, LDY, NRNK, TOL, K, REIG, IEIG, Z, & + LDZ, RES, AU, LDAU, W, LDW, S, LDS, WORK, LWORK,& + IWORK, LIWORK, INFO ) + + SINGVX(1:N) = WORK(1:N) + + !...... DGEDMD check point + IF ( LSAME(JOBZ,'V') ) THEN + ! Check that Z = X*W, on return from DGEDMD + ! This checks that the returned eigenvectors in Z are + ! the product of the SVD'POD basis returned in X + ! and the eigenvectors of the Rayleigh quotient + ! returned in W + CALL DGEMM( 'N', 'N', M, K, K, ONE, X, LDX, W, LDW, & + ZERO, Z1, LDZ ) + TMP = ZERO + DO i = 1, K + CALL DAXPY( M, -ONE, Z(1,i), 1, Z1(1,i), 1) + TMP = MAX(TMP, DNRM2( M, Z1(1,i), 1 ) ) + END DO + TMP_ZXW = MAX(TMP_ZXW, TMP ) + + IF ( TMP_ZXW > 10*M*EPS ) THEN + NFAIL_Z_XV = NFAIL_Z_XV + 1 + WRITE(*,*) ':( .................DGEDMD FAILED!', & + 'Check the code for implementation errors.' + WRITE(*,*) 'The input parameters were ',& + SCALE, JOBZ, RESIDS, JOBREF, WHTSVD, & + M, N, LDX, LDY, NRNK, TOL + END IF + + END IF + + !...... DGEDMD check point + IF ( LSAME(JOBREF,'R') ) THEN + ! The matrix A*U is returned for computing refined Ritz vectors. + ! Check that A*U is computed correctly using the formula + ! A*U = Y * V * inv(SIGMA). This depends on the + ! accuracy in the computed singular values and vectors of X. + ! See the paper for an error analysis. + ! Note that the left singular vectors of the input matrix X + ! are returned in the array X. + CALL DGEMM( 'N', 'N', M, K, M, ONE, A, LDA, X, LDX, & + ZERO, Z1, LDZ ) + TMP = ZERO + DO i = 1, K + CALL DAXPY( M, -ONE, AU(1,i), 1, Z1(1,i), 1) + TMP = MAX( TMP, DNRM2( M, Z1(1,i),1 ) * & + SINGVX(K)/(ANORM*SINGVX(1)) ) + END DO + TMP_AU = MAX( TMP_AU, TMP ) + + IF ( TMP > TOL2 ) THEN + NFAIL_AU = NFAIL_AU + 1 + WRITE(*,*) ':( .................DGEDMD FAILED!', & + 'Check the code for implementation errors.' + WRITE(*,*) 'The input parameters were ',& + SCALE, JOBZ, RESIDS, JOBREF, WHTSVD, & + M, N, LDX, LDY, NRNK, TOL + END IF + + ELSEIF ( LSAME(JOBREF,'E') ) THEN + ! The unscaled vectors of the Exact DMD are computed. + ! This option is included for the sake of completeness, + ! for users who prefer the Exact DMD vectors. The + ! returned vectors are in the real form, in the same way + ! as the Ritz vectors. Here we just save the vectors + ! and test them separately using a Matlab script. + + CALL DGEMM( 'N', 'N', M, K, M, ONE, A, LDA, AU, LDAU, ZERO, Y1, M ) + i=1 + DO WHILE ( i <= K ) + IF ( IEIG(i) == ZERO ) THEN + ! have a real eigenvalue with real eigenvector + CALL DAXPY( M, -REIG(i), AU(1,i), 1, Y1(1,i), 1 ) + RESEX(i) = DNRM2( M, Y1(1,i), 1) / DNRM2(M,AU(1,i),1) + i = i + 1 + ELSE + ! Have a complex conjugate pair + ! REIG(i) +- sqrt(-1)*IMEIG(i). + ! Since all computation is done in real + ! arithmetic, the formula for the residual + ! is recast for real representation of the + ! complex conjugate eigenpair. See the + ! description of RES. + AB(1,1) = REIG(i) + AB(2,1) = -IEIG(i) + AB(1,2) = IEIG(i) + AB(2,2) = REIG(i) + CALL DGEMM( 'N', 'N', M, 2, 2, -ONE, AU(1,i), & + M, AB, 2, ONE, Y1(1,i), M ) + RESEX(i) = DLANGE( 'F', M, 2, Y1(1,i), M, & + WORK )/ DLANGE( 'F', M, 2, AU(1,i), M, & + WORK ) + RESEX(i+1) = RESEX(i) + i = i + 2 + END IF + END DO + + END IF + + !...... DGEDMD check point + IF ( LSAME(RESIDS, 'R') ) THEN + ! Compare the residuals returned by DGEDMD with the + ! explicitly computed residuals using the matrix A. + ! Compute explicitly Y1 = A*Z + CALL DGEMM( 'N', 'N', M, K, M, ONE, A, LDA, Z, LDZ, ZERO, Y1, M ) + ! ... and then A*Z(:,i) - LAMBDA(i)*Z(:,i), using the real forms + ! of the invariant subspaces that correspond to complex conjugate + ! pairs of eigenvalues. (See the description of Z in DGEDMD,) + i = 1 + DO WHILE ( i <= K ) + IF ( IEIG(i) == ZERO ) THEN + ! have a real eigenvalue with real eigenvector + CALL DAXPY( M, -REIG(i), Z(1,i), 1, Y1(1,i), 1 ) + RES1(i) = DNRM2( M, Y1(1,i), 1) + i = i + 1 + ELSE + ! Have a complex conjugate pair + ! REIG(i) +- sqrt(-1)*IMEIG(i). + ! Since all computation is done in real + ! arithmetic, the formula for the residual + ! is recast for real representation of the + ! complex conjugate eigenpair. See the + ! description of RES. + AB(1,1) = REIG(i) + AB(2,1) = -IEIG(i) + AB(1,2) = IEIG(i) + AB(2,2) = REIG(i) + CALL DGEMM( 'N', 'N', M, 2, 2, -ONE, Z(1,i), & + M, AB, 2, ONE, Y1(1,i), M ) + RES1(i) = DLANGE( 'F', M, 2, Y1(1,i), M, & + WORK ) + RES1(i+1) = RES1(i) + i = i + 2 + END IF + END DO + TMP = ZERO + DO i = 1, K + TMP = MAX( TMP, ABS(RES(i) - RES1(i)) * & + SINGVX(K)/(ANORM*SINGVX(1)) ) + END DO + TMP_REZ = MAX( TMP_REZ, TMP ) + + IF ( TMP > TOL2 ) THEN + NFAIL_REZ = NFAIL_REZ + 1 + WRITE(*,*) ':( ..................DGEDMD FAILED!', & + 'Check the code for implementation errors.' + WRITE(*,*) 'The input parameters were ',& + SCALE, JOBZ, RESIDS, JOBREF, WHTSVD, & + M, N, LDX, LDY, NRNK, TOL + END IF + + IF ( LSAME(JOBREF,'E') ) THEN + TMP = ZERO + DO i = 1, K + TMP = MAX( TMP, ABS(RES1(i) - RESEX(i))/(RES1(i)+RESEX(i)) ) + END DO + TMP_EX = MAX(TMP_EX,TMP) + END IF + + END IF + + !..... store the results for inspection + DO i = 1, K + LAMBDA(i,1) = REIG(i) + LAMBDA(i,2) = IEIG(i) + END DO + + DEALLOCATE(IWORK) + DEALLOCATE(WORK) + + !====================================================================== + ! Now test the DGEDMDQ + !====================================================================== + IF ( TEST_QRDMD .AND. (K_TRAJ == 1) ) THEN + RJOBDATA(2) = 1 + F1 = F + + ! DGEDMDQ test: Workspace query and workspace allocation + CALL DGEDMDQ( SCALE, JOBZ, RESIDS, WANTQ, WANTR, & + JOBREF, WHTSVD, M, N+1, F1, LDF, X, LDX, Y, & + LDY, NRNK, TOL, KQ, REIGQ, IEIGQ, Z, LDZ, & + RES, AU, LDAU, W, LDW, S, LDS, WDUMMY, & + -1, IDUMMY, -1, INFO ) + LIWORK = IDUMMY(1) + ALLOCATE( IWORK(LIWORK) ) + LWORK = INT(WDUMMY(LWMINOPT)) + ALLOCATE(WORK(LWORK)) + ! DGEDMDQ test: CALL DGEDMDQ + CALL DGEDMDQ( SCALE, JOBZ, RESIDS, WANTQ, WANTR, & + JOBREF, WHTSVD, M, N+1, F1, LDF, X, LDX, Y, & + LDY, NRNK, TOL, KQ, REIGQ, IEIGQ, Z, LDZ, & + RES, AU, LDAU, W, LDW, S, LDS, & + WORK, LWORK, IWORK, LIWORK, INFO ) + + SINGVQX(1:KQ) = WORK(MIN(M,N+1)+1: MIN(M,N+1)+KQ) + + !..... DGEDMDQ check point + IF ( KQ /= K ) THEN + KDIFF = KDIFF+1 + END IF + + TMP = ZERO + DO i = 1, MIN(K, KQ) + TMP = MAX(TMP, ABS(SINGVX(i)-SINGVQX(i)) / & + SINGVX(1) ) + END DO + SVDIFF = MAX( SVDIFF, TMP ) + IF ( TMP > M*N*EPS ) THEN + WRITE(*,*) 'FAILED! Something was wrong with the run.' + NFAIL_SVDIFF = NFAIL_SVDIFF + 1 + DO j =1, 3 + write(*,*) j, SINGVX(j), SINGVQX(j) + read(*,*) + END DO + END IF + + !..... DGEDMDQ check point + IF ( LSAME(WANTQ,'Q') .AND. LSAME(WANTR,'R') ) THEN + ! Check that the QR factors are computed and returned + ! as requested. The residual ||F-Q*R||_F / ||F||_F + ! is compared to M*N*EPS. + F2 = F + CALL DGEMM( 'N', 'N', M, N+1, MIN(M,N+1), -ONE, F1, & + LDF, Y, LDY, ONE, F2, LDF ) + TMP_FQR = DLANGE( 'F', M, N+1, F2, LDF, WORK ) / & + DLANGE( 'F', M, N+1, F, LDF, WORK ) + IF ( TMP_FQR > TOL2 ) THEN + WRITE(*,*) 'FAILED! Something was wrong with the run.' + NFAIL_F_QR = NFAIL_F_QR + 1 + END IF + END IF + + !..... DGEDMDQ check point + IF ( LSAME(RESIDS, 'R') ) THEN + ! Compare the residuals returned by DGEDMDQ with the + ! explicitly computed residuals using the matrix A. + ! Compute explicitly Y1 = A*Z + CALL DGEMM( 'N', 'N', M, KQ, M, ONE, A, M, Z, M, ZERO, Y1, M ) + ! ... and then A*Z(:,i) - LAMBDA(i)*Z(:,i), using the real forms + ! of the invariant subspaces that correspond to complex conjugate + ! pairs of eigenvalues. (See the description of Z in DGEDMDQ) + i = 1 + DO WHILE ( i <= KQ ) + IF ( IEIGQ(i) == ZERO ) THEN + ! have a real eigenvalue with real eigenvector + CALL DAXPY( M, -REIGQ(i), Z(1,i), 1, Y1(1,i), 1 ) + ! Y(1:M,i) = Y(1:M,i) - REIG(i)*Z(1:M,i) + RES1(i) = DNRM2( M, Y1(1,i), 1) + i = i + 1 + ELSE + ! Have a complex conjugate pair + ! REIG(i) +- sqrt(-1)*IMEIG(i). + ! Since all computation is done in real + ! arithmetic, the formula for the residual + ! is recast for real representation of the + ! complex conjugate eigenpair. See the + ! description of RES. + AB(1,1) = REIGQ(i) + AB(2,1) = -IEIGQ(i) + AB(1,2) = IEIGQ(i) + AB(2,2) = REIGQ(i) + CALL DGEMM( 'N', 'N', M, 2, 2, -ONE, Z(1,i), & + M, AB, 2, ONE, Y1(1,i), M ) ! BLAS CALL + ! Y(1:M,i:i+1) = Y(1:M,i:i+1) - Z(1:M,i:i+1) * AB ! INTRINSIC + RES1(i) = DLANGE( 'F', M, 2, Y1(1,i), M, & + WORK ) ! LAPACK CALL + RES1(i+1) = RES1(i) + i = i + 2 + END IF + END DO + TMP = ZERO + DO i = 1, KQ + TMP = MAX( TMP, ABS(RES(i) - RES1(i)) * & + SINGVQX(K)/(ANORM*SINGVQX(1)) ) + END DO + TMP_REZQ = MAX( TMP_REZQ, TMP ) + IF ( TMP > TOL2 ) THEN + NFAIL_REZQ = NFAIL_REZQ + 1 + WRITE(*,*) '................ DGEDMDQ FAILED!', & + 'Check the code for implementation errors.' + STOP + END IF + + END IF + + DO i = 1, KQ + LAMBDAQ(i,1) = REIGQ(i) + LAMBDAQ(i,2) = IEIGQ(i) + END DO + + DEALLOCATE(WORK) + DEALLOCATE(IWORK) + END IF ! TEST_QRDMD +!====================================================================== + + END DO ! LWMINOPT + !write(*,*) 'LWMINOPT loop completed' + END DO ! WHTSVD LOOP + !write(*,*) 'WHTSVD loop completed' + END DO ! NRNK LOOP + !write(*,*) 'NRNK loop completed' + END DO ! SCALE LOOP + !write(*,*) 'SCALE loop completed' + END DO ! JOBF LOOP + !write(*,*) 'JOBREF loop completed' + END DO ! JOBZ LOOP + !write(*,*) 'JOBZ loop completed' + + END DO ! MODE -6:6 + !write(*,*) 'MODE loop completed' + END DO ! 1 or 2 trajectories + !write(*,*) 'trajectories loop completed' + + DEALLOCATE(A) + DEALLOCATE(AC) + DEALLOCATE(DA) + DEALLOCATE(DL) + DEALLOCATE(F) + DEALLOCATE(F1) + DEALLOCATE(F2) + DEALLOCATE(X) + DEALLOCATE(X0) + DEALLOCATE(SINGVX) + DEALLOCATE(SINGVQX) + DEALLOCATE(Y) + DEALLOCATE(Y0) + DEALLOCATE(Y1) + DEALLOCATE(Z) + DEALLOCATE(Z1) + DEALLOCATE(RES) + DEALLOCATE(RES1) + DEALLOCATE(RESEX) + DEALLOCATE(REIG) + DEALLOCATE(IEIG) + DEALLOCATE(REIGQ) + DEALLOCATE(IEIGQ) + DEALLOCATE(REIGA) + DEALLOCATE(IEIGA) + DEALLOCATE(VA) + DEALLOCATE(LAMBDA) + DEALLOCATE(LAMBDAQ) + DEALLOCATE(EIGA) + DEALLOCATE(W) + DEALLOCATE(AU) + DEALLOCATE(S) + +!............................................................ + ! Generate random M-by-M matrix A. Use DLATMR from + END DO ! LLOOP + + WRITE(*,*) '>>>>>>>>>>>>>>>>>>>>>>>>>>' + WRITE(*,*) ' Test summary for DGEDMD :' + WRITE(*,*) '>>>>>>>>>>>>>>>>>>>>>>>>>>' + WRITE(*,*) + IF ( NFAIL_Z_XV == 0 ) THEN + WRITE(*,*) '>>>> Z - U*V test PASSED.' + ELSE + WRITE(*,*) 'Z - U*V test FAILED ', NFAIL_Z_XV, ' time(s)' + WRITE(*,*) 'Max error ||Z-U*V||_F was ', TMP_ZXW + NFAIL_TOTAL = NFAIL_TOTAL + NFAIL_Z_XV + END IF + IF ( NFAIL_AU == 0 ) THEN + WRITE(*,*) '>>>> A*U test PASSED. ' + ELSE + WRITE(*,*) 'A*U test FAILED ', NFAIL_AU, ' time(s)' + WRITE(*,*) 'Max A*U test adjusted error measure was ', TMP_AU + WRITE(*,*) 'It should be up to O(M*N) times EPS, EPS = ', EPS + NFAIL_TOTAL = NFAIL_TOTAL + NFAIL_AU + END IF + + IF ( NFAIL_REZ == 0 ) THEN + WRITE(*,*) '>>>> Residual computation test PASSED.' + ELSE + WRITE(*,*) 'Residual computation test FAILED ', NFAIL_REZ, 'time(s)' + WRITE(*,*) 'Max residual computing test adjusted error measure was ', TMP_REZ + WRITE(*,*) 'It should be up to O(M*N) times EPS, EPS = ', EPS + NFAIL_TOTAL = NFAIL_TOTAL + NFAIL_REZ + END IF + + IF ( NFAIL_TOTAL == 0 ) THEN + WRITE(*,*) '>>>> DGEDMD :: ALL TESTS PASSED.' + ELSE + WRITE(*,*) NFAIL_TOTAL, 'FAILURES!' + WRITE(*,*) '>>>>>>>>>>>>>> DGEDMD :: TESTS FAILED. CHECK THE IMPLEMENTATION.' + END IF + + IF ( TEST_QRDMD ) THEN + WRITE(*,*) + WRITE(*,*) '>>>>>>>>>>>>>>>>>>>>>>>>>>' + WRITE(*,*) ' Test summary for DGEDMDQ :' + WRITE(*,*) '>>>>>>>>>>>>>>>>>>>>>>>>>>' + WRITE(*,*) + + IF ( NFAIL_SVDIFF == 0 ) THEN + WRITE(*,*) '>>>> DGEDMD and DGEDMDQ computed singular & + &values test PASSED.' + ELSE + WRITE(*,*) 'DGEDMD and DGEDMDQ discrepancies in & + &the singular values unacceptable ', & + NFAIL_SVDIFF, ' times. Test FAILED.' + WRITE(*,*) 'The maximal discrepancy in the singular values (relative to the norm) was ', SVDIFF + WRITE(*,*) 'It should be up to O(M*N) times EPS, EPS = ', EPS + NFAILQ_TOTAL = NFAILQ_TOTAL + NFAIL_SVDIFF + END IF + + IF ( NFAIL_F_QR == 0 ) THEN + WRITE(*,*) '>>>> F - Q*R test PASSED.' + ELSE + WRITE(*,*) 'F - Q*R test FAILED ', NFAIL_F_QR, ' time(s)' + WRITE(*,*) 'The largest relative residual was ', TMP_FQR + WRITE(*,*) 'It should be up to O(M*N) times EPS, EPS = ', EPS + NFAILQ_TOTAL = NFAILQ_TOTAL + NFAIL_F_QR + END IF + + IF ( NFAIL_REZQ == 0 ) THEN + WRITE(*,*) '>>>> Residual computation test PASSED.' + ELSE + WRITE(*,*) 'Residual computation test FAILED ', NFAIL_REZQ, 'time(s)' + WRITE(*,*) 'Max residual computing test adjusted error measure was ', TMP_REZQ + WRITE(*,*) 'It should be up to O(M*N) times EPS, EPS = ', EPS + NFAILQ_TOTAL = NFAILQ_TOTAL + NFAIL_REZQ + END IF + + IF ( NFAILQ_TOTAL == 0 ) THEN + WRITE(*,*) '>>>>>>> DGEDMDQ :: ALL TESTS PASSED.' + ELSE + WRITE(*,*) NFAILQ_TOTAL, 'FAILURES!' + WRITE(*,*) '>>>>>>> DGEDMDQ :: TESTS FAILED. CHECK THE IMPLEMENTATION.' + END IF + + END IF + + WRITE(*,*) + WRITE(*,*) 'Test completed.' + STOP + END diff --git a/lapack-netlib/TESTING/EIG/dchkee.F b/lapack-netlib/TESTING/EIG/dchkee.F index 2b8e0b371d..20c4cb47c2 100644 --- a/lapack-netlib/TESTING/EIG/dchkee.F +++ b/lapack-netlib/TESTING/EIG/dchkee.F @@ -2508,7 +2508,7 @@ PROGRAM DCHKEE 9974 FORMAT( ' Tests of DSBTRD', / ' (reduction of a symmetric band ', $ 'matrix to tridiagonal form)' ) 9973 FORMAT( / 1X, 71( '-' ) ) - 9972 FORMAT( / ' LAPACK VERSION ', I1, '.', I1, '.', I1 ) + 9972 FORMAT( / ' LAPACK VERSION ', I1, '.', I2, '.', I1 ) 9971 FORMAT( / ' Tests of the Generalized Linear Regression Model ', $ 'routines' ) 9970 FORMAT( / ' Tests of the Generalized QR and RQ routines' ) diff --git a/lapack-netlib/TESTING/EIG/ddrvsg2stg.f b/lapack-netlib/TESTING/EIG/ddrvsg2stg.f index 0fe31cab17..68ca74270b 100644 --- a/lapack-netlib/TESTING/EIG/ddrvsg2stg.f +++ b/lapack-netlib/TESTING/EIG/ddrvsg2stg.f @@ -989,7 +989,7 @@ SUBROUTINE DDRVSG2STG( NSIZES, NN, NTYPES, DOTYPE, ISEED, THRESH, * CALL DSPGVX( IBTYPE, 'V', 'A', UPLO, N, AP, BP, VL, $ VU, IL, IU, ABSTOL, M, D, Z, LDZ, WORK, - $ IWORK( N+1 ), IWORK, INFO ) + $ IWORK( N+1 ), IWORK, IINFO ) IF( IINFO.NE.0 ) THEN WRITE( NOUNIT, FMT = 9999 )'DSPGVX(V,A' // UPLO // $ ')', IINFO, N, JTYPE, IOLDSD @@ -1035,7 +1035,7 @@ SUBROUTINE DDRVSG2STG( NSIZES, NN, NTYPES, DOTYPE, ISEED, THRESH, VU = ANORM CALL DSPGVX( IBTYPE, 'V', 'V', UPLO, N, AP, BP, VL, $ VU, IL, IU, ABSTOL, M, D, Z, LDZ, WORK, - $ IWORK( N+1 ), IWORK, INFO ) + $ IWORK( N+1 ), IWORK, IINFO ) IF( IINFO.NE.0 ) THEN WRITE( NOUNIT, FMT = 9999 )'DSPGVX(V,V' // UPLO // $ ')', IINFO, N, JTYPE, IOLDSD @@ -1079,7 +1079,7 @@ SUBROUTINE DDRVSG2STG( NSIZES, NN, NTYPES, DOTYPE, ISEED, THRESH, * CALL DSPGVX( IBTYPE, 'V', 'I', UPLO, N, AP, BP, VL, $ VU, IL, IU, ABSTOL, M, D, Z, LDZ, WORK, - $ IWORK( N+1 ), IWORK, INFO ) + $ IWORK( N+1 ), IWORK, IINFO ) IF( IINFO.NE.0 ) THEN WRITE( NOUNIT, FMT = 9999 )'DSPGVX(V,I' // UPLO // $ ')', IINFO, N, JTYPE, IOLDSD diff --git a/lapack-netlib/TESTING/EIG/schkdmd.f90 b/lapack-netlib/TESTING/EIG/schkdmd.f90 index 77e3e46c05..ce0166763e 100644 --- a/lapack-netlib/TESTING/EIG/schkdmd.f90 +++ b/lapack-netlib/TESTING/EIG/schkdmd.f90 @@ -1,792 +1,794 @@ -! This is a test program for checking the implementations of -! the implementations of the following subroutines -! -! SGEDMD for computation of the -! Dynamic Mode Decomposition (DMD) -! SGEDMDQ for computation of a -! QR factorization based compressed DMD -! -! Developed and supported by: -! =========================== -! Developed and coded by Zlatko Drmac, Faculty of Science, -! University of Zagreb; drmac@math.hr -! In cooperation with -! AIMdyn Inc., Santa Barbara, CA. -! ======================================================== -! How to run the code (compiler, link info) -! ======================================================== -! Compile as FORTRAN 90 (or later) and link with BLAS and -! LAPACK libraries. -! NOTE: The code is developed and tested on top of the -! Intel MKL library (versions 2022.0.3 and 2022.2.0), -! using the Intel Fortran compiler. -! -! For developers of the C++ implementation -! ======================================================== -! See the LAPACK++ and Template Numerical Toolkit (TNT) -! -! Note on a development of the GPU HP implementation -! ======================================================== -! Work in progress. See CUDA, MAGMA, SLATE. -! NOTE: The four SVD subroutines used in this code are -! included as a part of R&D and for the completeness. -! This was also an opportunity to test those SVD codes. -! If the scaling option is used all four are essentially -! equally good. For implementations on HP platforms, -! one can use whichever SVD is available. -!... ......................................................... -! NOTE: -! When using the Intel MKL 2022.0.3 the subroutine xGESVDQ -! (optionally used in xGEDMD) may cause access violation -! error for x = S, D, C, Z, but only if called with the -! work space query. (At least in our Windows 10 MSVS 2019.) -! The problem can be mitigated by downloading the source -! code of xGESVDQ from the LAPACK repository and use it -! localy instead of the one in the MKL. This seems to -! indicate that the problem is indeed in the MKL. -! This problem did not appear whith Intel MKL 2022.2.0. -! -! NOTE: -! xGESDD seems to have a problem with workspace. In some -! cases the length of the optimal workspace is returned -! smaller than the minimal workspace, as specified in the -! code. As a precaution, all optimal workspaces are -! set as MAX(minimal, optimal). -! Latest implementations of complex xGESDD have different -! length of the real worksapce. We use max value over -! two versions. -!............................................................ -!............................................................ -! - PROGRAM DMD_TEST - use iso_fortran_env, only: real32 - IMPLICIT NONE - integer, parameter :: WP = real32 - -!............................................................ - REAL(KIND=WP), PARAMETER :: ONE = 1.0_WP - REAL(KIND=WP), PARAMETER :: ZERO = 0.0_WP -!............................................................ - REAL(KIND=WP), ALLOCATABLE, DIMENSION(:,:) :: & - A, AC, EIGA, LAMBDA, LAMBDAQ, F, F1, F2,& - Z, Z1, S, AU, W, VA, X, X0, Y, Y0, Y1 - REAL(KIND=WP), ALLOCATABLE, DIMENSION(:) :: & - DA, DL, DR, REIG, REIGA, REIGQ, IEIG, & - IEIGA, IEIGQ, RES, RES1, RESEX, SINGVX,& - SINGVQX, WORK - INTEGER , ALLOCATABLE, DIMENSION(:) :: IWORK - REAL(KIND=WP) :: AB(2,2), WDUMMY(2) - INTEGER :: IDUMMY(2), ISEED(4), RJOBDATA(8) - REAL(KIND=WP) :: ANORM, COND, CONDL, CONDR, DMAX, EPS, & - TOL, TOL2, SVDIFF, TMP, TMP_AU, & - TMP_FQR, TMP_REZ, TMP_REZQ, TMP_ZXW, & - TMP_EX, XNORM, YNORM -!............................................................ - INTEGER :: K, KQ, LDF, LDS, LDA, LDAU, LDW, LDX, LDY, & - LDZ, LIWORK, LWORK, M, N, L, LLOOP, NRNK - INTEGER :: i, iJOBREF, iJOBZ, iSCALE, INFO, KDIFF, & - NFAIL, NFAIL_AU, NFAIL_F_QR, NFAIL_REZ, & - NFAIL_REZQ, NFAIL_SVDIFF, NFAIL_TOTAL, NFAILQ_TOTAL, & - NFAIL_Z_XV, MODE, MODEL, MODER, WHTSVD - INTEGER iNRNK, iWHTSVD, K_TRAJ, LWMINOPT - CHARACTER(LEN=1) GRADE, JOBREF, JOBZ, PIVTNG, RSIGN, & - SCALE, RESIDS, WANTQ, WANTR - - LOGICAL TEST_QRDMD -!..... external subroutines (BLAS and LAPACK) - EXTERNAL SAXPY, SGEEV, SGEMM, SGEMV, SLACPY, SLASCL - EXTERNAL SLARNV, SLATMR -!.....external subroutines DMD package, part 1 -! subroutines under test - EXTERNAL SGEDMD, SGEDMDQ - -!..... external functions (BLAS and LAPACK) - EXTERNAL SLAMCH, SLANGE, SNRM2 - REAL(KIND=WP) :: SLAMCH, SLANGE, SNRM2 - EXTERNAL LSAME - LOGICAL LSAME - - INTRINSIC ABS, INT, MIN, MAX -!............................................................ - - ! The test is always in pairs : ( SGEDMD and SGEDMDQ ) - ! because the test includes comparing the results (in pairs). -!..................................................................................... - TEST_QRDMD = .TRUE. ! This code by default performs tests on SGEDMDQ - ! Since the QR factorizations based algorithm is designed for - ! single trajectory data, only single trajectory tests will - ! be performed with xGEDMDQ. - WANTQ = 'Q' - WANTR = 'R' -!................................................................................. - - EPS = SLAMCH( 'P' ) ! machine precision SP - - ! Global counters of failures of some particular tests - NFAIL = 0 - NFAIL_REZ = 0 - NFAIL_REZQ = 0 - NFAIL_Z_XV = 0 - NFAIL_F_QR = 0 - NFAIL_AU = 0 - KDIFF = 0 - NFAIL_SVDIFF = 0 - NFAIL_TOTAL = 0 - NFAILQ_TOTAL = 0 - - - DO LLOOP = 1, 4 - - WRITE(*,*) 'L Loop Index = ', LLOOP - - ! Set the dimensions of the problem ... - WRITE(*,*) 'M = ' - READ(*,*) M - WRITE(*,*) M - ! ... and the number of snapshots. - WRITE(*,*) 'N = ' - READ(*,*) N - WRITE(*,*) N - - ! ... Test the dimensions - IF ( ( MIN(M,N) == 0 ) .OR. ( M < N ) ) THEN - WRITE(*,*) 'Bad dimensions. Required: M >= N > 0.' - STOP - END IF -!............. - ! The seed inside the LLOOP so that each pass can be reproduced easily. - - ISEED(1) = 4 - ISEED(2) = 3 - ISEED(3) = 2 - ISEED(4) = 1 - - LDA = M - LDF = M - LDX = MAX(M,N+1) - LDY = MAX(M,N+1) - LDW = N - LDZ = M - LDAU = MAX(M,N+1) - LDS = N - - TMP_ZXW = ZERO - TMP_AU = ZERO - TMP_REZ = ZERO - TMP_REZQ = ZERO - SVDIFF = ZERO - TMP_EX = ZERO - - ! - ! Test the subroutines on real data snapshots. All - ! computation is done in real arithmetic, even when - ! Koopman eigenvalues and modes are real. - ! - ! Allocate memory space - ALLOCATE( A(LDA,M) ) - ALLOCATE( AC(LDA,M) ) - ALLOCATE( DA(M) ) - ALLOCATE( DL(M) ) - ALLOCATE( F(LDF,N+1) ) - ALLOCATE( F1(LDF,N+1) ) - ALLOCATE( F2(LDF,N+1) ) - ALLOCATE( X(LDX,N) ) - ALLOCATE( X0(LDX,N) ) - ALLOCATE( SINGVX(N) ) - ALLOCATE( SINGVQX(N) ) - ALLOCATE( Y(LDY,N+1) ) - ALLOCATE( Y0(LDY,N+1) ) - ALLOCATE( Y1(M,N+1) ) - ALLOCATE( Z(LDZ,N) ) - ALLOCATE( Z1(LDZ,N) ) - ALLOCATE( RES(N) ) - ALLOCATE( RES1(N) ) - ALLOCATE( RESEX(N) ) - ALLOCATE( REIG(N) ) - ALLOCATE( IEIG(N) ) - ALLOCATE( REIGQ(N) ) - ALLOCATE( IEIGQ(N) ) - ALLOCATE( REIGA(M) ) - ALLOCATE( IEIGA(M) ) - ALLOCATE( VA(LDA,M) ) - ALLOCATE( LAMBDA(N,2) ) - ALLOCATE( LAMBDAQ(N,2) ) - ALLOCATE( EIGA(M,2) ) - ALLOCATE( W(LDW,N) ) - ALLOCATE( AU(LDAU,N) ) - ALLOCATE( S(N,N) ) - - TOL = M*EPS - ! This mimics O(M*N)*EPS bound for accumulated roundoff error. - ! The factor 10 is somewhat arbitrary. - TOL2 = 10*M*N*EPS - -!............. - - DO K_TRAJ = 1, 2 - ! Number of intial conditions in the simulation/trajectories (1 or 2) - - COND = 1.0D8 - DMAX = 1.0D2 - RSIGN = 'F' - GRADE = 'N' - MODEL = 6 - CONDL = 1.0D2 - MODER = 6 - CONDR = 1.0D2 - PIVTNG = 'N' - - ! Loop over all parameter MODE values for ZLATMR (+1,..,+6) - DO MODE = 1, 6 - - ALLOCATE( IWORK(2*M) ) - ALLOCATE(DR(N)) - CALL SLATMR( M, M, 'S', ISEED, 'N', DA, MODE, COND, & - DMAX, RSIGN, GRADE, DL, MODEL, CONDL, & - DR, MODER, CONDR, PIVTNG, IWORK, M, M, & - ZERO, -ONE, 'N', A, LDA, IWORK(M+1), INFO ) - DEALLOCATE(IWORK) - DEALLOCATE(DR) - - LWORK = 4*M+1 - ALLOCATE(WORK(LWORK)) - AC = A - CALL SGEEV( 'N','V', M, AC, M, REIGA, IEIGA, VA, M, & - VA, M, WORK, LWORK, INFO ) ! LAPACK CALL - DEALLOCATE(WORK) - TMP = ZERO - DO i = 1, M - EIGA(i,1) = REIGA(i) - EIGA(i,2) = IEIGA(i) - TMP = MAX( TMP, SQRT(REIGA(i)**2+IEIGA(i)**2)) - END DO - - ! Scale A to have the desirable spectral radius. - CALL SLASCL( 'G', 0, 0, TMP, ONE, M, M, A, M, INFO ) - CALL SLASCL( 'G', 0, 0, TMP, ONE, M, 2, EIGA, M, INFO ) - - ! Compute the norm of A - ANORM = SLANGE( 'F', N, N, A, M, WDUMMY ) - - IF ( K_TRAJ == 2 ) THEN - ! generate data with two inital conditions - CALL SLARNV(2, ISEED, M, F1(1,1) ) - F1(1:M,1) = 1.0E-10*F1(1:M,1) - DO i = 1, N/2 - CALL SGEMV( 'N', M, M, ONE, A, M, F1(1,i), 1, ZERO, & - F1(1,i+1), 1 ) - END DO - X0(1:M,1:N/2) = F1(1:M,1:N/2) - Y0(1:M,1:N/2) = F1(1:M,2:N/2+1) - - CALL SLARNV(2, ISEED, M, F1(1,1) ) - DO i = 1, N-N/2 - CALL SGEMV( 'N', M, M, ONE, A, M, F1(1,i), 1, ZERO, & - F1(1,i+1), 1 ) - END DO - X0(1:M,N/2+1:N) = F1(1:M,1:N-N/2) - Y0(1:M,N/2+1:N) = F1(1:M,2:N-N/2+1) - ELSE - ! single trajectory - CALL SLARNV(2, ISEED, M, F(1,1) ) - DO i = 1, N - CALL SGEMV( 'N', M, M, ONE, A, M, F(1,i), 1, ZERO, & - F(1,i+1), 1 ) - END DO - X0(1:M,1:N) = F(1:M,1:N) - Y0(1:M,1:N) = F(1:M,2:N+1) - END IF - - XNORM = SLANGE( 'F', M, N, X0, LDX, WDUMMY ) - YNORM = SLANGE( 'F', M, N, Y0, LDX, WDUMMY ) -!............................................................ - - DO iJOBZ = 1, 4 - - SELECT CASE ( iJOBZ ) - CASE(1) - JOBZ = 'V' ! Ritz vectors will be computed - RESIDS = 'R' ! Residuals will be computed - CASE(2) - JOBZ = 'V' - RESIDS = 'N' - CASE(3) - JOBZ = 'F' ! Ritz vectors in factored form - RESIDS = 'N' - CASE(4) - JOBZ = 'N' - RESIDS = 'N' - END SELECT - - DO iJOBREF = 1, 3 - - SELECT CASE ( iJOBREF ) - CASE(1) - JOBREF = 'R' ! Data for refined Ritz vectors - CASE(2) - JOBREF = 'E' ! Exact DMD vectors - CASE(3) - JOBREF = 'N' - END SELECT - - DO iSCALE = 1, 4 - - SELECT CASE ( iSCALE ) - CASE(1) - SCALE = 'S' ! X data normalized - CASE(2) - SCALE = 'C' ! X normalized, consist. check - CASE(3) - SCALE = 'Y' ! Y data normalized - CASE(4) - SCALE = 'N' - END SELECT - - DO iNRNK = -1, -2, -1 - ! Two truncation strategies. The "-2" case for R&D - ! purposes only - it uses possibly low accuracy small - ! singular values, in which case the formulas used in - ! the DMD are highly sensitive. - NRNK = iNRNK - - DO iWHTSVD = 1, 4 - ! Check all four options to compute the POD basis - ! via the SVD. - WHTSVD = iWHTSVD - - DO LWMINOPT = 1, 2 - ! Workspace query for the minimal (1) and for the optimal - ! (2) workspace lengths determined by workspace query. - - X(1:M,1:N) = X0(1:M,1:N) - Y(1:M,1:N) = Y0(1:M,1:N) - - ! SGEDMD: Workspace query and workspace allocation - CALL SGEDMD( SCALE, JOBZ, RESIDS, JOBREF, WHTSVD, M, & - N, X, LDX, Y, LDY, NRNK, TOL, K, REIG, IEIG, Z, & - LDZ, RES, AU, LDAU, W, LDW, S, LDS, WDUMMY, -1, & - IDUMMY, -1, INFO ) - - LIWORK = IDUMMY(1) - ALLOCATE( IWORK(LIWORK) ) - LWORK = INT(WDUMMY(LWMINOPT)) - ALLOCATE( WORK(LWORK) ) - - ! SGEDMD test: CALL SGEDMD - CALL SGEDMD( SCALE, JOBZ, RESIDS, JOBREF, WHTSVD, M, & - N, X, LDX, Y, LDY, NRNK, TOL, K, REIG, IEIG, Z, & - LDZ, RES, AU, LDAU, W, LDW, S, LDS, WORK, LWORK,& - IWORK, LIWORK, INFO ) - - SINGVX(1:N) = WORK(1:N) - - !...... SGEDMD check point - IF ( LSAME(JOBZ,'V') ) THEN - ! Check that Z = X*W, on return from SGEDMD - ! This checks that the returned aigenvectors in Z are - ! the product of the SVD'POD basis returned in X - ! and the eigenvectors of the rayleigh quotient - ! returned in W - CALL SGEMM( 'N', 'N', M, K, K, ONE, X, LDX, W, LDW, & - ZERO, Z1, LDZ ) - TMP = ZERO - DO i = 1, K - CALL SAXPY( M, -ONE, Z(1,i), 1, Z1(1,i), 1) - TMP = MAX(TMP, SNRM2( M, Z1(1,i), 1 ) ) - END DO - TMP_ZXW = MAX(TMP_ZXW, TMP ) - - IF ( TMP_ZXW > 10*M*EPS ) THEN - NFAIL_Z_XV = NFAIL_Z_XV + 1 - END IF - - END IF - - !...... SGEDMD check point - IF ( LSAME(JOBREF,'R') ) THEN - ! The matrix A*U is returned for computing refined Ritz vectors. - ! Check that A*U is computed correctly using the formula - ! A*U = Y * V * inv(SIGMA). This depends on the - ! accuracy in the computed singular values and vectors of X. - ! See the paper for an error analysis. - ! Note that the left singular vectors of the input matrix X - ! are returned in the array X. - CALL SGEMM( 'N', 'N', M, K, M, ONE, A, LDA, X, LDX, & - ZERO, Z1, LDZ ) - TMP = ZERO - DO i = 1, K - CALL SAXPY( M, -ONE, AU(1,i), 1, Z1(1,i), 1) - TMP = MAX( TMP, SNRM2( M, Z1(1,i),1 ) * & - SINGVX(K)/(ANORM*SINGVX(1)) ) - END DO - TMP_AU = MAX( TMP_AU, TMP ) - - IF ( TMP > TOL2 ) THEN - NFAIL_AU = NFAIL_AU + 1 - END IF - - ELSEIF ( LSAME(JOBREF,'E') ) THEN - ! The unscaled vectors of the Exact DMD are computed. - ! This option is included for the sake of completeness, - ! for users who prefer the Exact DMD vectors. The - ! returned vectors are in the real form, in the same way - ! as the Ritz vectors. Here we just save the vectors - ! and test them separately using a Matlab script. - - CALL SGEMM( 'N', 'N', M, K, M, ONE, A, LDA, AU, LDAU, ZERO, Y1, M ) - i=1 - DO WHILE ( i <= K ) - IF ( IEIG(i) == ZERO ) THEN - ! have a real eigenvalue with real eigenvector - CALL SAXPY( M, -REIG(i), AU(1,i), 1, Y1(1,i), 1 ) - RESEX(i) = SNRM2( M, Y1(1,i), 1) / SNRM2(M,AU(1,i),1) - i = i + 1 - ELSE - ! Have a complex conjugate pair - ! REIG(i) +- sqrt(-1)*IMEIG(i). - ! Since all computation is done in real - ! arithmetic, the formula for the residual - ! is recast for real representation of the - ! complex conjugate eigenpair. See the - ! description of RES. - AB(1,1) = REIG(i) - AB(2,1) = -IEIG(i) - AB(1,2) = IEIG(i) - AB(2,2) = REIG(i) - CALL SGEMM( 'N', 'N', M, 2, 2, -ONE, AU(1,i), & - M, AB, 2, ONE, Y1(1,i), M ) - RESEX(i) = SLANGE( 'F', M, 2, Y1(1,i), M, & - WORK )/ SLANGE( 'F', M, 2, AU(1,i), M, & - WORK ) - RESEX(i+1) = RESEX(i) - i = i + 2 - END IF - END DO - - END IF - - !...... SGEDMD check point - IF ( LSAME(RESIDS, 'R') ) THEN - ! Compare the residuals returned by SGEDMD with the - ! explicitly computed residuals using the matrix A. - ! Compute explicitly Y1 = A*Z - CALL SGEMM( 'N', 'N', M, K, M, ONE, A, LDA, Z, LDZ, ZERO, Y1, M ) - ! ... and then A*Z(:,i) - LAMBDA(i)*Z(:,i), using the real forms - ! of the invariant subspaces that correspond to complex conjugate - ! pairs of eigencalues. (See the description of Z in SGEDMD,) - i = 1 - DO WHILE ( i <= K ) - IF ( IEIG(i) == ZERO ) THEN - ! have a real eigenvalue with real eigenvector - CALL SAXPY( M, -REIG(i), Z(1,i), 1, Y1(1,i), 1 ) - RES1(i) = SNRM2( M, Y1(1,i), 1) - i = i + 1 - ELSE - ! Have a complex conjugate pair - ! REIG(i) +- sqrt(-1)*IMEIG(i). - ! Since all computation is done in real - ! arithmetic, the formula for the residual - ! is recast for real representation of the - ! complex conjugate eigenpair. See the - ! description of RES. - AB(1,1) = REIG(i) - AB(2,1) = -IEIG(i) - AB(1,2) = IEIG(i) - AB(2,2) = REIG(i) - CALL SGEMM( 'N', 'N', M, 2, 2, -ONE, Z(1,i), & - M, AB, 2, ONE, Y1(1,i), M ) - RES1(i) = SLANGE( 'F', M, 2, Y1(1,i), M, & - WORK ) - RES1(i+1) = RES1(i) - i = i + 2 - END IF - END DO - TMP = ZERO - DO i = 1, K - TMP = MAX( TMP, ABS(RES(i) - RES1(i)) * & - SINGVX(K)/(ANORM*SINGVX(1)) ) - END DO - TMP_REZ = MAX( TMP_REZ, TMP ) - - IF ( TMP > TOL2 ) THEN - NFAIL_REZ = NFAIL_REZ + 1 - END IF - - IF ( LSAME(JOBREF,'E') ) THEN - TMP = ZERO - DO i = 1, K - TMP = MAX( TMP, ABS(RES1(i) - RESEX(i))/(RES1(i)+RESEX(i)) ) - END DO - TMP_EX = MAX(TMP_EX,TMP) - END IF - - END IF - - ! ... store the results for inspection - DO i = 1, K - LAMBDA(i,1) = REIG(i) - LAMBDA(i,2) = IEIG(i) - END DO - - DEALLOCATE(IWORK) - DEALLOCATE(WORK) - - !====================================================================== - ! Now test the SGEDMDQ, if requested. - !====================================================================== - IF ( TEST_QRDMD .AND. (K_TRAJ == 1) ) THEN - RJOBDATA(2) = 1 - F1 = F - - ! SGEDMDQ test: Workspace query and workspace allocation - CALL SGEDMDQ( SCALE, JOBZ, RESIDS, WANTQ, WANTR, & - JOBREF, WHTSVD, M, N+1, F1, LDF, X, LDX, Y, & - LDY, NRNK, TOL, KQ, REIGQ, IEIGQ, Z, LDZ, & - RES, AU, LDAU, W, LDW, S, LDS, WDUMMY, & - -1, IDUMMY, -1, INFO ) - LIWORK = IDUMMY(1) - ALLOCATE( IWORK(LIWORK) ) - LWORK = INT(WDUMMY(LWMINOPT)) - ALLOCATE(WORK(LWORK)) - - ! SGEDMDQ test: CALL SGEDMDQ - CALL SGEDMDQ( SCALE, JOBZ, RESIDS, WANTQ, WANTR, & - JOBREF, WHTSVD, M, N+1, F1, LDF, X, LDX, Y, & - LDY, NRNK, TOL, KQ, REIGQ, IEIGQ, Z, LDZ, & - RES, AU, LDAU, W, LDW, S, LDS, & - WORK, LWORK, IWORK, LIWORK, INFO ) - - SINGVQX(1:KQ) = WORK(MIN(M,N+1)+1: MIN(M,N+1)+KQ) - - !..... SGEDMDQ check point - IF ( KQ /= K ) THEN - KDIFF = KDIFF+1 - END IF - - TMP = ZERO - DO i = 1, MIN(K, KQ) - TMP = MAX(TMP, ABS(SINGVX(i)-SINGVQX(i)) / & - SINGVX(1) ) - END DO - SVDIFF = MAX( SVDIFF, TMP ) - IF ( TMP > M*N*EPS ) THEN - NFAIL_SVDIFF = NFAIL_SVDIFF + 1 - END IF - - !..... SGEDMDQ check point - IF ( LSAME(WANTQ,'Q') .AND. LSAME(WANTR,'R') ) THEN - ! Check that the QR factors are computed and returned - ! as requested. The residual ||F-Q*R||_F / ||F||_F - ! is compared to M*N*EPS. - F2 = F - CALL SGEMM( 'N', 'N', M, N+1, MIN(M,N+1), -ONE, F1, & - LDF, Y, LDY, ONE, F2, LDF ) - TMP_FQR = SLANGE( 'F', M, N+1, F2, LDF, WORK ) / & - SLANGE( 'F', M, N+1, F, LDF, WORK ) - IF ( TMP_FQR > TOL2 ) THEN - NFAIL_F_QR = NFAIL_F_QR + 1 - END IF - END IF - - !..... SGEDMDQ checkpoint - IF ( LSAME(RESIDS, 'R') ) THEN - ! Compare the residuals returned by SGEDMDQ with the - ! explicitly computed residuals using the matrix A. - ! Compute explicitly Y1 = A*Z - CALL SGEMM( 'N', 'N', M, KQ, M, ONE, A, M, Z, M, ZERO, Y1, M ) - ! ... and then A*Z(:,i) - LAMBDA(i)*Z(:,i), using the real forms - ! of the invariant subspaces that correspond to complex conjugate - ! pairs of eigencalues. (See the description of Z in SGEDMDQ) - i = 1 - DO WHILE ( i <= KQ ) - IF ( IEIGQ(i) == ZERO ) THEN - ! have a real eigenvalue with real eigenvector - CALL SAXPY( M, -REIGQ(i), Z(1,i), 1, Y1(1,i), 1 ) - ! Y(1:M,i) = Y(1:M,i) - REIG(i)*Z(1:M,i) - RES1(i) = SNRM2( M, Y1(1,i), 1) - i = i + 1 - ELSE - ! Have a complex conjugate pair - ! REIG(i) +- sqrt(-1)*IMEIG(i). - ! Since all computation is done in real - ! arithmetic, the formula for the residual - ! is recast for real representation of the - ! complex conjugate eigenpair. See the - ! description of RES. - AB(1,1) = REIGQ(i) - AB(2,1) = -IEIGQ(i) - AB(1,2) = IEIGQ(i) - AB(2,2) = REIGQ(i) - CALL SGEMM( 'N', 'N', M, 2, 2, -ONE, Z(1,i), & - M, AB, 2, ONE, Y1(1,i), M ) ! BLAS CALL - ! Y(1:M,i:i+1) = Y(1:M,i:i+1) - Z(1:M,i:i+1) * AB ! INTRINSIC - RES1(i) = SLANGE( 'F', M, 2, Y1(1,i), M, & - WORK ) ! LAPACK CALL - RES1(i+1) = RES1(i) - i = i + 2 - END IF - END DO - TMP = ZERO - DO i = 1, KQ - TMP = MAX( TMP, ABS(RES(i) - RES1(i)) * & - SINGVQX(K)/(ANORM*SINGVQX(1)) ) - END DO - TMP_REZQ = MAX( TMP_REZQ, TMP ) - IF ( TMP > TOL2 ) THEN - NFAIL_REZQ = NFAIL_REZQ + 1 - END IF - - END IF - - DO i = 1, KQ - LAMBDAQ(i,1) = REIGQ(i) - LAMBDAQ(i,2) = IEIGQ(i) - END DO - - DEALLOCATE(WORK) - DEALLOCATE(IWORK) - END IF ! TEST_QRDMD -!====================================================================== - - END DO ! LWMINOPT - !write(*,*) 'LWMINOPT loop completed' - END DO ! WHTSVD LOOP - !write(*,*) 'WHTSVD loop completed' - END DO ! NRNK LOOP - !write(*,*) 'NRNK loop completed' - END DO ! SCALE LOOP - !write(*,*) 'SCALE loop completed' - END DO ! JOBF LOOP - !write(*,*) 'JOBREF loop completed' - END DO ! JOBZ LOOP - !write(*,*) 'JOBZ loop completed' - - END DO ! MODE -6:6 - !write(*,*) 'MODE loop completed' - END DO ! 1 or 2 trajectories - !write(*,*) 'trajectories loop completed' - - DEALLOCATE(A) - DEALLOCATE(AC) - DEALLOCATE(DA) - DEALLOCATE(DL) - DEALLOCATE(F) - DEALLOCATE(F1) - DEALLOCATE(F2) - DEALLOCATE(X) - DEALLOCATE(X0) - DEALLOCATE(SINGVX) - DEALLOCATE(SINGVQX) - DEALLOCATE(Y) - DEALLOCATE(Y0) - DEALLOCATE(Y1) - DEALLOCATE(Z) - DEALLOCATE(Z1) - DEALLOCATE(RES) - DEALLOCATE(RES1) - DEALLOCATE(RESEX) - DEALLOCATE(REIG) - DEALLOCATE(IEIG) - DEALLOCATE(REIGQ) - DEALLOCATE(IEIGQ) - DEALLOCATE(REIGA) - DEALLOCATE(IEIGA) - DEALLOCATE(VA) - DEALLOCATE(LAMBDA) - DEALLOCATE(LAMBDAQ) - DEALLOCATE(EIGA) - DEALLOCATE(W) - DEALLOCATE(AU) - DEALLOCATE(S) - -!............................................................ - ! Generate random M-by-M matrix A. Use DLATMR from - END DO ! LLOOP - - - WRITE(*,*) '>>>>>>>>>>>>>>>>>>>>>>>>>>' - WRITE(*,*) ' Test summary for SGEDMD :' - WRITE(*,*) '>>>>>>>>>>>>>>>>>>>>>>>>>>' - WRITE(*,*) - IF ( NFAIL_Z_XV == 0 ) THEN - WRITE(*,*) '>>>> Z - U*V test PASSED.' - ELSE - WRITE(*,*) 'Z - U*V test FAILED ', NFAIL_Z_XV, ' time(s)' - WRITE(*,*) 'Max error ||Z-U*V||_F was ', TMP_ZXW - NFAIL_TOTAL = NFAIL_TOTAL + NFAIL_Z_XV - END IF - IF ( NFAIL_AU == 0 ) THEN - WRITE(*,*) '>>>> A*U test PASSED. ' - ELSE - WRITE(*,*) 'A*U test FAILED ', NFAIL_AU, ' time(s)' - WRITE(*,*) 'Max A*U test adjusted error measure was ', TMP_AU - WRITE(*,*) 'It should be up to O(M*N) times EPS, EPS = ', EPS - NFAIL_TOTAL = NFAIL_TOTAL + NFAIL_AU - END IF - - IF ( NFAIL_REZ == 0 ) THEN - WRITE(*,*) '>>>> Rezidual computation test PASSED.' - ELSE - WRITE(*,*) 'Rezidual computation test FAILED ', NFAIL_REZ, 'time(s)' - WRITE(*,*) 'Max residual computing test adjusted error measure was ', TMP_REZ - WRITE(*,*) 'It should be up to O(M*N) times EPS, EPS = ', EPS - NFAIL_TOTAL = NFAIL_TOTAL + NFAIL_REZ - END IF - - IF ( NFAIL_TOTAL == 0 ) THEN - WRITE(*,*) '>>>> SGEDMD :: ALL TESTS PASSED.' - ELSE - WRITE(*,*) NFAIL_TOTAL, 'FAILURES!' - WRITE(*,*) '>>>>>>>>>>>>>> SGEDMD :: TESTS FAILED. CHECK THE IMPLEMENTATION.' - END IF - - IF ( TEST_QRDMD ) THEN - WRITE(*,*) - WRITE(*,*) '>>>>>>>>>>>>>>>>>>>>>>>>>>' - WRITE(*,*) ' Test summary for SGEDMDQ :' - WRITE(*,*) '>>>>>>>>>>>>>>>>>>>>>>>>>>' - WRITE(*,*) - - IF ( NFAIL_SVDIFF == 0 ) THEN - WRITE(*,*) '>>>> SGEDMD and SGEDMDQ computed singular & - &values test PASSED.' - ELSE - WRITE(*,*) 'SGEDMD and SGEDMDQ discrepancies in & - &the singular values unacceptable ', & - NFAIL_SVDIFF, ' times. Test FAILED.' - WRITE(*,*) 'The maximal discrepancy in the singular values (relative to the norm) was ', SVDIFF - WRITE(*,*) 'It should be up to O(M*N) times EPS, EPS = ', EPS - NFAILQ_TOTAL = NFAILQ_TOTAL + NFAIL_SVDIFF - END IF - - IF ( NFAIL_F_QR == 0 ) THEN - WRITE(*,*) '>>>> F - Q*R test PASSED.' - ELSE - WRITE(*,*) 'F - Q*R test FAILED ', NFAIL_F_QR, ' time(s)' - WRITE(*,*) 'The largest relative residual was ', TMP_FQR - WRITE(*,*) 'It should be up to O(M*N) times EPS, EPS = ', EPS - NFAILQ_TOTAL = NFAILQ_TOTAL + NFAIL_F_QR - END IF - - IF ( NFAIL_REZQ == 0 ) THEN - WRITE(*,*) '>>>> Rezidual computation test PASSED.' - ELSE - WRITE(*,*) 'Rezidual computation test FAILED ', NFAIL_REZQ, 'time(s)' - WRITE(*,*) 'Max residual computing test adjusted error measure was ', TMP_REZQ - WRITE(*,*) 'It should be up to O(M*N) times EPS, EPS = ', EPS - NFAILQ_TOTAL = NFAILQ_TOTAL + NFAIL_REZQ - END IF - - IF ( NFAILQ_TOTAL == 0 ) THEN - WRITE(*,*) '>>>>>>> SGEDMDQ :: ALL TESTS PASSED.' - ELSE - WRITE(*,*) NFAILQ_TOTAL, 'FAILURES!' - WRITE(*,*) '>>>>>>> SGEDMDQ :: TESTS FAILED. CHECK THE IMPLEMENTATION.' - END IF - - END IF - - WRITE(*,*) - WRITE(*,*) 'Test completed.' - STOP - END +! This is a test program for checking the implementations of +! the implementations of the following subroutines +! +! SGEDMD for computation of the +! Dynamic Mode Decomposition (DMD) +! SGEDMDQ for computation of a +! QR factorization based compressed DMD +! +! Developed and supported by: +! =========================== +! Developed and coded by Zlatko Drmac, Faculty of Science, +! University of Zagreb; drmac@math.hr +! In cooperation with +! AIMdyn Inc., Santa Barbara, CA. +! ======================================================== +! How to run the code (compiler, link info) +! ======================================================== +! Compile as FORTRAN 90 (or later) and link with BLAS and +! LAPACK libraries. +! NOTE: The code is developed and tested on top of the +! Intel MKL library (versions 2022.0.3 and 2022.2.0), +! using the Intel Fortran compiler. +! +! For developers of the C++ implementation +! ======================================================== +! See the LAPACK++ and Template Numerical Toolkit (TNT) +! +! Note on a development of the GPU HP implementation +! ======================================================== +! Work in progress. See CUDA, MAGMA, SLATE. +! NOTE: The four SVD subroutines used in this code are +! included as a part of R&D and for the completeness. +! This was also an opportunity to test those SVD codes. +! If the scaling option is used all four are essentially +! equally good. For implementations on HP platforms, +! one can use whichever SVD is available. +!... ......................................................... +! NOTE: +! When using the Intel MKL 2022.0.3 the subroutine xGESVDQ +! (optionally used in xGEDMD) may cause access violation +! error for x = S, D, C, Z, but only if called with the +! work space query. (At least in our Windows 10 MSVS 2019.) +! The problem can be mitigated by downloading the source +! code of xGESVDQ from the LAPACK repository and use it +! locally instead of the one in the MKL. This seems to +! indicate that the problem is indeed in the MKL. +! This problem did not appear with Intel MKL 2022.2.0. +! +! NOTE: +! xGESDD seems to have a problem with workspace. In some +! cases the length of the optimal workspace is returned +! smaller than the minimal workspace, as specified in the +! code. As a precaution, all optimal workspaces are +! set as MAX(minimal, optimal). +! Latest implementations of complex xGESDD have different +! length of the real workspace. We use max value over +! two versions. +!............................................................ +!............................................................ +! + PROGRAM DMD_TEST + use iso_fortran_env, only: real32 + IMPLICIT NONE + integer, parameter :: WP = real32 + +!............................................................ + REAL(KIND=WP), PARAMETER :: ONE = 1.0_WP + REAL(KIND=WP), PARAMETER :: ZERO = 0.0_WP +!............................................................ + REAL(KIND=WP), ALLOCATABLE, DIMENSION(:,:) :: & + A, AC, EIGA, LAMBDA, LAMBDAQ, F, F1, F2,& + Z, Z1, S, AU, W, VA, X, X0, Y, Y0, Y1 + REAL(KIND=WP), ALLOCATABLE, DIMENSION(:) :: & + DA, DL, DR, REIG, REIGA, REIGQ, IEIG, & + IEIGA, IEIGQ, RES, RES1, RESEX, SINGVX,& + SINGVQX, WORK + INTEGER , ALLOCATABLE, DIMENSION(:) :: IWORK + REAL(KIND=WP) :: AB(2,2), WDUMMY(2) + INTEGER :: IDUMMY(2), ISEED(4), RJOBDATA(8) + REAL(KIND=WP) :: ANORM, COND, CONDL, CONDR, DMAX, EPS, & + TOL, TOL2, SVDIFF, TMP, TMP_AU, & + TMP_FQR, TMP_REZ, TMP_REZQ, TMP_ZXW, & + TMP_EX, XNORM, YNORM +!............................................................ + INTEGER :: K, KQ, LDF, LDS, LDA, LDAU, LDW, LDX, LDY, & + LDZ, LIWORK, LWORK, M, N, L, LLOOP, NRNK + INTEGER :: i, iJOBREF, iJOBZ, iSCALE, INFO, KDIFF, & + NFAIL, NFAIL_AU, NFAIL_F_QR, NFAIL_REZ, & + NFAIL_REZQ, NFAIL_SVDIFF, NFAIL_TOTAL, NFAILQ_TOTAL, & + NFAIL_Z_XV, MODE, MODEL, MODER, WHTSVD + INTEGER iNRNK, iWHTSVD, K_TRAJ, LWMINOPT + CHARACTER(LEN=1) GRADE, JOBREF, JOBZ, PIVTNG, RSIGN, & + SCALE, RESIDS, WANTQ, WANTR + + LOGICAL TEST_QRDMD +!..... external subroutines (BLAS and LAPACK) + EXTERNAL SAXPY, SGEEV, SGEMM, SGEMV, SLACPY, SLASCL + EXTERNAL SLARNV, SLATMR, XLAENV +!.....external subroutines DMD package, part 1 +! subroutines under test + EXTERNAL SGEDMD, SGEDMDQ + +!..... external functions (BLAS and LAPACK) + EXTERNAL SLAMCH, SLANGE, SNRM2 + REAL(KIND=WP) :: SLAMCH, SLANGE, SNRM2 + EXTERNAL LSAME + LOGICAL LSAME + + INTRINSIC ABS, INT, MIN, MAX +!............................................................ + + ! The test is always in pairs : ( SGEDMD and SGEDMDQ ) + ! because the test includes comparing the results (in pairs). +!..................................................................................... + TEST_QRDMD = .TRUE. ! This code by default performs tests on SGEDMDQ + ! Since the QR factorizations based algorithm is designed for + ! single trajectory data, only single trajectory tests will + ! be performed with xGEDMDQ. + WANTQ = 'Q' + WANTR = 'R' + ! Initialize the divide-and-conquer cutoff used by xGESDD/xBDSDC. + CALL XLAENV( 9, 25 ) +!................................................................................. + + EPS = SLAMCH( 'P' ) ! machine precision SP + + ! Global counters of failures of some particular tests + NFAIL = 0 + NFAIL_REZ = 0 + NFAIL_REZQ = 0 + NFAIL_Z_XV = 0 + NFAIL_F_QR = 0 + NFAIL_AU = 0 + KDIFF = 0 + NFAIL_SVDIFF = 0 + NFAIL_TOTAL = 0 + NFAILQ_TOTAL = 0 + + + DO LLOOP = 1, 4 + + WRITE(*,*) 'L Loop Index = ', LLOOP + + ! Set the dimensions of the problem ... + WRITE(*,*) 'M = ' + READ(*,*) M + WRITE(*,*) M + ! ... and the number of snapshots. + WRITE(*,*) 'N = ' + READ(*,*) N + WRITE(*,*) N + + ! ... Test the dimensions + IF ( ( MIN(M,N) == 0 ) .OR. ( M < N ) ) THEN + WRITE(*,*) 'Bad dimensions. Required: M >= N > 0.' + STOP + END IF +!............. + ! The seed inside the LLOOP so that each pass can be reproduced easily. + + ISEED(1) = 4 + ISEED(2) = 3 + ISEED(3) = 2 + ISEED(4) = 1 + + LDA = M + LDF = M + LDX = MAX(M,N+1) + LDY = MAX(M,N+1) + LDW = N + LDZ = M + LDAU = MAX(M,N+1) + LDS = N + + TMP_ZXW = ZERO + TMP_AU = ZERO + TMP_REZ = ZERO + TMP_REZQ = ZERO + SVDIFF = ZERO + TMP_EX = ZERO + + ! + ! Test the subroutines on real data snapshots. All + ! computation is done in real arithmetic, even when + ! Koopman eigenvalues and modes are real. + ! + ! Allocate memory space + ALLOCATE( A(LDA,M) ) + ALLOCATE( AC(LDA,M) ) + ALLOCATE( DA(M) ) + ALLOCATE( DL(M) ) + ALLOCATE( F(LDF,N+1) ) + ALLOCATE( F1(LDF,N+1) ) + ALLOCATE( F2(LDF,N+1) ) + ALLOCATE( X(LDX,N) ) + ALLOCATE( X0(LDX,N) ) + ALLOCATE( SINGVX(N) ) + ALLOCATE( SINGVQX(N) ) + ALLOCATE( Y(LDY,N+1) ) + ALLOCATE( Y0(LDY,N+1) ) + ALLOCATE( Y1(M,N+1) ) + ALLOCATE( Z(LDZ,N) ) + ALLOCATE( Z1(LDZ,N) ) + ALLOCATE( RES(N) ) + ALLOCATE( RES1(N) ) + ALLOCATE( RESEX(N) ) + ALLOCATE( REIG(N) ) + ALLOCATE( IEIG(N) ) + ALLOCATE( REIGQ(N) ) + ALLOCATE( IEIGQ(N) ) + ALLOCATE( REIGA(M) ) + ALLOCATE( IEIGA(M) ) + ALLOCATE( VA(LDA,M) ) + ALLOCATE( LAMBDA(N,2) ) + ALLOCATE( LAMBDAQ(N,2) ) + ALLOCATE( EIGA(M,2) ) + ALLOCATE( W(LDW,N) ) + ALLOCATE( AU(LDAU,N) ) + ALLOCATE( S(N,N) ) + + TOL = M*EPS + ! This mimics O(M*N)*EPS bound for accumulated roundoff error. + ! The factor 10 is somewhat arbitrary. + TOL2 = 10*M*N*EPS + +!............. + + DO K_TRAJ = 1, 2 + ! Number of initial conditions in the simulation/trajectories (1 or 2) + + COND = 1.0D8 + DMAX = 1.0D2 + RSIGN = 'F' + GRADE = 'N' + MODEL = 6 + CONDL = 1.0D2 + MODER = 6 + CONDR = 1.0D2 + PIVTNG = 'N' + + ! Loop over all parameter MODE values for ZLATMR (+1,..,+6) + DO MODE = 1, 6 + + ALLOCATE( IWORK(2*M) ) + ALLOCATE(DR(N)) + CALL SLATMR( M, M, 'S', ISEED, 'N', DA, MODE, COND, & + DMAX, RSIGN, GRADE, DL, MODEL, CONDL, & + DR, MODER, CONDR, PIVTNG, IWORK, M, M, & + ZERO, -ONE, 'N', A, LDA, IWORK(M+1), INFO ) + DEALLOCATE(IWORK) + DEALLOCATE(DR) + + LWORK = 4*M+1 + ALLOCATE(WORK(LWORK)) + AC = A + CALL SGEEV( 'N','V', M, AC, M, REIGA, IEIGA, VA, M, & + VA, M, WORK, LWORK, INFO ) ! LAPACK CALL + DEALLOCATE(WORK) + TMP = ZERO + DO i = 1, M + EIGA(i,1) = REIGA(i) + EIGA(i,2) = IEIGA(i) + TMP = MAX( TMP, SQRT(REIGA(i)**2+IEIGA(i)**2)) + END DO + + ! Scale A to have the desirable spectral radius. + CALL SLASCL( 'G', 0, 0, TMP, ONE, M, M, A, M, INFO ) + CALL SLASCL( 'G', 0, 0, TMP, ONE, M, 2, EIGA, M, INFO ) + + ! Compute the norm of A + ANORM = SLANGE( 'F', N, N, A, M, WDUMMY ) + + IF ( K_TRAJ == 2 ) THEN + ! generate data with two initial conditions + CALL SLARNV(2, ISEED, M, F1(1,1) ) + F1(1:M,1) = 1.0E-10*F1(1:M,1) + DO i = 1, N/2 + CALL SGEMV( 'N', M, M, ONE, A, M, F1(1,i), 1, ZERO, & + F1(1,i+1), 1 ) + END DO + X0(1:M,1:N/2) = F1(1:M,1:N/2) + Y0(1:M,1:N/2) = F1(1:M,2:N/2+1) + + CALL SLARNV(2, ISEED, M, F1(1,1) ) + DO i = 1, N-N/2 + CALL SGEMV( 'N', M, M, ONE, A, M, F1(1,i), 1, ZERO, & + F1(1,i+1), 1 ) + END DO + X0(1:M,N/2+1:N) = F1(1:M,1:N-N/2) + Y0(1:M,N/2+1:N) = F1(1:M,2:N-N/2+1) + ELSE + ! single trajectory + CALL SLARNV(2, ISEED, M, F(1,1) ) + DO i = 1, N + CALL SGEMV( 'N', M, M, ONE, A, M, F(1,i), 1, ZERO, & + F(1,i+1), 1 ) + END DO + X0(1:M,1:N) = F(1:M,1:N) + Y0(1:M,1:N) = F(1:M,2:N+1) + END IF + + XNORM = SLANGE( 'F', M, N, X0, LDX, WDUMMY ) + YNORM = SLANGE( 'F', M, N, Y0, LDX, WDUMMY ) +!............................................................ + + DO iJOBZ = 1, 4 + + SELECT CASE ( iJOBZ ) + CASE(1) + JOBZ = 'V' ! Ritz vectors will be computed + RESIDS = 'R' ! Residuals will be computed + CASE(2) + JOBZ = 'V' + RESIDS = 'N' + CASE(3) + JOBZ = 'F' ! Ritz vectors in factored form + RESIDS = 'N' + CASE(4) + JOBZ = 'N' + RESIDS = 'N' + END SELECT + + DO iJOBREF = 1, 3 + + SELECT CASE ( iJOBREF ) + CASE(1) + JOBREF = 'R' ! Data for refined Ritz vectors + CASE(2) + JOBREF = 'E' ! Exact DMD vectors + CASE(3) + JOBREF = 'N' + END SELECT + + DO iSCALE = 1, 4 + + SELECT CASE ( iSCALE ) + CASE(1) + SCALE = 'S' ! X data normalized + CASE(2) + SCALE = 'C' ! X normalized, consist. check + CASE(3) + SCALE = 'Y' ! Y data normalized + CASE(4) + SCALE = 'N' + END SELECT + + DO iNRNK = -1, -2, -1 + ! Two truncation strategies. The "-2" case for R&D + ! purposes only - it uses possibly low accuracy small + ! singular values, in which case the formulas used in + ! the DMD are highly sensitive. + NRNK = iNRNK + + DO iWHTSVD = 1, 4 + ! Check all four options to compute the POD basis + ! via the SVD. + WHTSVD = iWHTSVD + + DO LWMINOPT = 1, 2 + ! Workspace query for the minimal (1) and for the optimal + ! (2) workspace lengths determined by workspace query. + + X(1:M,1:N) = X0(1:M,1:N) + Y(1:M,1:N) = Y0(1:M,1:N) + + ! SGEDMD: Workspace query and workspace allocation + CALL SGEDMD( SCALE, JOBZ, RESIDS, JOBREF, WHTSVD, M, & + N, X, LDX, Y, LDY, NRNK, TOL, K, REIG, IEIG, Z, & + LDZ, RES, AU, LDAU, W, LDW, S, LDS, WDUMMY, -1, & + IDUMMY, -1, INFO ) + + LIWORK = IDUMMY(1) + ALLOCATE( IWORK(LIWORK) ) + LWORK = INT(WDUMMY(LWMINOPT)) + ALLOCATE( WORK(LWORK) ) + + ! SGEDMD test: CALL SGEDMD + CALL SGEDMD( SCALE, JOBZ, RESIDS, JOBREF, WHTSVD, M, & + N, X, LDX, Y, LDY, NRNK, TOL, K, REIG, IEIG, Z, & + LDZ, RES, AU, LDAU, W, LDW, S, LDS, WORK, LWORK,& + IWORK, LIWORK, INFO ) + + SINGVX(1:N) = WORK(1:N) + + !...... SGEDMD check point + IF ( LSAME(JOBZ,'V') ) THEN + ! Check that Z = X*W, on return from SGEDMD + ! This checks that the returned eigenvectors in Z are + ! the product of the SVD'POD basis returned in X + ! and the eigenvectors of the Rayleigh quotient + ! returned in W + CALL SGEMM( 'N', 'N', M, K, K, ONE, X, LDX, W, LDW, & + ZERO, Z1, LDZ ) + TMP = ZERO + DO i = 1, K + CALL SAXPY( M, -ONE, Z(1,i), 1, Z1(1,i), 1) + TMP = MAX(TMP, SNRM2( M, Z1(1,i), 1 ) ) + END DO + TMP_ZXW = MAX(TMP_ZXW, TMP ) + + IF ( TMP_ZXW > 10*M*EPS ) THEN + NFAIL_Z_XV = NFAIL_Z_XV + 1 + END IF + + END IF + + !...... SGEDMD check point + IF ( LSAME(JOBREF,'R') ) THEN + ! The matrix A*U is returned for computing refined Ritz vectors. + ! Check that A*U is computed correctly using the formula + ! A*U = Y * V * inv(SIGMA). This depends on the + ! accuracy in the computed singular values and vectors of X. + ! See the paper for an error analysis. + ! Note that the left singular vectors of the input matrix X + ! are returned in the array X. + CALL SGEMM( 'N', 'N', M, K, M, ONE, A, LDA, X, LDX, & + ZERO, Z1, LDZ ) + TMP = ZERO + DO i = 1, K + CALL SAXPY( M, -ONE, AU(1,i), 1, Z1(1,i), 1) + TMP = MAX( TMP, SNRM2( M, Z1(1,i),1 ) * & + SINGVX(K)/(ANORM*SINGVX(1)) ) + END DO + TMP_AU = MAX( TMP_AU, TMP ) + + IF ( TMP > TOL2 ) THEN + NFAIL_AU = NFAIL_AU + 1 + END IF + + ELSEIF ( LSAME(JOBREF,'E') ) THEN + ! The unscaled vectors of the Exact DMD are computed. + ! This option is included for the sake of completeness, + ! for users who prefer the Exact DMD vectors. The + ! returned vectors are in the real form, in the same way + ! as the Ritz vectors. Here we just save the vectors + ! and test them separately using a Matlab script. + + CALL SGEMM( 'N', 'N', M, K, M, ONE, A, LDA, AU, LDAU, ZERO, Y1, M ) + i=1 + DO WHILE ( i <= K ) + IF ( IEIG(i) == ZERO ) THEN + ! have a real eigenvalue with real eigenvector + CALL SAXPY( M, -REIG(i), AU(1,i), 1, Y1(1,i), 1 ) + RESEX(i) = SNRM2( M, Y1(1,i), 1) / SNRM2(M,AU(1,i),1) + i = i + 1 + ELSE + ! Have a complex conjugate pair + ! REIG(i) +- sqrt(-1)*IMEIG(i). + ! Since all computation is done in real + ! arithmetic, the formula for the residual + ! is recast for real representation of the + ! complex conjugate eigenpair. See the + ! description of RES. + AB(1,1) = REIG(i) + AB(2,1) = -IEIG(i) + AB(1,2) = IEIG(i) + AB(2,2) = REIG(i) + CALL SGEMM( 'N', 'N', M, 2, 2, -ONE, AU(1,i), & + M, AB, 2, ONE, Y1(1,i), M ) + RESEX(i) = SLANGE( 'F', M, 2, Y1(1,i), M, & + WORK )/ SLANGE( 'F', M, 2, AU(1,i), M, & + WORK ) + RESEX(i+1) = RESEX(i) + i = i + 2 + END IF + END DO + + END IF + + !...... SGEDMD check point + IF ( LSAME(RESIDS, 'R') ) THEN + ! Compare the residuals returned by SGEDMD with the + ! explicitly computed residuals using the matrix A. + ! Compute explicitly Y1 = A*Z + CALL SGEMM( 'N', 'N', M, K, M, ONE, A, LDA, Z, LDZ, ZERO, Y1, M ) + ! ... and then A*Z(:,i) - LAMBDA(i)*Z(:,i), using the real forms + ! of the invariant subspaces that correspond to complex conjugate + ! pairs of eigenvalues. (See the description of Z in SGEDMD,) + i = 1 + DO WHILE ( i <= K ) + IF ( IEIG(i) == ZERO ) THEN + ! have a real eigenvalue with real eigenvector + CALL SAXPY( M, -REIG(i), Z(1,i), 1, Y1(1,i), 1 ) + RES1(i) = SNRM2( M, Y1(1,i), 1) + i = i + 1 + ELSE + ! Have a complex conjugate pair + ! REIG(i) +- sqrt(-1)*IMEIG(i). + ! Since all computation is done in real + ! arithmetic, the formula for the residual + ! is recast for real representation of the + ! complex conjugate eigenpair. See the + ! description of RES. + AB(1,1) = REIG(i) + AB(2,1) = -IEIG(i) + AB(1,2) = IEIG(i) + AB(2,2) = REIG(i) + CALL SGEMM( 'N', 'N', M, 2, 2, -ONE, Z(1,i), & + M, AB, 2, ONE, Y1(1,i), M ) + RES1(i) = SLANGE( 'F', M, 2, Y1(1,i), M, & + WORK ) + RES1(i+1) = RES1(i) + i = i + 2 + END IF + END DO + TMP = ZERO + DO i = 1, K + TMP = MAX( TMP, ABS(RES(i) - RES1(i)) * & + SINGVX(K)/(ANORM*SINGVX(1)) ) + END DO + TMP_REZ = MAX( TMP_REZ, TMP ) + + IF ( TMP > TOL2 ) THEN + NFAIL_REZ = NFAIL_REZ + 1 + END IF + + IF ( LSAME(JOBREF,'E') ) THEN + TMP = ZERO + DO i = 1, K + TMP = MAX( TMP, ABS(RES1(i) - RESEX(i))/(RES1(i)+RESEX(i)) ) + END DO + TMP_EX = MAX(TMP_EX,TMP) + END IF + + END IF + + ! ... store the results for inspection + DO i = 1, K + LAMBDA(i,1) = REIG(i) + LAMBDA(i,2) = IEIG(i) + END DO + + DEALLOCATE(IWORK) + DEALLOCATE(WORK) + + !====================================================================== + ! Now test the SGEDMDQ, if requested. + !====================================================================== + IF ( TEST_QRDMD .AND. (K_TRAJ == 1) ) THEN + RJOBDATA(2) = 1 + F1 = F + + ! SGEDMDQ test: Workspace query and workspace allocation + CALL SGEDMDQ( SCALE, JOBZ, RESIDS, WANTQ, WANTR, & + JOBREF, WHTSVD, M, N+1, F1, LDF, X, LDX, Y, & + LDY, NRNK, TOL, KQ, REIGQ, IEIGQ, Z, LDZ, & + RES, AU, LDAU, W, LDW, S, LDS, WDUMMY, & + -1, IDUMMY, -1, INFO ) + LIWORK = IDUMMY(1) + ALLOCATE( IWORK(LIWORK) ) + LWORK = INT(WDUMMY(LWMINOPT)) + ALLOCATE(WORK(LWORK)) + + ! SGEDMDQ test: CALL SGEDMDQ + CALL SGEDMDQ( SCALE, JOBZ, RESIDS, WANTQ, WANTR, & + JOBREF, WHTSVD, M, N+1, F1, LDF, X, LDX, Y, & + LDY, NRNK, TOL, KQ, REIGQ, IEIGQ, Z, LDZ, & + RES, AU, LDAU, W, LDW, S, LDS, & + WORK, LWORK, IWORK, LIWORK, INFO ) + + SINGVQX(1:KQ) = WORK(MIN(M,N+1)+1: MIN(M,N+1)+KQ) + + !..... SGEDMDQ check point + IF ( KQ /= K ) THEN + KDIFF = KDIFF+1 + END IF + + TMP = ZERO + DO i = 1, MIN(K, KQ) + TMP = MAX(TMP, ABS(SINGVX(i)-SINGVQX(i)) / & + SINGVX(1) ) + END DO + SVDIFF = MAX( SVDIFF, TMP ) + IF ( TMP > M*N*EPS ) THEN + NFAIL_SVDIFF = NFAIL_SVDIFF + 1 + END IF + + !..... SGEDMDQ check point + IF ( LSAME(WANTQ,'Q') .AND. LSAME(WANTR,'R') ) THEN + ! Check that the QR factors are computed and returned + ! as requested. The residual ||F-Q*R||_F / ||F||_F + ! is compared to M*N*EPS. + F2 = F + CALL SGEMM( 'N', 'N', M, N+1, MIN(M,N+1), -ONE, F1, & + LDF, Y, LDY, ONE, F2, LDF ) + TMP_FQR = SLANGE( 'F', M, N+1, F2, LDF, WORK ) / & + SLANGE( 'F', M, N+1, F, LDF, WORK ) + IF ( TMP_FQR > TOL2 ) THEN + NFAIL_F_QR = NFAIL_F_QR + 1 + END IF + END IF + + !..... SGEDMDQ checkpoint + IF ( LSAME(RESIDS, 'R') ) THEN + ! Compare the residuals returned by SGEDMDQ with the + ! explicitly computed residuals using the matrix A. + ! Compute explicitly Y1 = A*Z + CALL SGEMM( 'N', 'N', M, KQ, M, ONE, A, M, Z, M, ZERO, Y1, M ) + ! ... and then A*Z(:,i) - LAMBDA(i)*Z(:,i), using the real forms + ! of the invariant subspaces that correspond to complex conjugate + ! pairs of eigenvalues. (See the description of Z in SGEDMDQ) + i = 1 + DO WHILE ( i <= KQ ) + IF ( IEIGQ(i) == ZERO ) THEN + ! have a real eigenvalue with real eigenvector + CALL SAXPY( M, -REIGQ(i), Z(1,i), 1, Y1(1,i), 1 ) + ! Y(1:M,i) = Y(1:M,i) - REIG(i)*Z(1:M,i) + RES1(i) = SNRM2( M, Y1(1,i), 1) + i = i + 1 + ELSE + ! Have a complex conjugate pair + ! REIG(i) +- sqrt(-1)*IMEIG(i). + ! Since all computation is done in real + ! arithmetic, the formula for the residual + ! is recast for real representation of the + ! complex conjugate eigenpair. See the + ! description of RES. + AB(1,1) = REIGQ(i) + AB(2,1) = -IEIGQ(i) + AB(1,2) = IEIGQ(i) + AB(2,2) = REIGQ(i) + CALL SGEMM( 'N', 'N', M, 2, 2, -ONE, Z(1,i), & + M, AB, 2, ONE, Y1(1,i), M ) ! BLAS CALL + ! Y(1:M,i:i+1) = Y(1:M,i:i+1) - Z(1:M,i:i+1) * AB ! INTRINSIC + RES1(i) = SLANGE( 'F', M, 2, Y1(1,i), M, & + WORK ) ! LAPACK CALL + RES1(i+1) = RES1(i) + i = i + 2 + END IF + END DO + TMP = ZERO + DO i = 1, KQ + TMP = MAX( TMP, ABS(RES(i) - RES1(i)) * & + SINGVQX(K)/(ANORM*SINGVQX(1)) ) + END DO + TMP_REZQ = MAX( TMP_REZQ, TMP ) + IF ( TMP > TOL2 ) THEN + NFAIL_REZQ = NFAIL_REZQ + 1 + END IF + + END IF + + DO i = 1, KQ + LAMBDAQ(i,1) = REIGQ(i) + LAMBDAQ(i,2) = IEIGQ(i) + END DO + + DEALLOCATE(WORK) + DEALLOCATE(IWORK) + END IF ! TEST_QRDMD +!====================================================================== + + END DO ! LWMINOPT + !write(*,*) 'LWMINOPT loop completed' + END DO ! WHTSVD LOOP + !write(*,*) 'WHTSVD loop completed' + END DO ! NRNK LOOP + !write(*,*) 'NRNK loop completed' + END DO ! SCALE LOOP + !write(*,*) 'SCALE loop completed' + END DO ! JOBF LOOP + !write(*,*) 'JOBREF loop completed' + END DO ! JOBZ LOOP + !write(*,*) 'JOBZ loop completed' + + END DO ! MODE -6:6 + !write(*,*) 'MODE loop completed' + END DO ! 1 or 2 trajectories + !write(*,*) 'trajectories loop completed' + + DEALLOCATE(A) + DEALLOCATE(AC) + DEALLOCATE(DA) + DEALLOCATE(DL) + DEALLOCATE(F) + DEALLOCATE(F1) + DEALLOCATE(F2) + DEALLOCATE(X) + DEALLOCATE(X0) + DEALLOCATE(SINGVX) + DEALLOCATE(SINGVQX) + DEALLOCATE(Y) + DEALLOCATE(Y0) + DEALLOCATE(Y1) + DEALLOCATE(Z) + DEALLOCATE(Z1) + DEALLOCATE(RES) + DEALLOCATE(RES1) + DEALLOCATE(RESEX) + DEALLOCATE(REIG) + DEALLOCATE(IEIG) + DEALLOCATE(REIGQ) + DEALLOCATE(IEIGQ) + DEALLOCATE(REIGA) + DEALLOCATE(IEIGA) + DEALLOCATE(VA) + DEALLOCATE(LAMBDA) + DEALLOCATE(LAMBDAQ) + DEALLOCATE(EIGA) + DEALLOCATE(W) + DEALLOCATE(AU) + DEALLOCATE(S) + +!............................................................ + ! Generate random M-by-M matrix A. Use DLATMR from + END DO ! LLOOP + + + WRITE(*,*) '>>>>>>>>>>>>>>>>>>>>>>>>>>' + WRITE(*,*) ' Test summary for SGEDMD :' + WRITE(*,*) '>>>>>>>>>>>>>>>>>>>>>>>>>>' + WRITE(*,*) + IF ( NFAIL_Z_XV == 0 ) THEN + WRITE(*,*) '>>>> Z - U*V test PASSED.' + ELSE + WRITE(*,*) 'Z - U*V test FAILED ', NFAIL_Z_XV, ' time(s)' + WRITE(*,*) 'Max error ||Z-U*V||_F was ', TMP_ZXW + NFAIL_TOTAL = NFAIL_TOTAL + NFAIL_Z_XV + END IF + IF ( NFAIL_AU == 0 ) THEN + WRITE(*,*) '>>>> A*U test PASSED. ' + ELSE + WRITE(*,*) 'A*U test FAILED ', NFAIL_AU, ' time(s)' + WRITE(*,*) 'Max A*U test adjusted error measure was ', TMP_AU + WRITE(*,*) 'It should be up to O(M*N) times EPS, EPS = ', EPS + NFAIL_TOTAL = NFAIL_TOTAL + NFAIL_AU + END IF + + IF ( NFAIL_REZ == 0 ) THEN + WRITE(*,*) '>>>> Residual computation test PASSED.' + ELSE + WRITE(*,*) 'Residual computation test FAILED ', NFAIL_REZ, 'time(s)' + WRITE(*,*) 'Max residual computing test adjusted error measure was ', TMP_REZ + WRITE(*,*) 'It should be up to O(M*N) times EPS, EPS = ', EPS + NFAIL_TOTAL = NFAIL_TOTAL + NFAIL_REZ + END IF + + IF ( NFAIL_TOTAL == 0 ) THEN + WRITE(*,*) '>>>> SGEDMD :: ALL TESTS PASSED.' + ELSE + WRITE(*,*) NFAIL_TOTAL, 'FAILURES!' + WRITE(*,*) '>>>>>>>>>>>>>> SGEDMD :: TESTS FAILED. CHECK THE IMPLEMENTATION.' + END IF + + IF ( TEST_QRDMD ) THEN + WRITE(*,*) + WRITE(*,*) '>>>>>>>>>>>>>>>>>>>>>>>>>>' + WRITE(*,*) ' Test summary for SGEDMDQ :' + WRITE(*,*) '>>>>>>>>>>>>>>>>>>>>>>>>>>' + WRITE(*,*) + + IF ( NFAIL_SVDIFF == 0 ) THEN + WRITE(*,*) '>>>> SGEDMD and SGEDMDQ computed singular & + &values test PASSED.' + ELSE + WRITE(*,*) 'SGEDMD and SGEDMDQ discrepancies in & + &the singular values unacceptable ', & + NFAIL_SVDIFF, ' times. Test FAILED.' + WRITE(*,*) 'The maximal discrepancy in the singular values (relative to the norm) was ', SVDIFF + WRITE(*,*) 'It should be up to O(M*N) times EPS, EPS = ', EPS + NFAILQ_TOTAL = NFAILQ_TOTAL + NFAIL_SVDIFF + END IF + + IF ( NFAIL_F_QR == 0 ) THEN + WRITE(*,*) '>>>> F - Q*R test PASSED.' + ELSE + WRITE(*,*) 'F - Q*R test FAILED ', NFAIL_F_QR, ' time(s)' + WRITE(*,*) 'The largest relative residual was ', TMP_FQR + WRITE(*,*) 'It should be up to O(M*N) times EPS, EPS = ', EPS + NFAILQ_TOTAL = NFAILQ_TOTAL + NFAIL_F_QR + END IF + + IF ( NFAIL_REZQ == 0 ) THEN + WRITE(*,*) '>>>> Residual computation test PASSED.' + ELSE + WRITE(*,*) 'Residual computation test FAILED ', NFAIL_REZQ, 'time(s)' + WRITE(*,*) 'Max residual computing test adjusted error measure was ', TMP_REZQ + WRITE(*,*) 'It should be up to O(M*N) times EPS, EPS = ', EPS + NFAILQ_TOTAL = NFAILQ_TOTAL + NFAIL_REZQ + END IF + + IF ( NFAILQ_TOTAL == 0 ) THEN + WRITE(*,*) '>>>>>>> SGEDMDQ :: ALL TESTS PASSED.' + ELSE + WRITE(*,*) NFAILQ_TOTAL, 'FAILURES!' + WRITE(*,*) '>>>>>>> SGEDMDQ :: TESTS FAILED. CHECK THE IMPLEMENTATION.' + END IF + + END IF + + WRITE(*,*) + WRITE(*,*) 'Test completed.' + STOP + END diff --git a/lapack-netlib/TESTING/EIG/schkee.F b/lapack-netlib/TESTING/EIG/schkee.F index bf04b5e5b6..fa780b695c 100644 --- a/lapack-netlib/TESTING/EIG/schkee.F +++ b/lapack-netlib/TESTING/EIG/schkee.F @@ -2511,7 +2511,7 @@ PROGRAM SCHKEE 9974 FORMAT( ' Tests of SSBTRD', / ' (reduction of a symmetric band ', $ 'matrix to tridiagonal form)' ) 9973 FORMAT( / 1X, 71( '-' ) ) - 9972 FORMAT( / ' LAPACK VERSION ', I1, '.', I1, '.', I1 ) + 9972 FORMAT( / ' LAPACK VERSION ', I1, '.', I2, '.', I1 ) 9971 FORMAT( / ' Tests of the Generalized Linear Regression Model ', $ 'routines' ) 9970 FORMAT( / ' Tests of the Generalized QR and RQ routines' ) diff --git a/lapack-netlib/TESTING/EIG/sdrvsg2stg.f b/lapack-netlib/TESTING/EIG/sdrvsg2stg.f index 38ed616281..5a5e9d8da5 100644 --- a/lapack-netlib/TESTING/EIG/sdrvsg2stg.f +++ b/lapack-netlib/TESTING/EIG/sdrvsg2stg.f @@ -990,7 +990,7 @@ SUBROUTINE SDRVSG2STG( NSIZES, NN, NTYPES, DOTYPE, ISEED, THRESH, * CALL SSPGVX( IBTYPE, 'V', 'A', UPLO, N, AP, BP, VL, $ VU, IL, IU, ABSTOL, M, D, Z, LDZ, WORK, - $ IWORK( N+1 ), IWORK, INFO ) + $ IWORK( N+1 ), IWORK, IINFO ) IF( IINFO.NE.0 ) THEN WRITE( NOUNIT, FMT = 9999 )'SSPGVX(V,A' // UPLO // $ ')', IINFO, N, JTYPE, IOLDSD @@ -1036,7 +1036,7 @@ SUBROUTINE SDRVSG2STG( NSIZES, NN, NTYPES, DOTYPE, ISEED, THRESH, VU = ANORM CALL SSPGVX( IBTYPE, 'V', 'V', UPLO, N, AP, BP, VL, $ VU, IL, IU, ABSTOL, M, D, Z, LDZ, WORK, - $ IWORK( N+1 ), IWORK, INFO ) + $ IWORK( N+1 ), IWORK, IINFO ) IF( IINFO.NE.0 ) THEN WRITE( NOUNIT, FMT = 9999 )'SSPGVX(V,V' // UPLO // $ ')', IINFO, N, JTYPE, IOLDSD @@ -1080,7 +1080,7 @@ SUBROUTINE SDRVSG2STG( NSIZES, NN, NTYPES, DOTYPE, ISEED, THRESH, * CALL SSPGVX( IBTYPE, 'V', 'I', UPLO, N, AP, BP, VL, $ VU, IL, IU, ABSTOL, M, D, Z, LDZ, WORK, - $ IWORK( N+1 ), IWORK, INFO ) + $ IWORK( N+1 ), IWORK, IINFO ) IF( IINFO.NE.0 ) THEN WRITE( NOUNIT, FMT = 9999 )'SSPGVX(V,I' // UPLO // $ ')', IINFO, N, JTYPE, IOLDSD diff --git a/lapack-netlib/TESTING/EIG/zchkdmd.f90 b/lapack-netlib/TESTING/EIG/zchkdmd.f90 index 873d956c40..09c34f8686 100644 --- a/lapack-netlib/TESTING/EIG/zchkdmd.f90 +++ b/lapack-netlib/TESTING/EIG/zchkdmd.f90 @@ -1,745 +1,747 @@ -! This is a test program for checking the implementations of -! the implementations of the following subroutines -! -! ZGEDMD, for computation of the -! Dynamic Mode Decomposition (DMD) -! ZGEDMDQ, for computation of a -! QR factorization based compressed DMD -! -! Developed and supported by: -! =========================== -! Developed and coded by Zlatko Drmac, Faculty of Science, -! University of Zagreb; drmac@math.hr -! In cooperation with -! AIMdyn Inc., Santa Barbara, CA. -! ======================================================== -! How to run the code (compiler, link info) -! ======================================================== -! Compile as FORTRAN 90 (or later) and link with BLAS and -! LAPACK libraries. -! NOTE: The code is developed and tested on top of the -! Intel MKL library (versions 2022.0.3 and 2022.2.0), -! using the Intel Fortran compiler. -! -! For developers of the C++ implementation -! ======================================================== -! See the LAPACK++ and Template Numerical Toolkit (TNT) -! -! Note on a development of the GPU HP implementation -! ======================================================== -! Work in progress. See CUDA, MAGMA, SLATE. -! NOTE: The four SVD subroutines used in this code are -! included as a part of R&D and for the completeness. -! This was also an opportunity to test those SVD codes. -! If the scaling option is used all four are essentially -! equally good. For implementations on HP platforms, -! one can use whichever SVD is available. -!............................................................ - -!............................................................ -!............................................................ -! - PROGRAM DMD_TEST - use iso_fortran_env, only: real64 - IMPLICIT NONE - integer, parameter :: WP = real64 - -!............................................................ - REAL(KIND=WP), PARAMETER :: ONE = 1.0_WP - REAL(KIND=WP), PARAMETER :: ZERO = 0.0_WP - - COMPLEX(KIND=WP), PARAMETER :: ZONE = ( 1.0_WP, 0.0_WP ) - COMPLEX(KIND=WP), PARAMETER :: ZZERO = ( 0.0_WP, 0.0_WP ) -!............................................................ - REAL(KIND=WP), ALLOCATABLE, DIMENSION(:) :: RES, & - RES1, RESEX, SINGVX, SINGVQX, WORK - INTEGER , ALLOCATABLE, DIMENSION(:) :: IWORK - REAL(KIND=WP) :: WDUMMY(2) - INTEGER :: IDUMMY(4), ISEED(4) - REAL(KIND=WP) :: ANORM, COND, CONDL, CONDR, EPS, & - TOL, TOL2, SVDIFF, TMP, TMP_AU, & - TMP_FQR, TMP_REZ, TMP_REZQ, TMP_ZXW, & - TMP_EX - -!............................................................ - COMPLEX(KIND=WP) :: ZMAX - INTEGER :: LZWORK - COMPLEX(KIND=WP), ALLOCATABLE, DIMENSION(:,:) :: ZA, ZAC, & - ZAU, ZF, ZF0, ZF1, ZS, ZW, & - ZX, ZX0, ZY, ZY0, ZY1, ZZ, ZZ1 - COMPLEX(KIND=WP), ALLOCATABLE, DIMENSION(:) :: ZDA, ZDR, & - ZDL, ZEIGS, ZEIGSA, ZWORK - COMPLEX(KIND=WP) :: ZDUMMY(22), ZDUM2X2(2,2) -!............................................................ - INTEGER :: K, KQ, LDF, LDS, LDA, LDAU, LDW, LDX, LDY, & - LDZ, LIWORK, LWORK, M, N, LLOOP, NRNK, NRNKsp - INTEGER :: i, iJOBREF, iJOBZ, iSCALE, INFO, j, & - NFAIL, NFAIL_AU, NFAIL_F_QR, NFAIL_REZ, & - NFAIL_REZQ, NFAIL_SVDIFF, NFAIL_TOTAL, NFAILQ_TOTAL, & - NFAIL_Z_XV, MODE, MODEL, MODER, WHTSVD, & - WHTSVDsp - INTEGER :: iNRNK, iWHTSVD, K_TRAJ, LWMINOPT - CHARACTER :: GRADE, JOBREF, JOBZ, PIVTNG, RSIGN, & - SCALE, RESIDS, WANTQ, WANTR - LOGICAL :: TEST_QRDMD - -!.....external subroutines (BLAS and LAPACK) - EXTERNAL DAXPY, DGEEV, DGEMM, DGEMV, DLACPY, DLASCL - EXTERNAL ZGEEV, ZGEMV, ZLASCL - EXTERNAL ZLARNV, ZLATMR - EXTERNAL ZAXPY, ZGEMM -!.....external subroutines DMD package, part 1 -! subroutines under test - EXTERNAL ZGEDMD, ZGEDMDQ -!.....external functions (BLAS and LAPACK) - EXTERNAL DLAMCH, DZNRM2 - REAL(KIND=WP) :: DLAMCH, DZNRM2 - REAL(KIND=WP) :: ZLANGE - EXTERNAL IZAMAX - INTEGER IZAMAX - EXTERNAL LSAME - LOGICAL LSAME - - INTRINSIC ABS, INT, MIN, MAX, SIGN -!............................................................ - - ! The test is always in pairs : ( ZGEDMD and ZGEDMDQ ) - ! because the test includes comparing the results (in pairs). -!..................................................................................... - TEST_QRDMD = .TRUE. ! This code by default performs tests on ZGEDMDQ - ! Since the QR factorizations based algorithm is designed for - ! single trajectory data, only single trajectory tests will - ! be performed with xGEDMDQ. - WANTQ = 'Q' - WANTR = 'R' -!................................................................................. - - EPS = DLAMCH( 'P' ) ! machine precision DP - - ! Global counters of failures of some particular tests - NFAIL = 0 - NFAIL_REZ = 0 - NFAIL_REZQ = 0 - NFAIL_Z_XV = 0 - NFAIL_F_QR = 0 - NFAIL_AU = 0 - NFAIL_SVDIFF = 0 - NFAIL_TOTAL = 0 - NFAILQ_TOTAL = 0 - - DO LLOOP = 1, 4 - - WRITE(*,*) 'L Loop Index = ', LLOOP - - ! Set the dimensions of the problem ... - WRITE(*,*) 'M = ' - READ(*,*) M - WRITE(*,*) M - ! ... and the number of snapshots. - WRITE(*,*) 'N = ' - READ(*,*) N - WRITE(*,*) N - - ! ... Test the dimensions - IF ( ( MIN(M,N) == 0 ) .OR. ( M < N ) ) THEN - WRITE(*,*) 'Bad dimensions. Required: M >= N > 0.' - STOP - END IF -!............. - ! The seed inside the LLOOP so that each pass can be reproduced easily. - ISEED(1) = 4 - ISEED(2) = 3 - ISEED(3) = 2 - ISEED(4) = 1 - - LDA = M - LDF = M - LDX = M - LDY = M - LDW = N - LDZ = M - LDAU = M - LDS = N - - TMP_ZXW = ZERO - TMP_AU = ZERO - TMP_REZ = ZERO - TMP_REZQ = ZERO - SVDIFF = ZERO - TMP_EX = ZERO - - ALLOCATE( ZA(LDA,M) ) - ALLOCATE( ZAC(LDA,M) ) - ALLOCATE( ZF(LDF,N+1) ) - ALLOCATE( ZF0(LDF,N+1) ) - ALLOCATE( ZF1(LDF,N+1) ) - ALLOCATE( ZX(LDX,N) ) - ALLOCATE( ZX0(LDX,N) ) - ALLOCATE( ZY(LDY,N+1) ) - ALLOCATE( ZY0(LDY,N+1) ) - ALLOCATE( ZY1(LDY,N+1) ) - ALLOCATE( ZAU(LDAU,N) ) - ALLOCATE( ZW(LDW,N) ) - ALLOCATE( ZS(LDS,N) ) - ALLOCATE( ZZ(LDZ,N) ) - ALLOCATE( ZZ1(LDZ,N) ) - ALLOCATE( RES(N) ) - ALLOCATE( RES1(N) ) - ALLOCATE( RESEX(N) ) - ALLOCATE( ZEIGS(N) ) - ALLOCATE( SINGVX(N) ) - ALLOCATE( SINGVQX(N) ) - - TOL = M*EPS - ! This mimics O(M*N)*EPS bound for accumulated roundoff error. - ! The factor 10 is somewhat arbitrary. - TOL2 = 10*M*N*EPS - -!............. - - DO K_TRAJ = 1, 2 - ! Number of intial conditions in the simulation/trajectories (1 or 2) - - COND = 1.0D4 - ZMAX = (1.0D1,1.0D1) - RSIGN = 'F' - GRADE = 'N' - MODEL = 6 - CONDL = 1.0D1 - MODER = 6 - CONDR = 1.0D1 - PIVTNG = 'N' - - ! Loop over all parameter MODE values for ZLATMR (+1,..,+6) - DO MODE = 1, 6 - - ALLOCATE( IWORK(2*M) ) - ALLOCATE( ZDA(M) ) - ALLOCATE( ZDL(M) ) - ALLOCATE( ZDR(M) ) - - CALL ZLATMR( M, M, 'N', ISEED, 'N', ZDA, MODE, COND, & - ZMAX, RSIGN, GRADE, ZDL, MODEL, CONDL, & - ZDR, MODER, CONDR, PIVTNG, IWORK, M, M, & - ZERO, -ONE, 'N', ZA, LDA, IWORK(M+1), INFO ) - DEALLOCATE( ZDR ) - DEALLOCATE( ZDL ) - DEALLOCATE( ZDA ) - DEALLOCATE( IWORK ) - - LZWORK = MAX(1,2*M) - ALLOCATE( ZEIGSA(M) ) - ALLOCATE( ZWORK(LZWORK) ) - ALLOCATE( WORK(2*M) ) - ZAC(1:M,1:M) = ZA(1:M,1:M) - CALL ZGEEV( 'N','N', M, ZAC, LDA, ZEIGSA, ZDUM2X2, 2, & - ZDUM2X2, 2, ZWORK, LZWORK, WORK, INFO ) ! LAPACK CALL - DEALLOCATE(WORK) - DEALLOCATE(ZWORK) - - TMP = ABS(ZEIGSA(IZAMAX(M, ZEIGSA, 1))) ! The spectral radius of ZA - ! Scale the matrix ZA to have unit spectral radius. - CALL ZLASCL( 'G',0, 0, TMP, ONE, M, M, & - ZA, LDA, INFO ) - CALL ZLASCL( 'G',0, 0, TMP, ONE, M, 1, & - ZEIGSA, M, INFO ) - ANORM = ZLANGE( 'F', M, M, ZA, LDA, WDUMMY ) - - IF ( K_TRAJ == 2 ) THEN - ! generate data as two trajectories - ! with two inital conditions - CALL ZLARNV(2, ISEED, M, ZF(1,1) ) - DO i = 1, N/2 - CALL ZGEMV( 'N', M, M, ZONE, ZA, LDA, ZF(1,i), 1, & - ZZERO, ZF(1,i+1), 1 ) - END DO - ZX0(1:M,1:N/2) = ZF(1:M,1:N/2) - ZY0(1:M,1:N/2) = ZF(1:M,2:N/2+1) - - CALL ZLARNV(2, ISEED, M, ZF(1,1) ) - DO i = 1, N-N/2 - CALL ZGEMV( 'N', M, M, ZONE, ZA, LDA, ZF(1,i), 1, & - ZZERO, ZF(1,i+1), 1 ) - END DO - ZX0(1:M,N/2+1:N) = ZF(1:M,1:N-N/2) - ZY0(1:M,N/2+1:N) = ZF(1:M,2:N-N/2+1) - ELSE - CALL ZLARNV(2, ISEED, M, ZF(1,1) ) - DO i = 1, N - CALL ZGEMV( 'N', M, M, ZONE, ZA, M, ZF(1,i), 1, & - ZZERO, ZF(1,i+1), 1 ) - END DO - ZF0(1:M,1:N+1) = ZF(1:M,1:N+1) - ZX0(1:M,1:N) = ZF0(1:M,1:N) - ZY0(1:M,1:N) = ZF0(1:M,2:N+1) - END IF - - DEALLOCATE( ZEIGSA ) -!........................................................................ - - DO iJOBZ = 1, 4 - - SELECT CASE ( iJOBZ ) - CASE(1) - JOBZ = 'V' - RESIDS = 'R' - CASE(2) - JOBZ = 'V' - RESIDS = 'N' - CASE(3) - JOBZ = 'F' - RESIDS = 'N' - CASE(4) - JOBZ = 'N' - RESIDS = 'N' - END SELECT - - DO iJOBREF = 1, 3 - - SELECT CASE ( iJOBREF ) - CASE(1) - JOBREF = 'R' - CASE(2) - JOBREF = 'E' - CASE(3) - JOBREF = 'N' - END SELECT - - DO iSCALE = 1, 4 - - SELECT CASE ( iSCALE ) - CASE(1) - SCALE = 'S' - CASE(2) - SCALE = 'C' - CASE(3) - SCALE = 'Y' - CASE(4) - SCALE = 'N' - END SELECT - - DO iNRNK = -1, -2, -1 - NRNK = iNRNK - NRNKsp = iNRNK - - DO iWHTSVD = 1, 3 - ! Check all four options to compute the POD basis - ! via the SVD. - WHTSVD = iWHTSVD - WHTSVDsp = iWHTSVD - - DO LWMINOPT = 1, 2 - ! Workspace query for the minimal (1) and for the optimal - ! (2) workspace lengths determined by workspace query. - - ! ZGEDMD is always tested and its results are also used for - ! comparisons with ZGEDMDQ. - - ZX(1:M,1:N) = ZX0(1:M,1:N) - ZY(1:M,1:N) = ZY0(1:M,1:N) - - CALL ZGEDMD( SCALE, JOBZ, RESIDS, JOBREF, WHTSVD, & - M, N, ZX, LDX, ZY, LDY, NRNK, TOL, & - K, ZEIGS, ZZ, LDZ, RES, ZAU, LDAU, & - ZW, LDW, ZS, LDS, ZDUMMY, -1, & - WDUMMY, -1, IDUMMY, -1, INFO ) - IF ( (INFO .EQ. 2) .OR. ( INFO .EQ. 3 ) & - .OR. ( INFO < 0 ) ) THEN - WRITE(*,*) 'Call to ZGEDMD workspace query failed. & - &Check the calling sequence and the code.' - WRITE(*,*) 'The error code is ', INFO - WRITE(*,*) 'The input parameters were ', & - SCALE, JOBZ, RESIDS, JOBREF, WHTSVD, & - M, N, LDX, LDY, NRNK, TOL, LDZ, LDAU, LDW, LDS - STOP - END IF - - LZWORK = INT(ZDUMMY(LWMINOPT)) - LWORK = INT(WDUMMY(1)) - LIWORK = IDUMMY(1) - - ALLOCATE(ZWORK(LZWORK)) - ALLOCATE( WORK(LWORK)) - ALLOCATE(IWORK(LIWORK)) - - CALL ZGEDMD( SCALE, JOBZ, RESIDS, JOBREF, WHTSVD, & - M, N, ZX, LDX, ZY, LDY, NRNK, TOL, & - K, ZEIGS, ZZ, LDZ, RES, ZAU, LDAU, & - ZW, LDW, ZS, LDS, ZWORK, LZWORK, & - WORK, LWORK, IWORK, LIWORK, INFO ) - - IF ( INFO /= 0 ) THEN - WRITE(*,*) 'Call to ZGEDMD failed. & - &Check the calling sequence and the code.' - WRITE(*,*) 'The error code is ', INFO - WRITE(*,*) 'The input parameters were ',& - SCALE, JOBZ, RESIDS, JOBREF, WHTSVD, & - M, N, LDX, LDY, NRNK, TOL - STOP - END IF - - SINGVX(1:N) = WORK(1:N) - - !...... ZGEDMD check point - IF ( LSAME(JOBZ,'V') ) THEN - ! Check that Z = X*W, on return from ZGEDMD - ! This checks that the returned eigenvectors in Z are - ! the product of the SVD'POD basis returned in X - ! and the eigenvectors of the rayleigh quotient - ! returned in W - CALL ZGEMM( 'N', 'N', M, K, K, ZONE, ZX, LDX, ZW, LDW, & - ZZERO, ZZ1, LDZ ) - TMP = ZERO - DO i = 1, K - CALL ZAXPY( M, -ZONE, ZZ(1,i), 1, ZZ1(1,i), 1) - TMP = MAX(TMP, DZNRM2( M, ZZ1(1,i), 1 ) ) - END DO - TMP_ZXW = MAX(TMP_ZXW, TMP ) - IF ( TMP_ZXW <= 10*M*EPS ) THEN - !WRITE(*,*) ' :) .... OK .........ZGEDMD PASSED.' - ELSE - NFAIL_Z_XV = NFAIL_Z_XV + 1 - WRITE(*,*) ':( .................ZGEDMD FAILED!', & - 'Check the code for implementation errors.' - WRITE(*,*) 'The input parameters were ',& - SCALE, JOBZ, RESIDS, JOBREF, WHTSVD, & - M, N, LDX, LDY, NRNK, TOL - END IF - END IF - - - !...... ZGEDMD check point - IF ( LSAME(JOBREF,'R') ) THEN - ! The matrix A*U is returned for computing refined Ritz vectors. - ! Check that A*U is computed correctly using the formula - ! A*U = Y * V * inv(SIGMA). This depends on the - ! accuracy in the computed singular values and vectors of X. - ! See the paper for an error analysis. - ! Note that the left singular vectors of the input matrix X - ! are returned in the array X. - CALL ZGEMM( 'N', 'N', M, K, M, ZONE, ZA, LDA, ZX, LDX, & - ZZERO, ZZ1, LDZ ) - TMP = ZERO - DO i = 1, K - CALL ZAXPY( M, -ZONE, ZAU(1,i), 1, ZZ1(1,i), 1) - TMP = MAX( TMP, DZNRM2( M, ZZ1(1,i),1 ) * & - SINGVX(K)/(ANORM*SINGVX(1)) ) - END DO - TMP_AU = MAX( TMP_AU, TMP ) - IF ( TMP <= TOL2 ) THEN - !WRITE(*,*) ':) .... OK .........ZGEDMD PASSED.' - ELSE - NFAIL_AU = NFAIL_AU + 1 - WRITE(*,*) ':( .................ZGEDMD FAILED!', & - 'Check the code for implementation errors.' - WRITE(*,*) 'The input parameters were ',& - SCALE, JOBZ, RESIDS, JOBREF, WHTSVD, & - M, N, LDX, LDY, NRNK, TOL - END IF - ELSEIF ( LSAME(JOBREF,'E') ) THEN - ! The unscaled vectors of the Exact DMD are computed. - ! This option is included for the sake of completeness, - ! for users who prefer the Exact DMD vectors. The - ! returned vectors are in the real form, in the same way - ! as the Ritz vectors. Here we just save the vectors - ! and test them separately using a Matlab script. - - - CALL ZGEMM( 'N', 'N', M, K, M, ZONE, ZA, LDA, ZAU, LDAU, ZZERO, ZY1, LDY ) - - DO i=1, K - ! have a real eigenvalue with real eigenvector - CALL ZAXPY( M, -ZEIGS(i), ZAU(1,i), 1, ZY1(1,i), 1 ) - RESEX(i) = DZNRM2( M, ZY1(1,i), 1) / DZNRM2(M,ZAU(1,i),1) - END DO - END IF - !...... ZGEDMD check point - - IF ( LSAME(RESIDS, 'R') ) THEN - ! Compare the residuals returned by ZGEDMD with the - ! explicitly computed residuals using the matrix A. - ! Compute explicitly Y1 = A*Z - CALL ZGEMM( 'N', 'N', M, K, M, ZONE, ZA, LDA, ZZ, LDZ, ZZERO, ZY1, LDY ) - ! ... and then A*Z(:,i) - LAMBDA(i)*Z(:,i), using the real forms - ! of the invariant subspaces that correspond to complex conjugate - ! pairs of eigencalues. (See the description of Z in ZGEDMD,) - - DO i=1, K - ! have a real eigenvalue with real eigenvector - CALL ZAXPY( M, -ZEIGS(i), ZZ(1,i), 1, ZY1(1,i), 1 ) - RES1(i) = DZNRM2( M, ZY1(1,i), 1) - END DO - TMP = ZERO - DO i = 1, K - TMP = MAX( TMP, ABS(RES(i) - RES1(i)) * & - SINGVX(K)/(ANORM*SINGVX(1)) ) - END DO - TMP_REZ = MAX( TMP_REZ, TMP ) - IF ( TMP <= TOL2 ) THEN - !WRITE(*,*) ':) .... OK ..........ZGEDMD PASSED.' - ELSE - NFAIL_REZ = NFAIL_REZ + 1 - WRITE(*,*) ':( ..................ZGEDMD FAILED!', & - 'Check the code for implementation errors.' - WRITE(*,*) 'The input parameters were ',& - SCALE, JOBZ, RESIDS, JOBREF, WHTSVD, & - M, N, LDX, LDY, NRNK, TOL - END IF - - - IF ( LSAME(JOBREF,'E') ) THEN - TMP = ZERO - DO i = 1, K - TMP = MAX( TMP, ABS(RES1(i) - RESEX(i))/(RES1(i)+RESEX(i)) ) - END DO - TMP_EX = MAX(TMP_EX,TMP) - END IF - - END IF - - DEALLOCATE(ZWORK) - DEALLOCATE(WORK) - DEALLOCATE(IWORK) - - IF ( TEST_QRDMD .AND. (K_TRAJ == 1) ) THEN - - ZF(1:M,1:N+1) = ZF0(1:M,1:N+1) - - CALL ZGEDMDQ( SCALE, JOBZ, RESIDS, WANTQ, WANTR, JOBREF, & - WHTSVD, M, N+1, ZF, LDF, ZX, LDX, ZY, LDY, & - NRNK, TOL, K, ZEIGS, ZZ, LDZ, RES, ZAU, & - LDAU, ZW, LDW, ZS, LDS, ZDUMMY, -1, & - WDUMMY, -1, IDUMMY, -1, INFO ) - - LZWORK = INT(ZDUMMY(LWMINOPT)) - ALLOCATE( ZWORK(LZWORK) ) - LIWORK = IDUMMY(1) - ALLOCATE(IWORK(LIWORK)) - LWORK = INT(WDUMMY(1)) - ALLOCATE(WORK(LWORK)) - - CALL ZGEDMDQ( SCALE, JOBZ, RESIDS, WANTQ, WANTR, JOBREF, & - WHTSVD, M, N+1, ZF, LDF, ZX, LDX, ZY, LDY, & - NRNK, TOL, KQ, ZEIGS, ZZ, LDZ, RES, ZAU, & - LDAU, ZW, LDW, ZS, LDS, ZWORK, LZWORK, & - WORK, LWORK, IWORK, LIWORK, INFO ) - - IF ( INFO /= 0 ) THEN - WRITE(*,*) 'Call to ZGEDMDQ failed. & - &Check the calling sequence and the code.' - WRITE(*,*) 'The error code is ', INFO - WRITE(*,*) 'The input parameters were ',& - SCALE, JOBZ, RESIDS, WANTQ, WANTR, WHTSVD, & - M, N, LDX, LDY, NRNK, TOL - STOP - END IF - SINGVQX(1:N) = WORK(1:N) - - !..... ZGEDMDQ check point - - IF ( 1 == 0 ) THEN - ! Comparison of ZGEDMD and ZGEDMDQ singular values disabled - TMP = ZERO - DO i = 1, MIN(K, KQ) - TMP = MAX(TMP, ABS(SINGVX(i)-SINGVQX(i)) / & - SINGVX(1) ) - END DO - SVDIFF = MAX( SVDIFF, TMP ) - IF ( TMP > M*N*EPS ) THEN - WRITE(*,*) 'FAILED! Something was wrong with the run.' - NFAIL_SVDIFF = NFAIL_SVDIFF + 1 - DO j =1, 3 - write(*,*) j, SINGVX(j), SINGVQX(j) - read(*,*) - END DO - - END IF - END IF - - !..... ZGEDMDQ check point - IF ( LSAME(WANTQ,'Q') .AND. LSAME(WANTR,'R') ) THEN - ! Check that the QR factors are computed and returned - ! as requested. The residual ||F-Q*R||_F / ||F||_F - ! is compared to M*N*EPS. - ZF1(1:M,1:N+1) = ZF0(1:M,1:N+1) - CALL ZGEMM( 'N', 'N', M, N+1, MIN(M,N+1), -ZONE, ZF, & - LDF, ZY, LDY, ZONE, ZF1, LDF ) - TMP_FQR = ZLANGE( 'F', M, N+1, ZF1, LDF, WORK ) / & - ZLANGE( 'F', M, N+1, ZF0, LDF, WORK ) - IF ( TMP_FQR > TOL2 ) THEN - WRITE(*,*) 'FAILED! Something was wrong with the run.' - NFAIL_F_QR = NFAIL_F_QR + 1 - ELSE - !WRITE(*,*) '........ PASSED.' - END IF - END IF - - !..... ZGEDMDQ check point - IF ( LSAME(RESIDS, 'R') ) THEN - ! Compare the residuals returned by ZGEDMDQ with the - ! explicitly computed residuals using the matrix A. - ! Compute explicitly Y1 = A*Z - CALL ZGEMM( 'N', 'N', M, KQ, M, ZONE, ZA, LDA, ZZ, LDZ, ZZERO, ZY1, LDY ) - ! ... and then A*Z(:,i) - LAMBDA(i)*Z(:,i), using the real forms - ! of the invariant subspaces that correspond to complex conjugate - ! pairs of eigencalues. (See the description of Z in ZGEDMDQ) - - DO i=1, KQ - ! have a real eigenvalue with real eigenvector - CALL ZAXPY( M, -ZEIGS(i), ZZ(1,i), 1, ZY1(1,i), 1 ) - ! Y(1:M,i) = Y(1:M,i) - REIG(i)*Z(1:M,i) - RES1(i) = DZNRM2( M, ZY1(1,i), 1) - END DO - TMP = ZERO - DO i = 1, KQ - TMP = MAX( TMP, ABS(RES(i) - RES1(i)) * & - SINGVQX(KQ)/(ANORM*SINGVQX(1)) ) - END DO - TMP_REZQ = MAX( TMP_REZQ, TMP ) - IF ( TMP <= TOL2 ) THEN - !WRITE(*,*) '.... OK ........ ZGEDMDQ PASSED.' - ELSE - NFAIL_REZQ = NFAIL_REZQ + 1 - WRITE(*,*) '................ ZGEDMDQ FAILED!', & - 'Check the code for implementation errors.' - STOP - END IF - - END IF - - DEALLOCATE( ZWORK ) - DEALLOCATE( WORK ) - DEALLOCATE( IWORK ) - - END IF ! ZGEDMDQ - -!....................................................................................................... - - END DO ! LWMINOPT - !write(*,*) 'LWMINOPT loop completed' - END DO ! iWHTSVD - !write(*,*) 'WHTSVD loop completed' - END DO ! iNRNK -2:-1 - !write(*,*) 'NRNK loop completed' - END DO ! iSCALE 1:4 - !write(*,*) 'SCALE loop completed' - END DO - !write(*,*) 'JOBREF loop completed' - END DO ! iJOBZ - !write(*,*) 'JOBZ loop completed' - - END DO ! MODE -6:6 - !write(*,*) 'MODE loop completed' - END DO ! 1 or 2 trajectories - !write(*,*) 'trajectories loop completed' - - DEALLOCATE( ZA ) - DEALLOCATE( ZAC ) - DEALLOCATE( ZZ ) - DEALLOCATE( ZF ) - DEALLOCATE( ZF0 ) - DEALLOCATE( ZF1 ) - DEALLOCATE( ZX ) - DEALLOCATE( ZX0 ) - DEALLOCATE( ZY ) - DEALLOCATE( ZY0 ) - DEALLOCATE( ZY1 ) - DEALLOCATE( ZAU ) - DEALLOCATE( ZW ) - DEALLOCATE( ZS ) - DEALLOCATE( ZZ1 ) - DEALLOCATE( RES ) - DEALLOCATE( RES1 ) - DEALLOCATE( RESEX ) - DEALLOCATE( ZEIGS ) - DEALLOCATE( SINGVX ) - DEALLOCATE( SINGVQX ) - - END DO ! LLOOP - - WRITE(*,*) '>>>>>>>>>>>>>>>>>>>>>>>>>>' - WRITE(*,*) ' Test summary for ZGEDMD :' - WRITE(*,*) '>>>>>>>>>>>>>>>>>>>>>>>>>>' - WRITE(*,*) - IF ( NFAIL_Z_XV == 0 ) THEN - WRITE(*,*) '>>>> Z - U*V test PASSED.' - ELSE - WRITE(*,*) 'Z - U*V test FAILED ', NFAIL_Z_XV, ' time(s)' - WRITE(*,*) 'Max error ||Z-U*V||_F was ', TMP_ZXW - NFAIL_TOTAL = NFAIL_TOTAL + NFAIL_Z_XV - END IF - IF ( NFAIL_AU == 0 ) THEN - WRITE(*,*) '>>>> A*U test PASSED. ' - ELSE - WRITE(*,*) 'A*U test FAILED ', NFAIL_AU, ' time(s)' - WRITE(*,*) 'Max A*U test adjusted error measure was ', TMP_AU - WRITE(*,*) 'It should be up to O(M*N) times EPS, EPS = ', EPS - NFAIL_TOTAL = NFAIL_TOTAL + NFAIL_AU - END IF - - IF ( NFAIL_REZ == 0 ) THEN - WRITE(*,*) '>>>> Rezidual computation test PASSED.' - ELSE - WRITE(*,*) 'Rezidual computation test FAILED ', NFAIL_REZ, 'time(s)' - WRITE(*,*) 'Max residual computing test adjusted error measure was ', TMP_REZ - WRITE(*,*) 'It should be up to O(M*N) times EPS, EPS = ', EPS - NFAIL_TOTAL = NFAIL_TOTAL + NFAIL_REZ - END IF - - IF ( NFAIL_TOTAL == 0 ) THEN - WRITE(*,*) '>>>> ZGEDMD :: ALL TESTS PASSED.' - ELSE - WRITE(*,*) NFAIL_TOTAL, 'FAILURES!' - WRITE(*,*) '>>>>>>>>>>>>>> ZGEDMD :: TESTS FAILED. CHECK THE IMPLEMENTATION.' - END IF - - IF ( TEST_QRDMD ) THEN - WRITE(*,*) - WRITE(*,*) '>>>>>>>>>>>>>>>>>>>>>>>>>>' - WRITE(*,*) ' Test summary for ZGEDMDQ :' - WRITE(*,*) '>>>>>>>>>>>>>>>>>>>>>>>>>>' - WRITE(*,*) - - IF ( NFAIL_SVDIFF == 0 ) THEN - WRITE(*,*) '>>>> ZGEDMD and ZGEDMDQ computed singular & - &values test PASSED.' - ELSE - WRITE(*,*) 'ZGEDMD and ZGEDMDQ discrepancies in & - &the singular values unacceptable ', & - NFAIL_SVDIFF, ' times. Test FAILED.' - WRITE(*,*) 'The maximal discrepancy in the singular values (relative to the norm) was ', SVDIFF - WRITE(*,*) 'It should be up to O(M*N) times EPS, EPS = ', EPS - NFAILQ_TOTAL = NFAILQ_TOTAL + NFAIL_SVDIFF - END IF - - IF ( NFAIL_F_QR == 0 ) THEN - WRITE(*,*) '>>>> F - Q*R test PASSED.' - ELSE - WRITE(*,*) 'F - Q*R test FAILED ', NFAIL_F_QR, ' time(s)' - WRITE(*,*) 'The largest relative residual was ', TMP_FQR - WRITE(*,*) 'It should be up to O(M*N) times EPS, EPS = ', EPS - NFAILQ_TOTAL = NFAILQ_TOTAL + NFAIL_F_QR - END IF - - IF ( NFAIL_REZQ == 0 ) THEN - WRITE(*,*) '>>>> Rezidual computation test PASSED.' - ELSE - WRITE(*,*) 'Rezidual computation test FAILED ', NFAIL_REZQ, 'time(s)' - WRITE(*,*) 'Max residual computing test adjusted error measure was ', TMP_REZQ - WRITE(*,*) 'It should be up to O(M*N) times EPS, EPS = ', EPS - NFAILQ_TOTAL = NFAILQ_TOTAL + NFAIL_REZQ - END IF - - IF ( NFAILQ_TOTAL == 0 ) THEN - WRITE(*,*) '>>>>>>> ZGEDMDQ :: ALL TESTS PASSED.' - ELSE - WRITE(*,*) NFAILQ_TOTAL, 'FAILURES!' - WRITE(*,*) '>>>>>>> ZGEDMDQ :: TESTS FAILED. CHECK THE IMPLEMENTATION.' - END IF - - END IF - - WRITE(*,*) - WRITE(*,*) 'Test completed.' - STOP - END +! This is a test program for checking the implementations of +! the implementations of the following subroutines +! +! ZGEDMD, for computation of the +! Dynamic Mode Decomposition (DMD) +! ZGEDMDQ, for computation of a +! QR factorization based compressed DMD +! +! Developed and supported by: +! =========================== +! Developed and coded by Zlatko Drmac, Faculty of Science, +! University of Zagreb; drmac@math.hr +! In cooperation with +! AIMdyn Inc., Santa Barbara, CA. +! ======================================================== +! How to run the code (compiler, link info) +! ======================================================== +! Compile as FORTRAN 90 (or later) and link with BLAS and +! LAPACK libraries. +! NOTE: The code is developed and tested on top of the +! Intel MKL library (versions 2022.0.3 and 2022.2.0), +! using the Intel Fortran compiler. +! +! For developers of the C++ implementation +! ======================================================== +! See the LAPACK++ and Template Numerical Toolkit (TNT) +! +! Note on a development of the GPU HP implementation +! ======================================================== +! Work in progress. See CUDA, MAGMA, SLATE. +! NOTE: The four SVD subroutines used in this code are +! included as a part of R&D and for the completeness. +! This was also an opportunity to test those SVD codes. +! If the scaling option is used all four are essentially +! equally good. For implementations on HP platforms, +! one can use whichever SVD is available. +!............................................................ + +!............................................................ +!............................................................ +! + PROGRAM DMD_TEST + use iso_fortran_env, only: real64 + IMPLICIT NONE + integer, parameter :: WP = real64 + +!............................................................ + REAL(KIND=WP), PARAMETER :: ONE = 1.0_WP + REAL(KIND=WP), PARAMETER :: ZERO = 0.0_WP + + COMPLEX(KIND=WP), PARAMETER :: ZONE = ( 1.0_WP, 0.0_WP ) + COMPLEX(KIND=WP), PARAMETER :: ZZERO = ( 0.0_WP, 0.0_WP ) +!............................................................ + REAL(KIND=WP), ALLOCATABLE, DIMENSION(:) :: RES, & + RES1, RESEX, SINGVX, SINGVQX, WORK + INTEGER , ALLOCATABLE, DIMENSION(:) :: IWORK + REAL(KIND=WP) :: WDUMMY(2) + INTEGER :: IDUMMY(4), ISEED(4) + REAL(KIND=WP) :: ANORM, COND, CONDL, CONDR, EPS, & + TOL, TOL2, SVDIFF, TMP, TMP_AU, & + TMP_FQR, TMP_REZ, TMP_REZQ, TMP_ZXW, & + TMP_EX + +!............................................................ + COMPLEX(KIND=WP) :: ZMAX + INTEGER :: LZWORK + COMPLEX(KIND=WP), ALLOCATABLE, DIMENSION(:,:) :: ZA, ZAC, & + ZAU, ZF, ZF0, ZF1, ZS, ZW, & + ZX, ZX0, ZY, ZY0, ZY1, ZZ, ZZ1 + COMPLEX(KIND=WP), ALLOCATABLE, DIMENSION(:) :: ZDA, ZDR, & + ZDL, ZEIGS, ZEIGSA, ZWORK + COMPLEX(KIND=WP) :: ZDUMMY(22), ZDUM2X2(2,2) +!............................................................ + INTEGER :: K, KQ, LDF, LDS, LDA, LDAU, LDW, LDX, LDY, & + LDZ, LIWORK, LWORK, M, N, LLOOP, NRNK, NRNKsp + INTEGER :: i, iJOBREF, iJOBZ, iSCALE, INFO, j, & + NFAIL, NFAIL_AU, NFAIL_F_QR, NFAIL_REZ, & + NFAIL_REZQ, NFAIL_SVDIFF, NFAIL_TOTAL, NFAILQ_TOTAL, & + NFAIL_Z_XV, MODE, MODEL, MODER, WHTSVD, & + WHTSVDsp + INTEGER :: iNRNK, iWHTSVD, K_TRAJ, LWMINOPT + CHARACTER :: GRADE, JOBREF, JOBZ, PIVTNG, RSIGN, & + SCALE, RESIDS, WANTQ, WANTR + LOGICAL :: TEST_QRDMD + +!.....external subroutines (BLAS and LAPACK) + EXTERNAL DAXPY, DGEEV, DGEMM, DGEMV, DLACPY, DLASCL + EXTERNAL ZGEEV, ZGEMV, ZLASCL + EXTERNAL ZLARNV, ZLATMR, XLAENV + EXTERNAL ZAXPY, ZGEMM +!.....external subroutines DMD package, part 1 +! subroutines under test + EXTERNAL ZGEDMD, ZGEDMDQ +!.....external functions (BLAS and LAPACK) + EXTERNAL DLAMCH, DZNRM2 + REAL(KIND=WP) :: DLAMCH, DZNRM2 + REAL(KIND=WP) :: ZLANGE + EXTERNAL IZAMAX + INTEGER IZAMAX + EXTERNAL LSAME + LOGICAL LSAME + + INTRINSIC ABS, INT, MIN, MAX, SIGN +!............................................................ + + ! The test is always in pairs : ( ZGEDMD and ZGEDMDQ ) + ! because the test includes comparing the results (in pairs). +!..................................................................................... + TEST_QRDMD = .TRUE. ! This code by default performs tests on ZGEDMDQ + ! Since the QR factorizations based algorithm is designed for + ! single trajectory data, only single trajectory tests will + ! be performed with xGEDMDQ. + WANTQ = 'Q' + WANTR = 'R' + ! Initialize the divide-and-conquer cutoff used by xGESDD/xBDSDC. + CALL XLAENV( 9, 25 ) +!................................................................................. + + EPS = DLAMCH( 'P' ) ! machine precision DP + + ! Global counters of failures of some particular tests + NFAIL = 0 + NFAIL_REZ = 0 + NFAIL_REZQ = 0 + NFAIL_Z_XV = 0 + NFAIL_F_QR = 0 + NFAIL_AU = 0 + NFAIL_SVDIFF = 0 + NFAIL_TOTAL = 0 + NFAILQ_TOTAL = 0 + + DO LLOOP = 1, 4 + + WRITE(*,*) 'L Loop Index = ', LLOOP + + ! Set the dimensions of the problem ... + WRITE(*,*) 'M = ' + READ(*,*) M + WRITE(*,*) M + ! ... and the number of snapshots. + WRITE(*,*) 'N = ' + READ(*,*) N + WRITE(*,*) N + + ! ... Test the dimensions + IF ( ( MIN(M,N) == 0 ) .OR. ( M < N ) ) THEN + WRITE(*,*) 'Bad dimensions. Required: M >= N > 0.' + STOP + END IF +!............. + ! The seed inside the LLOOP so that each pass can be reproduced easily. + ISEED(1) = 4 + ISEED(2) = 3 + ISEED(3) = 2 + ISEED(4) = 1 + + LDA = M + LDF = M + LDX = M + LDY = M + LDW = N + LDZ = M + LDAU = M + LDS = N + + TMP_ZXW = ZERO + TMP_AU = ZERO + TMP_REZ = ZERO + TMP_REZQ = ZERO + SVDIFF = ZERO + TMP_EX = ZERO + + ALLOCATE( ZA(LDA,M) ) + ALLOCATE( ZAC(LDA,M) ) + ALLOCATE( ZF(LDF,N+1) ) + ALLOCATE( ZF0(LDF,N+1) ) + ALLOCATE( ZF1(LDF,N+1) ) + ALLOCATE( ZX(LDX,N) ) + ALLOCATE( ZX0(LDX,N) ) + ALLOCATE( ZY(LDY,N+1) ) + ALLOCATE( ZY0(LDY,N+1) ) + ALLOCATE( ZY1(LDY,N+1) ) + ALLOCATE( ZAU(LDAU,N) ) + ALLOCATE( ZW(LDW,N) ) + ALLOCATE( ZS(LDS,N) ) + ALLOCATE( ZZ(LDZ,N) ) + ALLOCATE( ZZ1(LDZ,N) ) + ALLOCATE( RES(N) ) + ALLOCATE( RES1(N) ) + ALLOCATE( RESEX(N) ) + ALLOCATE( ZEIGS(N) ) + ALLOCATE( SINGVX(N) ) + ALLOCATE( SINGVQX(N) ) + + TOL = M*EPS + ! This mimics O(M*N)*EPS bound for accumulated roundoff error. + ! The factor 10 is somewhat arbitrary. + TOL2 = 10*M*N*EPS + +!............. + + DO K_TRAJ = 1, 2 + ! Number of initial conditions in the simulation/trajectories (1 or 2) + + COND = 1.0D4 + ZMAX = (1.0D1,1.0D1) + RSIGN = 'F' + GRADE = 'N' + MODEL = 6 + CONDL = 1.0D1 + MODER = 6 + CONDR = 1.0D1 + PIVTNG = 'N' + + ! Loop over all parameter MODE values for ZLATMR (+1,..,+6) + DO MODE = 1, 6 + + ALLOCATE( IWORK(2*M) ) + ALLOCATE( ZDA(M) ) + ALLOCATE( ZDL(M) ) + ALLOCATE( ZDR(M) ) + + CALL ZLATMR( M, M, 'N', ISEED, 'N', ZDA, MODE, COND, & + ZMAX, RSIGN, GRADE, ZDL, MODEL, CONDL, & + ZDR, MODER, CONDR, PIVTNG, IWORK, M, M, & + ZERO, -ONE, 'N', ZA, LDA, IWORK(M+1), INFO ) + DEALLOCATE( ZDR ) + DEALLOCATE( ZDL ) + DEALLOCATE( ZDA ) + DEALLOCATE( IWORK ) + + LZWORK = MAX(1,2*M) + ALLOCATE( ZEIGSA(M) ) + ALLOCATE( ZWORK(LZWORK) ) + ALLOCATE( WORK(2*M) ) + ZAC(1:M,1:M) = ZA(1:M,1:M) + CALL ZGEEV( 'N','N', M, ZAC, LDA, ZEIGSA, ZDUM2X2, 2, & + ZDUM2X2, 2, ZWORK, LZWORK, WORK, INFO ) ! LAPACK CALL + DEALLOCATE(WORK) + DEALLOCATE(ZWORK) + + TMP = ABS(ZEIGSA(IZAMAX(M, ZEIGSA, 1))) ! The spectral radius of ZA + ! Scale the matrix ZA to have unit spectral radius. + CALL ZLASCL( 'G',0, 0, TMP, ONE, M, M, & + ZA, LDA, INFO ) + CALL ZLASCL( 'G',0, 0, TMP, ONE, M, 1, & + ZEIGSA, M, INFO ) + ANORM = ZLANGE( 'F', M, M, ZA, LDA, WDUMMY ) + + IF ( K_TRAJ == 2 ) THEN + ! generate data as two trajectories + ! with two initial conditions + CALL ZLARNV(2, ISEED, M, ZF(1,1) ) + DO i = 1, N/2 + CALL ZGEMV( 'N', M, M, ZONE, ZA, LDA, ZF(1,i), 1, & + ZZERO, ZF(1,i+1), 1 ) + END DO + ZX0(1:M,1:N/2) = ZF(1:M,1:N/2) + ZY0(1:M,1:N/2) = ZF(1:M,2:N/2+1) + + CALL ZLARNV(2, ISEED, M, ZF(1,1) ) + DO i = 1, N-N/2 + CALL ZGEMV( 'N', M, M, ZONE, ZA, LDA, ZF(1,i), 1, & + ZZERO, ZF(1,i+1), 1 ) + END DO + ZX0(1:M,N/2+1:N) = ZF(1:M,1:N-N/2) + ZY0(1:M,N/2+1:N) = ZF(1:M,2:N-N/2+1) + ELSE + CALL ZLARNV(2, ISEED, M, ZF(1,1) ) + DO i = 1, N + CALL ZGEMV( 'N', M, M, ZONE, ZA, M, ZF(1,i), 1, & + ZZERO, ZF(1,i+1), 1 ) + END DO + ZF0(1:M,1:N+1) = ZF(1:M,1:N+1) + ZX0(1:M,1:N) = ZF0(1:M,1:N) + ZY0(1:M,1:N) = ZF0(1:M,2:N+1) + END IF + + DEALLOCATE( ZEIGSA ) +!........................................................................ + + DO iJOBZ = 1, 4 + + SELECT CASE ( iJOBZ ) + CASE(1) + JOBZ = 'V' + RESIDS = 'R' + CASE(2) + JOBZ = 'V' + RESIDS = 'N' + CASE(3) + JOBZ = 'F' + RESIDS = 'N' + CASE(4) + JOBZ = 'N' + RESIDS = 'N' + END SELECT + + DO iJOBREF = 1, 3 + + SELECT CASE ( iJOBREF ) + CASE(1) + JOBREF = 'R' + CASE(2) + JOBREF = 'E' + CASE(3) + JOBREF = 'N' + END SELECT + + DO iSCALE = 1, 4 + + SELECT CASE ( iSCALE ) + CASE(1) + SCALE = 'S' + CASE(2) + SCALE = 'C' + CASE(3) + SCALE = 'Y' + CASE(4) + SCALE = 'N' + END SELECT + + DO iNRNK = -1, -2, -1 + NRNK = iNRNK + NRNKsp = iNRNK + + DO iWHTSVD = 1, 3 + ! Check all four options to compute the POD basis + ! via the SVD. + WHTSVD = iWHTSVD + WHTSVDsp = iWHTSVD + + DO LWMINOPT = 1, 2 + ! Workspace query for the minimal (1) and for the optimal + ! (2) workspace lengths determined by workspace query. + + ! ZGEDMD is always tested and its results are also used for + ! comparisons with ZGEDMDQ. + + ZX(1:M,1:N) = ZX0(1:M,1:N) + ZY(1:M,1:N) = ZY0(1:M,1:N) + + CALL ZGEDMD( SCALE, JOBZ, RESIDS, JOBREF, WHTSVD, & + M, N, ZX, LDX, ZY, LDY, NRNK, TOL, & + K, ZEIGS, ZZ, LDZ, RES, ZAU, LDAU, & + ZW, LDW, ZS, LDS, ZDUMMY, -1, & + WDUMMY, -1, IDUMMY, -1, INFO ) + IF ( (INFO .EQ. 2) .OR. ( INFO .EQ. 3 ) & + .OR. ( INFO < 0 ) ) THEN + WRITE(*,*) 'Call to ZGEDMD workspace query failed. & + &Check the calling sequence and the code.' + WRITE(*,*) 'The error code is ', INFO + WRITE(*,*) 'The input parameters were ', & + SCALE, JOBZ, RESIDS, JOBREF, WHTSVD, & + M, N, LDX, LDY, NRNK, TOL, LDZ, LDAU, LDW, LDS + STOP + END IF + + LZWORK = INT(ZDUMMY(LWMINOPT)) + LWORK = INT(WDUMMY(1)) + LIWORK = IDUMMY(1) + + ALLOCATE(ZWORK(LZWORK)) + ALLOCATE( WORK(LWORK)) + ALLOCATE(IWORK(LIWORK)) + + CALL ZGEDMD( SCALE, JOBZ, RESIDS, JOBREF, WHTSVD, & + M, N, ZX, LDX, ZY, LDY, NRNK, TOL, & + K, ZEIGS, ZZ, LDZ, RES, ZAU, LDAU, & + ZW, LDW, ZS, LDS, ZWORK, LZWORK, & + WORK, LWORK, IWORK, LIWORK, INFO ) + + IF ( INFO /= 0 ) THEN + WRITE(*,*) 'Call to ZGEDMD failed. & + &Check the calling sequence and the code.' + WRITE(*,*) 'The error code is ', INFO + WRITE(*,*) 'The input parameters were ',& + SCALE, JOBZ, RESIDS, JOBREF, WHTSVD, & + M, N, LDX, LDY, NRNK, TOL + STOP + END IF + + SINGVX(1:N) = WORK(1:N) + + !...... ZGEDMD check point + IF ( LSAME(JOBZ,'V') ) THEN + ! Check that Z = X*W, on return from ZGEDMD + ! This checks that the returned eigenvectors in Z are + ! the product of the SVD'POD basis returned in X + ! and the eigenvectors of the Rayleigh quotient + ! returned in W + CALL ZGEMM( 'N', 'N', M, K, K, ZONE, ZX, LDX, ZW, LDW, & + ZZERO, ZZ1, LDZ ) + TMP = ZERO + DO i = 1, K + CALL ZAXPY( M, -ZONE, ZZ(1,i), 1, ZZ1(1,i), 1) + TMP = MAX(TMP, DZNRM2( M, ZZ1(1,i), 1 ) ) + END DO + TMP_ZXW = MAX(TMP_ZXW, TMP ) + IF ( TMP_ZXW <= 10*M*EPS ) THEN + !WRITE(*,*) ' :) .... OK .........ZGEDMD PASSED.' + ELSE + NFAIL_Z_XV = NFAIL_Z_XV + 1 + WRITE(*,*) ':( .................ZGEDMD FAILED!', & + 'Check the code for implementation errors.' + WRITE(*,*) 'The input parameters were ',& + SCALE, JOBZ, RESIDS, JOBREF, WHTSVD, & + M, N, LDX, LDY, NRNK, TOL + END IF + END IF + + + !...... ZGEDMD check point + IF ( LSAME(JOBREF,'R') ) THEN + ! The matrix A*U is returned for computing refined Ritz vectors. + ! Check that A*U is computed correctly using the formula + ! A*U = Y * V * inv(SIGMA). This depends on the + ! accuracy in the computed singular values and vectors of X. + ! See the paper for an error analysis. + ! Note that the left singular vectors of the input matrix X + ! are returned in the array X. + CALL ZGEMM( 'N', 'N', M, K, M, ZONE, ZA, LDA, ZX, LDX, & + ZZERO, ZZ1, LDZ ) + TMP = ZERO + DO i = 1, K + CALL ZAXPY( M, -ZONE, ZAU(1,i), 1, ZZ1(1,i), 1) + TMP = MAX( TMP, DZNRM2( M, ZZ1(1,i),1 ) * & + SINGVX(K)/(ANORM*SINGVX(1)) ) + END DO + TMP_AU = MAX( TMP_AU, TMP ) + IF ( TMP <= TOL2 ) THEN + !WRITE(*,*) ':) .... OK .........ZGEDMD PASSED.' + ELSE + NFAIL_AU = NFAIL_AU + 1 + WRITE(*,*) ':( .................ZGEDMD FAILED!', & + 'Check the code for implementation errors.' + WRITE(*,*) 'The input parameters were ',& + SCALE, JOBZ, RESIDS, JOBREF, WHTSVD, & + M, N, LDX, LDY, NRNK, TOL + END IF + ELSEIF ( LSAME(JOBREF,'E') ) THEN + ! The unscaled vectors of the Exact DMD are computed. + ! This option is included for the sake of completeness, + ! for users who prefer the Exact DMD vectors. The + ! returned vectors are in the real form, in the same way + ! as the Ritz vectors. Here we just save the vectors + ! and test them separately using a Matlab script. + + + CALL ZGEMM( 'N', 'N', M, K, M, ZONE, ZA, LDA, ZAU, LDAU, ZZERO, ZY1, LDY ) + + DO i=1, K + ! have a real eigenvalue with real eigenvector + CALL ZAXPY( M, -ZEIGS(i), ZAU(1,i), 1, ZY1(1,i), 1 ) + RESEX(i) = DZNRM2( M, ZY1(1,i), 1) / DZNRM2(M,ZAU(1,i),1) + END DO + END IF + !...... ZGEDMD check point + + IF ( LSAME(RESIDS, 'R') ) THEN + ! Compare the residuals returned by ZGEDMD with the + ! explicitly computed residuals using the matrix A. + ! Compute explicitly Y1 = A*Z + CALL ZGEMM( 'N', 'N', M, K, M, ZONE, ZA, LDA, ZZ, LDZ, ZZERO, ZY1, LDY ) + ! ... and then A*Z(:,i) - LAMBDA(i)*Z(:,i), using the real forms + ! of the invariant subspaces that correspond to complex conjugate + ! pairs of eigenvalues. (See the description of Z in ZGEDMD,) + + DO i=1, K + ! have a real eigenvalue with real eigenvector + CALL ZAXPY( M, -ZEIGS(i), ZZ(1,i), 1, ZY1(1,i), 1 ) + RES1(i) = DZNRM2( M, ZY1(1,i), 1) + END DO + TMP = ZERO + DO i = 1, K + TMP = MAX( TMP, ABS(RES(i) - RES1(i)) * & + SINGVX(K)/(ANORM*SINGVX(1)) ) + END DO + TMP_REZ = MAX( TMP_REZ, TMP ) + IF ( TMP <= TOL2 ) THEN + !WRITE(*,*) ':) .... OK ..........ZGEDMD PASSED.' + ELSE + NFAIL_REZ = NFAIL_REZ + 1 + WRITE(*,*) ':( ..................ZGEDMD FAILED!', & + 'Check the code for implementation errors.' + WRITE(*,*) 'The input parameters were ',& + SCALE, JOBZ, RESIDS, JOBREF, WHTSVD, & + M, N, LDX, LDY, NRNK, TOL + END IF + + + IF ( LSAME(JOBREF,'E') ) THEN + TMP = ZERO + DO i = 1, K + TMP = MAX( TMP, ABS(RES1(i) - RESEX(i))/(RES1(i)+RESEX(i)) ) + END DO + TMP_EX = MAX(TMP_EX,TMP) + END IF + + END IF + + DEALLOCATE(ZWORK) + DEALLOCATE(WORK) + DEALLOCATE(IWORK) + + IF ( TEST_QRDMD .AND. (K_TRAJ == 1) ) THEN + + ZF(1:M,1:N+1) = ZF0(1:M,1:N+1) + + CALL ZGEDMDQ( SCALE, JOBZ, RESIDS, WANTQ, WANTR, JOBREF, & + WHTSVD, M, N+1, ZF, LDF, ZX, LDX, ZY, LDY, & + NRNK, TOL, K, ZEIGS, ZZ, LDZ, RES, ZAU, & + LDAU, ZW, LDW, ZS, LDS, ZDUMMY, -1, & + WDUMMY, -1, IDUMMY, -1, INFO ) + + LZWORK = INT(ZDUMMY(LWMINOPT)) + ALLOCATE( ZWORK(LZWORK) ) + LIWORK = IDUMMY(1) + ALLOCATE(IWORK(LIWORK)) + LWORK = INT(WDUMMY(1)) + ALLOCATE(WORK(LWORK)) + + CALL ZGEDMDQ( SCALE, JOBZ, RESIDS, WANTQ, WANTR, JOBREF, & + WHTSVD, M, N+1, ZF, LDF, ZX, LDX, ZY, LDY, & + NRNK, TOL, KQ, ZEIGS, ZZ, LDZ, RES, ZAU, & + LDAU, ZW, LDW, ZS, LDS, ZWORK, LZWORK, & + WORK, LWORK, IWORK, LIWORK, INFO ) + + IF ( INFO /= 0 ) THEN + WRITE(*,*) 'Call to ZGEDMDQ failed. & + &Check the calling sequence and the code.' + WRITE(*,*) 'The error code is ', INFO + WRITE(*,*) 'The input parameters were ',& + SCALE, JOBZ, RESIDS, WANTQ, WANTR, WHTSVD, & + M, N, LDX, LDY, NRNK, TOL + STOP + END IF + SINGVQX(1:N) = WORK(1:N) + + !..... ZGEDMDQ check point + + IF ( 1 == 0 ) THEN + ! Comparison of ZGEDMD and ZGEDMDQ singular values disabled + TMP = ZERO + DO i = 1, MIN(K, KQ) + TMP = MAX(TMP, ABS(SINGVX(i)-SINGVQX(i)) / & + SINGVX(1) ) + END DO + SVDIFF = MAX( SVDIFF, TMP ) + IF ( TMP > M*N*EPS ) THEN + WRITE(*,*) 'FAILED! Something was wrong with the run.' + NFAIL_SVDIFF = NFAIL_SVDIFF + 1 + DO j =1, 3 + write(*,*) j, SINGVX(j), SINGVQX(j) + read(*,*) + END DO + + END IF + END IF + + !..... ZGEDMDQ check point + IF ( LSAME(WANTQ,'Q') .AND. LSAME(WANTR,'R') ) THEN + ! Check that the QR factors are computed and returned + ! as requested. The residual ||F-Q*R||_F / ||F||_F + ! is compared to M*N*EPS. + ZF1(1:M,1:N+1) = ZF0(1:M,1:N+1) + CALL ZGEMM( 'N', 'N', M, N+1, MIN(M,N+1), -ZONE, ZF, & + LDF, ZY, LDY, ZONE, ZF1, LDF ) + TMP_FQR = ZLANGE( 'F', M, N+1, ZF1, LDF, WORK ) / & + ZLANGE( 'F', M, N+1, ZF0, LDF, WORK ) + IF ( TMP_FQR > TOL2 ) THEN + WRITE(*,*) 'FAILED! Something was wrong with the run.' + NFAIL_F_QR = NFAIL_F_QR + 1 + ELSE + !WRITE(*,*) '........ PASSED.' + END IF + END IF + + !..... ZGEDMDQ check point + IF ( LSAME(RESIDS, 'R') ) THEN + ! Compare the residuals returned by ZGEDMDQ with the + ! explicitly computed residuals using the matrix A. + ! Compute explicitly Y1 = A*Z + CALL ZGEMM( 'N', 'N', M, KQ, M, ZONE, ZA, LDA, ZZ, LDZ, ZZERO, ZY1, LDY ) + ! ... and then A*Z(:,i) - LAMBDA(i)*Z(:,i), using the real forms + ! of the invariant subspaces that correspond to complex conjugate + ! pairs of eigenvalues. (See the description of Z in ZGEDMDQ) + + DO i=1, KQ + ! have a real eigenvalue with real eigenvector + CALL ZAXPY( M, -ZEIGS(i), ZZ(1,i), 1, ZY1(1,i), 1 ) + ! Y(1:M,i) = Y(1:M,i) - REIG(i)*Z(1:M,i) + RES1(i) = DZNRM2( M, ZY1(1,i), 1) + END DO + TMP = ZERO + DO i = 1, KQ + TMP = MAX( TMP, ABS(RES(i) - RES1(i)) * & + SINGVQX(KQ)/(ANORM*SINGVQX(1)) ) + END DO + TMP_REZQ = MAX( TMP_REZQ, TMP ) + IF ( TMP <= TOL2 ) THEN + !WRITE(*,*) '.... OK ........ ZGEDMDQ PASSED.' + ELSE + NFAIL_REZQ = NFAIL_REZQ + 1 + WRITE(*,*) '................ ZGEDMDQ FAILED!', & + 'Check the code for implementation errors.' + STOP + END IF + + END IF + + DEALLOCATE( ZWORK ) + DEALLOCATE( WORK ) + DEALLOCATE( IWORK ) + + END IF ! ZGEDMDQ + +!....................................................................................................... + + END DO ! LWMINOPT + !write(*,*) 'LWMINOPT loop completed' + END DO ! iWHTSVD + !write(*,*) 'WHTSVD loop completed' + END DO ! iNRNK -2:-1 + !write(*,*) 'NRNK loop completed' + END DO ! iSCALE 1:4 + !write(*,*) 'SCALE loop completed' + END DO + !write(*,*) 'JOBREF loop completed' + END DO ! iJOBZ + !write(*,*) 'JOBZ loop completed' + + END DO ! MODE -6:6 + !write(*,*) 'MODE loop completed' + END DO ! 1 or 2 trajectories + !write(*,*) 'trajectories loop completed' + + DEALLOCATE( ZA ) + DEALLOCATE( ZAC ) + DEALLOCATE( ZZ ) + DEALLOCATE( ZF ) + DEALLOCATE( ZF0 ) + DEALLOCATE( ZF1 ) + DEALLOCATE( ZX ) + DEALLOCATE( ZX0 ) + DEALLOCATE( ZY ) + DEALLOCATE( ZY0 ) + DEALLOCATE( ZY1 ) + DEALLOCATE( ZAU ) + DEALLOCATE( ZW ) + DEALLOCATE( ZS ) + DEALLOCATE( ZZ1 ) + DEALLOCATE( RES ) + DEALLOCATE( RES1 ) + DEALLOCATE( RESEX ) + DEALLOCATE( ZEIGS ) + DEALLOCATE( SINGVX ) + DEALLOCATE( SINGVQX ) + + END DO ! LLOOP + + WRITE(*,*) '>>>>>>>>>>>>>>>>>>>>>>>>>>' + WRITE(*,*) ' Test summary for ZGEDMD :' + WRITE(*,*) '>>>>>>>>>>>>>>>>>>>>>>>>>>' + WRITE(*,*) + IF ( NFAIL_Z_XV == 0 ) THEN + WRITE(*,*) '>>>> Z - U*V test PASSED.' + ELSE + WRITE(*,*) 'Z - U*V test FAILED ', NFAIL_Z_XV, ' time(s)' + WRITE(*,*) 'Max error ||Z-U*V||_F was ', TMP_ZXW + NFAIL_TOTAL = NFAIL_TOTAL + NFAIL_Z_XV + END IF + IF ( NFAIL_AU == 0 ) THEN + WRITE(*,*) '>>>> A*U test PASSED. ' + ELSE + WRITE(*,*) 'A*U test FAILED ', NFAIL_AU, ' time(s)' + WRITE(*,*) 'Max A*U test adjusted error measure was ', TMP_AU + WRITE(*,*) 'It should be up to O(M*N) times EPS, EPS = ', EPS + NFAIL_TOTAL = NFAIL_TOTAL + NFAIL_AU + END IF + + IF ( NFAIL_REZ == 0 ) THEN + WRITE(*,*) '>>>> Residual computation test PASSED.' + ELSE + WRITE(*,*) 'Residual computation test FAILED ', NFAIL_REZ, 'time(s)' + WRITE(*,*) 'Max residual computing test adjusted error measure was ', TMP_REZ + WRITE(*,*) 'It should be up to O(M*N) times EPS, EPS = ', EPS + NFAIL_TOTAL = NFAIL_TOTAL + NFAIL_REZ + END IF + + IF ( NFAIL_TOTAL == 0 ) THEN + WRITE(*,*) '>>>> ZGEDMD :: ALL TESTS PASSED.' + ELSE + WRITE(*,*) NFAIL_TOTAL, 'FAILURES!' + WRITE(*,*) '>>>>>>>>>>>>>> ZGEDMD :: TESTS FAILED. CHECK THE IMPLEMENTATION.' + END IF + + IF ( TEST_QRDMD ) THEN + WRITE(*,*) + WRITE(*,*) '>>>>>>>>>>>>>>>>>>>>>>>>>>' + WRITE(*,*) ' Test summary for ZGEDMDQ :' + WRITE(*,*) '>>>>>>>>>>>>>>>>>>>>>>>>>>' + WRITE(*,*) + + IF ( NFAIL_SVDIFF == 0 ) THEN + WRITE(*,*) '>>>> ZGEDMD and ZGEDMDQ computed singular & + &values test PASSED.' + ELSE + WRITE(*,*) 'ZGEDMD and ZGEDMDQ discrepancies in & + &the singular values unacceptable ', & + NFAIL_SVDIFF, ' times. Test FAILED.' + WRITE(*,*) 'The maximal discrepancy in the singular values (relative to the norm) was ', SVDIFF + WRITE(*,*) 'It should be up to O(M*N) times EPS, EPS = ', EPS + NFAILQ_TOTAL = NFAILQ_TOTAL + NFAIL_SVDIFF + END IF + + IF ( NFAIL_F_QR == 0 ) THEN + WRITE(*,*) '>>>> F - Q*R test PASSED.' + ELSE + WRITE(*,*) 'F - Q*R test FAILED ', NFAIL_F_QR, ' time(s)' + WRITE(*,*) 'The largest relative residual was ', TMP_FQR + WRITE(*,*) 'It should be up to O(M*N) times EPS, EPS = ', EPS + NFAILQ_TOTAL = NFAILQ_TOTAL + NFAIL_F_QR + END IF + + IF ( NFAIL_REZQ == 0 ) THEN + WRITE(*,*) '>>>> Residual computation test PASSED.' + ELSE + WRITE(*,*) 'Residual computation test FAILED ', NFAIL_REZQ, 'time(s)' + WRITE(*,*) 'Max residual computing test adjusted error measure was ', TMP_REZQ + WRITE(*,*) 'It should be up to O(M*N) times EPS, EPS = ', EPS + NFAILQ_TOTAL = NFAILQ_TOTAL + NFAIL_REZQ + END IF + + IF ( NFAILQ_TOTAL == 0 ) THEN + WRITE(*,*) '>>>>>>> ZGEDMDQ :: ALL TESTS PASSED.' + ELSE + WRITE(*,*) NFAILQ_TOTAL, 'FAILURES!' + WRITE(*,*) '>>>>>>> ZGEDMDQ :: TESTS FAILED. CHECK THE IMPLEMENTATION.' + END IF + + END IF + + WRITE(*,*) + WRITE(*,*) 'Test completed.' + STOP + END diff --git a/lapack-netlib/TESTING/EIG/zchkee.F b/lapack-netlib/TESTING/EIG/zchkee.F index b14127553b..7261881b48 100644 --- a/lapack-netlib/TESTING/EIG/zchkee.F +++ b/lapack-netlib/TESTING/EIG/zchkee.F @@ -2522,7 +2522,7 @@ PROGRAM ZCHKEE 9974 FORMAT( ' Tests of ZHBTRD', / ' (reduction of a Hermitian band ', $ 'matrix to real tridiagonal form)' ) 9973 FORMAT( / 1X, 71( '-' ) ) - 9972 FORMAT( / ' LAPACK VERSION ', I1, '.', I1, '.', I1 ) + 9972 FORMAT( / ' LAPACK VERSION ', I1, '.', I2, '.', I1 ) 9971 FORMAT( / ' Tests of the Generalized Linear Regression Model ', $ 'routines' ) 9970 FORMAT( / ' Tests of the Generalized QR and RQ routines' ) diff --git a/lapack-netlib/TESTING/EIG/zdrvsg2stg.f b/lapack-netlib/TESTING/EIG/zdrvsg2stg.f index c5ef4ce700..19419409ba 100644 --- a/lapack-netlib/TESTING/EIG/zdrvsg2stg.f +++ b/lapack-netlib/TESTING/EIG/zdrvsg2stg.f @@ -1009,7 +1009,7 @@ SUBROUTINE ZDRVSG2STG( NSIZES, NN, NTYPES, DOTYPE, ISEED, THRESH, * CALL ZHPGVX( IBTYPE, 'V', 'A', UPLO, N, AP, BP, VL, $ VU, IL, IU, ABSTOL, M, D, Z, LDZ, WORK, - $ RWORK, IWORK( N+1 ), IWORK, INFO ) + $ RWORK, IWORK( N+1 ), IWORK, IINFO ) IF( IINFO.NE.0 ) THEN WRITE( NOUNIT, FMT = 9999 )'ZHPGVX(V,A' // UPLO // $ ')', IINFO, N, JTYPE, IOLDSD @@ -1055,7 +1055,7 @@ SUBROUTINE ZDRVSG2STG( NSIZES, NN, NTYPES, DOTYPE, ISEED, THRESH, VU = ANORM CALL ZHPGVX( IBTYPE, 'V', 'V', UPLO, N, AP, BP, VL, $ VU, IL, IU, ABSTOL, M, D, Z, LDZ, WORK, - $ RWORK, IWORK( N+1 ), IWORK, INFO ) + $ RWORK, IWORK( N+1 ), IWORK, IINFO ) IF( IINFO.NE.0 ) THEN WRITE( NOUNIT, FMT = 9999 )'ZHPGVX(V,V' // UPLO // $ ')', IINFO, N, JTYPE, IOLDSD @@ -1099,7 +1099,7 @@ SUBROUTINE ZDRVSG2STG( NSIZES, NN, NTYPES, DOTYPE, ISEED, THRESH, * CALL ZHPGVX( IBTYPE, 'V', 'I', UPLO, N, AP, BP, VL, $ VU, IL, IU, ABSTOL, M, D, Z, LDZ, WORK, - $ RWORK, IWORK( N+1 ), IWORK, INFO ) + $ RWORK, IWORK( N+1 ), IWORK, IINFO ) IF( IINFO.NE.0 ) THEN WRITE( NOUNIT, FMT = 9999 )'ZHPGVX(V,I' // UPLO // $ ')', IINFO, N, JTYPE, IOLDSD diff --git a/lapack-netlib/TESTING/LIN/cchkaa.F b/lapack-netlib/TESTING/LIN/cchkaa.F index 57d95c7419..a5a3428c14 100644 --- a/lapack-netlib/TESTING/LIN/cchkaa.F +++ b/lapack-netlib/TESTING/LIN/cchkaa.F @@ -1245,7 +1245,7 @@ PROGRAM CCHKAA 9995 FORMAT( ' Invalid input value: ', A4, '=', I6, '; must be <=', $ I6 ) 9994 FORMAT( ' Tests of the COMPLEX LAPACK routines ', - $ / ' LAPACK VERSION ', I1, '.', I1, '.', I1, + $ / ' LAPACK VERSION ', I1, '.', I2, '.', I1, $ / / ' The following parameter values will be used:' ) 9993 FORMAT( 4X, A4, ': ', 10I6, / 11X, 10I6 ) 9992 FORMAT( / ' Routines pass computational tests if test ratio is ', diff --git a/lapack-netlib/TESTING/LIN/cchkrfp.f b/lapack-netlib/TESTING/LIN/cchkrfp.f index 58d55c31c9..2511f8b56b 100644 --- a/lapack-netlib/TESTING/LIN/cchkrfp.f +++ b/lapack-netlib/TESTING/LIN/cchkrfp.f @@ -276,7 +276,7 @@ PROGRAM CCHKRFP 9995 FORMAT( ' !! Invalid input value: ', A4, '=', I6, '; must be <=', $ I6 ) 9994 FORMAT( / ' Tests of the COMPLEX LAPACK RFP routines ', - $ / ' LAPACK VERSION ', I1, '.', I1, '.', I1, + $ / ' LAPACK VERSION ', I1, '.', I2, '.', I1, $ / / ' The following parameter values will be used:' ) 9993 FORMAT( 4X, A4, ': ', 10I6, / 11X, 10I6 ) 9992 FORMAT( / ' Routines pass computational tests if test ratio is ', diff --git a/lapack-netlib/TESTING/LIN/dchkaa.F b/lapack-netlib/TESTING/LIN/dchkaa.F index 6582cac135..91ed659661 100644 --- a/lapack-netlib/TESTING/LIN/dchkaa.F +++ b/lapack-netlib/TESTING/LIN/dchkaa.F @@ -1089,7 +1089,7 @@ PROGRAM DCHKAA 9995 FORMAT( ' Invalid input value: ', A4, '=', I6, '; must be <=', $ I6 ) 9994 FORMAT( ' Tests of the DOUBLE PRECISION LAPACK routines ', - $ / ' LAPACK VERSION ', I1, '.', I1, '.', I1, + $ / ' LAPACK VERSION ', I1, '.', I2, '.', I1, $ / / ' The following parameter values will be used:' ) 9993 FORMAT( 4X, A4, ': ', 10I6, / 11X, 10I6 ) 9992 FORMAT( / ' Routines pass computational tests if test ratio is ', diff --git a/lapack-netlib/TESTING/LIN/dchkab.f b/lapack-netlib/TESTING/LIN/dchkab.f index a6b8a0fe1d..afa73c80ce 100644 --- a/lapack-netlib/TESTING/LIN/dchkab.f +++ b/lapack-netlib/TESTING/LIN/dchkab.f @@ -350,7 +350,7 @@ PROGRAM DCHKAB $ I6 ) 9994 FORMAT( ' Tests of the DOUBLE PRECISION LAPACK DSGESV/DSPOSV', $ ' routines ', - $ / ' LAPACK VERSION ', I1, '.', I1, '.', I1, + $ / ' LAPACK VERSION ', I1, '.', I2, '.', I1, $ / / ' The following parameter values will be used:' ) 9993 FORMAT( 4X, A4, ': ', 10I6, / 11X, 10I6 ) 9992 FORMAT( / ' Routines pass computational tests if test ratio is ', diff --git a/lapack-netlib/TESTING/LIN/dchkrfp.f b/lapack-netlib/TESTING/LIN/dchkrfp.f index e242e013d8..ebaa482fad 100644 --- a/lapack-netlib/TESTING/LIN/dchkrfp.f +++ b/lapack-netlib/TESTING/LIN/dchkrfp.f @@ -275,7 +275,7 @@ PROGRAM DCHKRFP 9995 FORMAT( ' !! Invalid input value: ', A4, '=', I6, '; must be <=', $ I6 ) 9994 FORMAT( / ' Tests of the DOUBLE PRECISION LAPACK RFP routines ', - $ / ' LAPACK VERSION ', I1, '.', I1, '.', I1, + $ / ' LAPACK VERSION ', I1, '.', I2, '.', I1, $ / / ' The following parameter values will be used:' ) 9993 FORMAT( 4X, A4, ': ', 10I6, / 11X, 10I6 ) 9992 FORMAT( / ' Routines pass computational tests if test ratio is ', diff --git a/lapack-netlib/TESTING/LIN/schkaa.F b/lapack-netlib/TESTING/LIN/schkaa.F index 036b13924f..ad6ea87767 100644 --- a/lapack-netlib/TESTING/LIN/schkaa.F +++ b/lapack-netlib/TESTING/LIN/schkaa.F @@ -1083,7 +1083,7 @@ PROGRAM SCHKAA 9995 FORMAT( ' Invalid input value: ', A4, '=', I6, '; must be <=', $ I6 ) 9994 FORMAT( ' Tests of the REAL LAPACK routines ', - $ / ' LAPACK VERSION ', I1, '.', I1, '.', I1, + $ / ' LAPACK VERSION ', I1, '.', I2, '.', I1, $ / / ' The following parameter values will be used:' ) 9993 FORMAT( 4X, A4, ': ', 10I6, / 11X, 10I6 ) 9992 FORMAT( / ' Routines pass computational tests if test ratio is ', diff --git a/lapack-netlib/TESTING/LIN/schkrfp.f b/lapack-netlib/TESTING/LIN/schkrfp.f index f3fdfa07f9..6fd3c00abf 100644 --- a/lapack-netlib/TESTING/LIN/schkrfp.f +++ b/lapack-netlib/TESTING/LIN/schkrfp.f @@ -274,7 +274,7 @@ PROGRAM SCHKRFP 9995 FORMAT( ' !! Invalid input value: ', A4, '=', I6, '; must be <=', $ I6 ) 9994 FORMAT( / ' Tests of the REAL LAPACK RFP routines ', - $ / ' LAPACK VERSION ', I1, '.', I1, '.', I1, + $ / ' LAPACK VERSION ', I1, '.', I2, '.', I1, $ / / ' The following parameter values will be used:' ) 9993 FORMAT( 4X, A4, ': ', 10I6, / 11X, 10I6 ) 9992 FORMAT( / ' Routines pass computational tests if test ratio is ', diff --git a/lapack-netlib/TESTING/LIN/zchkaa.F b/lapack-netlib/TESTING/LIN/zchkaa.F index f1020f2d87..77a7a6cb31 100644 --- a/lapack-netlib/TESTING/LIN/zchkaa.F +++ b/lapack-netlib/TESTING/LIN/zchkaa.F @@ -1281,7 +1281,7 @@ PROGRAM ZCHKAA 9995 FORMAT( ' Invalid input value: ', A4, '=', I6, '; must be <=', $ I6 ) 9994 FORMAT( ' Tests of the COMPLEX*16 LAPACK routines ', - $ / ' LAPACK VERSION ', I1, '.', I1, '.', I1, + $ / ' LAPACK VERSION ', I1, '.', I2, '.', I1, $ / / ' The following parameter values will be used:' ) 9993 FORMAT( 4X, A4, ': ', 10I6, / 11X, 10I6 ) 9992 FORMAT( / ' Routines pass computational tests if test ratio is ', diff --git a/lapack-netlib/TESTING/LIN/zchkab.f b/lapack-netlib/TESTING/LIN/zchkab.f index 63274cabb9..495d373e0c 100644 --- a/lapack-netlib/TESTING/LIN/zchkab.f +++ b/lapack-netlib/TESTING/LIN/zchkab.f @@ -350,7 +350,7 @@ PROGRAM ZCHKAB 9995 FORMAT( ' Invalid input value: ', A4, '=', I6, '; must be <=', $ I6 ) 9994 FORMAT( ' Tests of the COMPLEX*16 LAPACK ZCGESV/ZCPOSV routines ', - $ / ' LAPACK VERSION ', I1, '.', I1, '.', I1, + $ / ' LAPACK VERSION ', I1, '.', I2, '.', I1, $ / / ' The following parameter values will be used:' ) 9993 FORMAT( 4X, A4, ': ', 10I6, / 11X, 10I6 ) 9992 FORMAT( / ' Routines pass computational tests if test ratio is ', diff --git a/lapack-netlib/TESTING/LIN/zchkrfp.f b/lapack-netlib/TESTING/LIN/zchkrfp.f index 1764f566e4..2bf2ac3cf7 100644 --- a/lapack-netlib/TESTING/LIN/zchkrfp.f +++ b/lapack-netlib/TESTING/LIN/zchkrfp.f @@ -276,7 +276,7 @@ PROGRAM ZCHKRFP 9995 FORMAT( ' !! Invalid input value: ', A4, '=', I6, '; must be <=', $ I6 ) 9994 FORMAT( / ' Tests of the COMPLEX*16 LAPACK RFP routines ', - $ / ' LAPACK VERSION ', I1, '.', I1, '.', I1, + $ / ' LAPACK VERSION ', I1, '.', I2, '.', I1, $ / / ' The following parameter values will be used:' ) 9993 FORMAT( 4X, A4, ': ', 10I6, / 11X, 10I6 ) 9992 FORMAT( / ' Routines pass computational tests if test ratio is ', diff --git a/lapack-netlib/TESTING/cbb.in b/lapack-netlib/TESTING/cbb.in index 2aad1aaf22..a4e4dd269a 100644 --- a/lapack-netlib/TESTING/cbb.in +++ b/lapack-netlib/TESTING/cbb.in @@ -7,6 +7,6 @@ CBB: Data file for testing banded Singular Value Decomposition routines 2 Number of values of NRHS 1 2 Values of NRHS 20.0 Threshold value -F Put T to test the error exits +T Put T to test the error exits 1 Code to interpret the seed CBB 15 diff --git a/lapack-netlib/TESTING/cgg.in b/lapack-netlib/TESTING/cgg.in index da524e92c8..790feedaa2 100644 --- a/lapack-netlib/TESTING/cgg.in +++ b/lapack-netlib/TESTING/cgg.in @@ -10,7 +10,7 @@ CGG: Data file for testing Nonsymmetric Eigenvalue Problem routines 40 40 2 2 Values of NBCOL (minimum col. dimension) 20.0 Threshold value T Put T to test the LAPACK routines -F Put T to test the driver routines +T Put T to test the driver routines T Put T to test the error exits 1 Code to interpret the seed CGG 26 diff --git a/lapack-netlib/TESTING/dbb.in b/lapack-netlib/TESTING/dbb.in index 3303274eee..a329ffac63 100644 --- a/lapack-netlib/TESTING/dbb.in +++ b/lapack-netlib/TESTING/dbb.in @@ -7,6 +7,6 @@ DBB: Data file for testing banded Singular Value Decomposition routines 2 Number of values of NRHS 1 2 Values of NRHS 20.0 Threshold value -F Put T to test the error exits +T Put T to test the error exits 1 Code to interpret the seed DBB 15 diff --git a/lapack-netlib/TESTING/dgd.in b/lapack-netlib/TESTING/dgd.in index 127b63789e..67726ea13f 100644 --- a/lapack-netlib/TESTING/dgd.in +++ b/lapack-netlib/TESTING/dgd.in @@ -1,10 +1,11 @@ DGS Data for the Real Nonsymmetric Schur Form Driver 5 Number of matrix dimensions -2 10 12 20 30 Matrix dimensions +2 6 10 12 20 30 Matrix dimensions 1 1 1 2 1 Parameters NB, NBMIN, NXOVER, NS, NBCOL 10 Threshold for test ratios .TRUE. Put T to test the error exits -0 Code to interpret the seed +2 Code to interpret the seed +1234 5678 9012 3456 DGS 26 Test all 26 matrix types DGV Data for the Real Nonsymmetric Eigenvalue Problem Driver 6 Number of matrix dimensions @@ -83,4 +84,4 @@ DXV Data for the Real Nonsymmetric Eigenvalue Expert Driver 0.0000D+00 0.0000D+00 0.0000D+00 1.0000D+00 1.3639D+00 4.0417D+00 6.4089D-01 6.8030D-01 eigenvalue condition #'s 7.6064D-01 8.4964D-01 1.1222D-01 1.1499D-01 eigenvector condition #'s -0 +0 \ No newline at end of file diff --git a/lapack-netlib/TESTING/dgg.in b/lapack-netlib/TESTING/dgg.in index 073cf5b807..fcc44c0b46 100644 --- a/lapack-netlib/TESTING/dgg.in +++ b/lapack-netlib/TESTING/dgg.in @@ -10,7 +10,7 @@ DGG: Data file for testing Nonsymmetric Eigenvalue Problem routines 40 40 2 2 Values of NBCOL (minimum col. dimension) 20.0 Threshold value T Put T to test the LAPACK routines -F Put T to test the driver routines +T Put T to test the driver routines T Put T to test the error exits 1 Code to interpret the seed DGG 26 diff --git a/lapack-netlib/TESTING/sbb.in b/lapack-netlib/TESTING/sbb.in index 0f1ee51f6c..570d37ad14 100644 --- a/lapack-netlib/TESTING/sbb.in +++ b/lapack-netlib/TESTING/sbb.in @@ -7,6 +7,6 @@ SBB: Data file for testing banded Singular Value Decomposition routines 2 Number of values of NRHS 1 2 Values of NRHS 20.0 Threshold value -F Put T to test the error exits +T Put T to test the error exits 1 Code to interpret the seed SBB 15 diff --git a/lapack-netlib/TESTING/sgg.in b/lapack-netlib/TESTING/sgg.in index f6478a28cd..162ba3eff9 100644 --- a/lapack-netlib/TESTING/sgg.in +++ b/lapack-netlib/TESTING/sgg.in @@ -10,7 +10,7 @@ SGG: Data file for testing Nonsymmetric Eigenvalue Problem routines 40 40 2 2 Values of NBCOL (minimum col. dimension) 20.0 Threshold value T Put T to test the LAPACK routines -F Put T to test the driver routines +T Put T to test the driver routines T Put T to test the error exits 1 Code to interpret the seed SGG 26 diff --git a/lapack-netlib/TESTING/zbb.in b/lapack-netlib/TESTING/zbb.in index 51e54e998d..73c5ac8faf 100644 --- a/lapack-netlib/TESTING/zbb.in +++ b/lapack-netlib/TESTING/zbb.in @@ -7,6 +7,6 @@ ZBB: Data file for testing banded Singular Value Decomposition routines 2 Number of values of NRHS 1 2 Values of NRHS 20.0 Threshold value -F Put T to test the error exits +T Put T to test the error exits 1 Code to interpret the seed ZBB 15 diff --git a/lapack-netlib/TESTING/zgg.in b/lapack-netlib/TESTING/zgg.in index 23cc875ed7..802e5ddf67 100644 --- a/lapack-netlib/TESTING/zgg.in +++ b/lapack-netlib/TESTING/zgg.in @@ -10,7 +10,7 @@ ZGG: Data file for testing Nonsymmetric Eigenvalue Problem routines 40 40 2 2 Values of NBCOL (minimum col. dimension) 20.0 Threshold value T Put T to test the LAPACK routines -F Put T to test the driver routines +T Put T to test the driver routines T Put T to test the error exits 1 Code to interpret the seed ZGG 26 diff --git a/lapack/laed3/laed3_parallel.c b/lapack/laed3/laed3_parallel.c index e02567b699..c6749144cf 100644 --- a/lapack/laed3/laed3_parallel.c +++ b/lapack/laed3/laed3_parallel.c @@ -33,6 +33,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include #include "common.h" +#ifdef max +#undef max +#endif #define max(a,b) ((a) > (b) ? (a) : (b)) #define copysign(x,y) ((y) < 0 ? ((x) < 0 ? (x) : -(x)) : ((x) < 0 ? -(x) : (x))) @@ -54,7 +57,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define LASET BLASFUNC(slaset) #endif -FLOAT LAMC3(FLOAT *, FLOAT *); +FLOATRET LAMC3(FLOAT *, FLOAT *); void LAED4(blasint *, blasint *, FLOAT *, FLOAT *, FLOAT *, FLOAT *, FLOAT *, blasint *); void LACPY(char *, blasint *, blasint *, FLOAT *, blasint *, FLOAT *, blasint *); void LASET(char *, blasint *, blasint *, FLOAT *, FLOAT *, FLOAT *, blasint *); diff --git a/lapack/laed3/laed3_single.c b/lapack/laed3/laed3_single.c index b21bb99300..99871f1855 100644 --- a/lapack/laed3/laed3_single.c +++ b/lapack/laed3/laed3_single.c @@ -53,7 +53,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define LASET BLASFUNC(slaset) #endif -FLOAT LAMC3(FLOAT *, FLOAT *); +FLOATRET LAMC3(FLOAT *, FLOAT *); void LAED4(blasint *, blasint *, FLOAT *, FLOAT *, FLOAT *, FLOAT *, FLOAT *, blasint *); void LACPY(char *, blasint *, blasint *, FLOAT *, blasint *, FLOAT *, blasint *); void LASET(char *, blasint *, blasint *, FLOAT *, FLOAT *, FLOAT *, blasint *); diff --git a/openblas_config_template.h b/openblas_config_template.h index 1735fdda2a..e91cbeaf4e 100644 --- a/openblas_config_template.h +++ b/openblas_config_template.h @@ -40,10 +40,17 @@ typedef uint16_t bfloat16; #endif #if defined(__GNUC__) && (__GNUC__ > 12) -#if defined(OPENBLAS_ARCH_POWER) +#if defined(OPENBLAS_ARCH_POWER) || defined(OPENBLAS_ARCH_LOONGARCH64) typedef bfloat16 hfloat16; #else +#define __STDC_WANT_IEC_60559_TYPES_EXT__ +#include +#ifdef FLT16_MAX typedef _Float16 hfloat16; +#else +#include +typedef uint16_t hfloat16; +#endif #endif #else #include diff --git a/param.h b/param.h index 4faaebff7c..c4a1b2520a 100644 --- a/param.h +++ b/param.h @@ -3682,6 +3682,13 @@ is a big desktop or server with abundant cache rather than a phone or embedded d #define SBGEMM_DEFAULT_UNROLL_M 8 #define SBGEMM_DEFAULT_UNROLL_N 8 +#undef SHGEMM_ALIGN_K +#undef SHGEMM_DEFAULT_UNROLL_M +#undef SHGEMM_DEFAULT_UNROLL_N +#define SHGEMM_ALIGN_K 4 +#define SHGEMM_DEFAULT_UNROLL_M 8 +#define SHGEMM_DEFAULT_UNROLL_N 8 + #define SGEMM_DEFAULT_UNROLL_M 16 #define SGEMM_DEFAULT_UNROLL_N 4 diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index e3491d7f11..bcee5ed666 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -66,7 +66,8 @@ FILE(WRITE ${CMAKE_CURRENT_BINARY_DIR}/test_helper.ps1 "$ErrorActionPreference = \"Stop\"\n" "Get-Content $args[1] | & $args[0]\n" "If ((Get-Content $args[2] | %{$_ -match \"FATAL\"}) -contains $true) {\n" -"echo Error\n" +"echo Error in $args[1]:\n" +"Get-Content $args[2] \n" "exit 1\n" "} else {\n" "exit 0\n" diff --git a/test/Makefile b/test/Makefile index f653b70b13..230ee8ef23 100644 --- a/test/Makefile +++ b/test/Makefile @@ -277,6 +277,10 @@ ifeq ($(BUILD_BFLOAT16),1) OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 ./test_bgemm > BBLAT3.SUMM @$(GREP) -q FATAL BBLAT3.SUMM && cat BBLAT3.SUMM || exit 0 endif +ifeq ($(BUILD_HFLOAT16),1) + OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 ./test_shgemm > SHBLAT3.SUMM + @$(GREP) -q FATAL SHBLAT3.SUMM && cat SHBLAT3.SUMM || exit 0 +endif ifeq ($(BUILD_SINGLE),1) OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 ./sblat3 < ./sblat3.dat @$(GREP) -q FATAL SBLAT3.SUMM && cat SBLAT3.SUMM || exit 0 @@ -302,6 +306,11 @@ ifeq ($(BUILD_BFLOAT16),1) OMP_NUM_THREADS=2 ./test_bgemm > BBLAT3.SUMM @$(GREP) -q FATAL BBLAT3.SUMM && cat BBLAT3.SUMM || exit 0 endif +ifeq ($(BUILD_HFLOAT16),1) + OMP_NUM_THREADS=2 ./test_shgemm > SHBLAT3.SUMM + @$(GREP) -q FATAL SHBLAT3.SUMM && cat SHBLAT3.SUMM || exit 0 +endif + ifeq ($(BUILD_SINGLE),1) OMP_NUM_THREADS=2 ./sblat3 < ./sblat3.dat @$(GREP) -q FATAL SBLAT3.SUMM && cat SBLAT3.SUMM || exit 0 @@ -325,6 +334,10 @@ ifeq ($(BUILD_BFLOAT16),1) OPENBLAS_NUM_THREADS=2 ./test_bgemm > BBLAT3.SUMM @$(GREP) -q FATAL BBLAT3.SUMM && cat BBLAT3.SUMM || exit 0 endif +ifeq ($(BUILD_HFLOAT16),1) + OPENBLAS_NUM_THREADS=2 ./test_shgemm > SHBLAT3.SUMM + @$(GREP) -q FATAL SHBLAT3.SUMM && cat SHBLAT3.SUMM || exit 0 +endif ifeq ($(BUILD_SINGLE),1) OPENBLAS_NUM_THREADS=2 ./sblat3 < ./sblat3.dat @$(GREP) -q FATAL SBLAT3.SUMM && cat SBLAT3.SUMM || exit 0