Skip to content

Commit 9ed9a4f

Browse files
committed
Merge branch '70-extend-chase-to-computing-lower-excited-states-of-full-bse' into 'master'
Resolve "Extend ChASE to computing lower excited states of full BSE". Closes #70. See merge request chase/chase-library/ChASE!97.
2 parents 6fed1ae + 4986c4e commit 9ed9a4f

106 files changed

Lines changed: 24382 additions & 3739 deletions

File tree

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

CMakeLists.txt

Lines changed: 41 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -51,14 +51,39 @@ option(CHASE_ENABLE_XGEEV_RAYLEIGHRITZ "Enable solving pseudo-hermitian problems
5151
option(ChASE_DISPLAY_COND_V_SVD "Compute and display condition number of V from SVD" OFF)
5252
option(CHASE_QR_DOUBLE_PRECISION "Operate QR in Double Precision" ON)
5353
option(CHASE_RR_DOUBLE_PRECISION "Operate HEEVD in RR for pseudo-hermitian matrices in Double Precision" OFF)
54+
option(CHASE_PANEL_HIPREC "Enable Split-Sync-Fix and high-precision panel protection" OFF)
5455
option(CHASE_ENABLE_TESTS "Enable unit tests." OFF)
55-
#XZ: Will be removed once performance is confirmed
56-
option(CHASE_ENABLE_GPU_RESIDENT_LANCZOS "Enable GPU-resident Lanczos with fused kernels and NCCL (experimental)" ON)
56+
# Optional NCCL warm-up in pChASEGPU constructor (cholQR1 + Lanczos); skews startup timing.
57+
option(CHASE_GPU_CONSTRUCTOR_BENCHMARK_WARMUP
58+
"Enable NCCL warm-up in pChASEGPU constructor (benchmarks only; extra startup cost)" OFF)
59+
60+
set(CHASE_GPU_CONSTRUCTOR_BENCHMARK_WARMUP_EFFECTIVE OFF)
61+
if(CHASE_GPU_CONSTRUCTOR_BENCHMARK_WARMUP)
62+
if(CMAKE_BUILD_TYPE)
63+
string(TOUPPER "${CMAKE_BUILD_TYPE}" _CHASE_BT_UPPER)
64+
if(_CHASE_BT_UPPER STREQUAL "RELEASE" OR _CHASE_BT_UPPER STREQUAL "RELWITHDEBINFO")
65+
message(WARNING
66+
"CHASE_GPU_CONSTRUCTOR_BENCHMARK_WARMUP is ignored for CMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} "
67+
"(Release and RelWithDebInfo builds never enable this warm-up).")
68+
else()
69+
set(CHASE_GPU_CONSTRUCTOR_BENCHMARK_WARMUP_EFFECTIVE ON)
70+
endif()
71+
else()
72+
# Multi-config generators may leave CMAKE_BUILD_TYPE empty; honor the option if set.
73+
set(CHASE_GPU_CONSTRUCTOR_BENCHMARK_WARMUP_EFFECTIVE ON)
74+
endif()
75+
endif()
5776

58-
if(NOT CMAKE_BUILD_TYPE STREQUAL "Release")
59-
message(WARNING "For prodution, please consider CMAKE_BUILD_TYPE to be Release or RelWithDebInfo")
77+
if(CHASE_GPU_CONSTRUCTOR_BENCHMARK_WARMUP_EFFECTIVE)
78+
target_compile_definitions(chase_algorithm INTERFACE CHASE_GPU_CONSTRUCTOR_BENCHMARK_WARMUP=1)
79+
message(STATUS "GPU constructor benchmark warm-up (pChASEGPU): ON")
80+
else()
81+
message(STATUS "GPU constructor benchmark warm-up (pChASEGPU): OFF")
6082
endif()
6183

84+
# GPU-resident Lanczos (fused kernels + NCCL) is required for GPU Lanczos paths.
85+
set(CHASE_ENABLE_GPU_RESIDENT_LANCZOS ON)
86+
6287
if( CHASE_OUTPUT )
6388
target_compile_definitions( chase_algorithm INTERFACE "-DCHASE_OUTPUT" )
6489
endif()
@@ -100,18 +125,19 @@ if(CHASE_RR_DOUBLE_PRECISION)
100125
target_compile_definitions(chase_algorithm INTERFACE "-DRR_DOUBLE_PRECISION")
101126
endif()
102127

128+
if(CHASE_PANEL_HIPREC)
129+
target_compile_definitions(chase_algorithm INTERFACE "-DCHASE_PANEL_HIPREC=1")
130+
endif()
131+
103132
if(ChASE_DISPLAY_COND_V_SVD)
104133
target_compile_definitions(chase_algorithm INTERFACE "-DChASE_DISPLAY_COND_V_SVD")
105134
endif()
106135

107136
if(CHASE_SAVE_RESIDUALS)
108137
target_compile_definitions(chase_algorithm INTERFACE "-DCHASE_SAVE_RESIDUALS")
109138
endif()
110-
# XZ: Will be removed once performance is confirmed
111-
if(CHASE_ENABLE_GPU_RESIDENT_LANCZOS)
112-
target_compile_definitions(chase_algorithm INTERFACE "-DCHASE_ENABLE_GPU_RESIDENT_LANCZOS")
113-
message(STATUS "GPU-resident Lanczos with fused kernels enabled (experimental)")
114-
endif()
139+
target_compile_definitions(chase_algorithm INTERFACE "-DCHASE_ENABLE_GPU_RESIDENT_LANCZOS")
140+
message(STATUS "GPU-resident Lanczos (required for distributed GPU Lanczos)")
115141

116142
add_subdirectory(external)
117143
add_subdirectory(grid)
@@ -192,14 +218,18 @@ endif()
192218
if(CHASE_RR_DOUBLE_PRECISION)
193219
set(CHASE_COMPILE_DEFINITIONS "${CHASE_COMPILE_DEFINITIONS} -DRR_DOUBLE_PRECISION")
194220
endif()
221+
if(CHASE_PANEL_HIPREC)
222+
set(CHASE_COMPILE_DEFINITIONS "${CHASE_COMPILE_DEFINITIONS} -DCHASE_PANEL_HIPREC=1")
223+
endif()
195224
if(ChASE_DISPLAY_COND_V_SVD)
196225
set(CHASE_COMPILE_DEFINITIONS "${CHASE_COMPILE_DEFINITIONS} -DChASE_DISPLAY_COND_V_SVD")
197226
endif()
198227
if(CHASE_SAVE_RESIDUALS)
199228
set(CHASE_COMPILE_DEFINITIONS "${CHASE_COMPILE_DEFINITIONS} -DCHASE_SAVE_RESIDUALS")
200229
endif()
201-
if(CHASE_ENABLE_GPU_RESIDENT_LANCZOS)
202-
set(CHASE_COMPILE_DEFINITIONS "${CHASE_COMPILE_DEFINITIONS} -DCHASE_ENABLE_GPU_RESIDENT_LANCZOS")
230+
set(CHASE_COMPILE_DEFINITIONS "${CHASE_COMPILE_DEFINITIONS} -DCHASE_ENABLE_GPU_RESIDENT_LANCZOS")
231+
if(CHASE_GPU_CONSTRUCTOR_BENCHMARK_WARMUP_EFFECTIVE)
232+
set(CHASE_COMPILE_DEFINITIONS "${CHASE_COMPILE_DEFINITIONS} -DCHASE_GPU_CONSTRUCTOR_BENCHMARK_WARMUP=1")
203233
endif()
204234

205235
set(CHASE_LINK_LIBRARIES "")

0 commit comments

Comments
 (0)