@@ -51,14 +51,39 @@ option(CHASE_ENABLE_XGEEV_RAYLEIGHRITZ "Enable solving pseudo-hermitian problems
# User-facing feature switches. Each one is read further down in this file
# to decide which compile definitions are emitted.
option(ChASE_DISPLAY_COND_V_SVD
  "Compute and display condition number of V from SVD" OFF)
option(CHASE_QR_DOUBLE_PRECISION
  "Operate QR in Double Precision" ON)
option(CHASE_RR_DOUBLE_PRECISION
  "Operate HEEVD in RR for pseudo-hermitian matrices in Double Precision" OFF)
option(CHASE_PANEL_HIPREC
  "Enable Split-Sync-Fix and high-precision panel protection" OFF)
option(CHASE_ENABLE_TESTS
  "Enable unit tests." OFF)
55- #XZ: Will be removed once performance is confirmed
56- option (CHASE_ENABLE_GPU_RESIDENT_LANCZOS "Enable GPU-resident Lanczos with fused kernels and NCCL (experimental)" ON )
# Optional NCCL warm-up in pChASEGPU constructor (cholQR1 + Lanczos); skews startup timing.
option(CHASE_GPU_CONSTRUCTOR_BENCHMARK_WARMUP
  "Enable NCCL warm-up in pChASEGPU constructor (benchmarks only; extra startup cost)" OFF)

# CHASE_GPU_CONSTRUCTOR_BENCHMARK_WARMUP_EFFECTIVE is the value the rest of
# this file acts on: the user's request, vetoed when CMAKE_BUILD_TYPE is
# Release or RelWithDebInfo (compared case-insensitively).
set(CHASE_GPU_CONSTRUCTOR_BENCHMARK_WARMUP_EFFECTIVE OFF)
if(CHASE_GPU_CONSTRUCTOR_BENCHMARK_WARMUP)
  if(CMAKE_BUILD_TYPE)
    # Upper-case the build type exactly as given. No padding may be added
    # inside the quotes, or the STREQUAL checks below can never match and the
    # veto silently stops working.
    string(TOUPPER "${CMAKE_BUILD_TYPE}" _CHASE_BT_UPPER)
    if(_CHASE_BT_UPPER STREQUAL "RELEASE" OR _CHASE_BT_UPPER STREQUAL "RELWITHDEBINFO")
      # message() concatenates its arguments, so the trailing space in the
      # first string is what separates it from the parenthesised remark.
      message(WARNING
        "CHASE_GPU_CONSTRUCTOR_BENCHMARK_WARMUP is ignored for CMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} "
        "(Release and RelWithDebInfo builds never enable this warm-up).")
    else()
      set(CHASE_GPU_CONSTRUCTOR_BENCHMARK_WARMUP_EFFECTIVE ON)
    endif()
  else()
    # Multi-config generators may leave CMAKE_BUILD_TYPE empty; honor the option if set.
    set(CHASE_GPU_CONSTRUCTOR_BENCHMARK_WARMUP_EFFECTIVE ON)
  endif()
endif()
5776
58- if (NOT CMAKE_BUILD_TYPE STREQUAL "Release" )
59- message (WARNING "For prodution, please consider CMAKE_BUILD_TYPE to be Release or RelWithDebInfo" )
# Publish the warm-up decision: report it at configure time and, when it is
# enabled, attach the matching definition to consumers of chase_algorithm.
if(NOT CHASE_GPU_CONSTRUCTOR_BENCHMARK_WARMUP_EFFECTIVE)
  message(STATUS "GPU constructor benchmark warm-up (pChASEGPU): OFF")
else()
  target_compile_definitions(chase_algorithm
    INTERFACE CHASE_GPU_CONSTRUCTOR_BENCHMARK_WARMUP=1)
  message(STATUS "GPU constructor benchmark warm-up (pChASEGPU): ON")
endif()
6183
# The GPU Lanczos paths depend on the GPU-resident implementation (fused
# kernels + NCCL), so this is set unconditionally to ON.
set(CHASE_ENABLE_GPU_RESIDENT_LANCZOS ON)
86+
# When CHASE_OUTPUT is set, pass the matching definition on to every target
# that links chase_algorithm.
if(CHASE_OUTPUT)
  target_compile_definitions(chase_algorithm
    INTERFACE "-DCHASE_OUTPUT")
endif()
@@ -100,18 +125,19 @@ if(CHASE_RR_DOUBLE_PRECISION)
100125 target_compile_definitions (chase_algorithm INTERFACE "-DRR_DOUBLE_PRECISION" )
101126endif ()
102127
# Each switch below adds one INTERFACE definition to chase_algorithm when it
# is enabled; a disabled switch adds nothing.

if(CHASE_PANEL_HIPREC)
  target_compile_definitions(chase_algorithm
    INTERFACE "-DCHASE_PANEL_HIPREC=1")
endif()

if(ChASE_DISPLAY_COND_V_SVD)
  target_compile_definitions(chase_algorithm
    INTERFACE "-DChASE_DISPLAY_COND_V_SVD")
endif()

if(CHASE_SAVE_RESIDUALS)
  target_compile_definitions(chase_algorithm
    INTERFACE "-DCHASE_SAVE_RESIDUALS")
endif()
110- # XZ: Will be removed once performance is confirmed
111- if (CHASE_ENABLE_GPU_RESIDENT_LANCZOS)
112- target_compile_definitions (chase_algorithm INTERFACE "-DCHASE_ENABLE_GPU_RESIDENT_LANCZOS" )
113- message (STATUS "GPU-resident Lanczos with fused kernels enabled (experimental)" )
114- endif ()
# The GPU-resident Lanczos definition is always attached to chase_algorithm;
# the status line records that in the configure log.
message(STATUS "GPU-resident Lanczos (required for distributed GPU Lanczos)")
target_compile_definitions(chase_algorithm
  INTERFACE "-DCHASE_ENABLE_GPU_RESIDENT_LANCZOS")
115141
116142add_subdirectory (external )
117143add_subdirectory (grid )
@@ -192,14 +218,18 @@ endif()
# Append one space-separated -D flag to CHASE_COMPILE_DEFINITIONS for each
# enabled switch.
if(CHASE_RR_DOUBLE_PRECISION)
  string(APPEND CHASE_COMPILE_DEFINITIONS " -DRR_DOUBLE_PRECISION")
endif()
if(CHASE_PANEL_HIPREC)
  string(APPEND CHASE_COMPILE_DEFINITIONS " -DCHASE_PANEL_HIPREC=1")
endif()
if(ChASE_DISPLAY_COND_V_SVD)
  string(APPEND CHASE_COMPILE_DEFINITIONS " -DChASE_DISPLAY_COND_V_SVD")
endif()
if(CHASE_SAVE_RESIDUALS)
  string(APPEND CHASE_COMPILE_DEFINITIONS " -DCHASE_SAVE_RESIDUALS")
endif()
201- if (CHASE_ENABLE_GPU_RESIDENT_LANCZOS)
202- set (CHASE_COMPILE_DEFINITIONS "${CHASE_COMPILE_DEFINITIONS} -DCHASE_ENABLE_GPU_RESIDENT_LANCZOS" )
# The GPU-resident Lanczos flag is always appended; the constructor warm-up
# flag only when CHASE_GPU_CONSTRUCTOR_BENCHMARK_WARMUP_EFFECTIVE is true.
string(APPEND CHASE_COMPILE_DEFINITIONS " -DCHASE_ENABLE_GPU_RESIDENT_LANCZOS")
if(CHASE_GPU_CONSTRUCTOR_BENCHMARK_WARMUP_EFFECTIVE)
  string(APPEND CHASE_COMPILE_DEFINITIONS " -DCHASE_GPU_CONSTRUCTOR_BENCHMARK_WARMUP=1")
endif()
204234
205235set (CHASE_LINK_LIBRARIES "" )
0 commit comments