Skip to content

Commit b9da01e

Browse files
author
Spencer Bryngelson
committed
Work around CCE 19.0.0 compiler bugs for Cray+OpenACC builds
Five distinct CCE 19.0.0 compiler bugs required fixes: Bug 1: InstCombine ICE in matmul() in m_phase_change.fpp - Replace matmul() with explicit 2x2 arithmetic Bug 2: IPA bring_routine_resident SIGSEGV in m_phase_change.fpp - Add -Oipa0 per-file in CMakeLists.txt (Cray+OpenACC only) - Use cray_noinline=True on 4 GPU_ROUTINE calls in m_phase_change.fpp and 4 in m_variables_conversion.fpp Bug 3: IPA castIsValid ICE in m_bubbles_EL.fpp - Change proc_bubble_counts from VLA to allocatable - Add -Oipa0 per-file in CMakeLists.txt (Cray+OpenACC only) Bug 4: m_chemistry.fpp VLA ICE in case-optimized pre_process builds - Guard 4 dimension(num_species) local arrays with USING_CCE Bug 5: Pyrometheus GPU_ROUTINE macro missing !$acc routine seq on Cray+ACC - Post-process generated m_thermochem.f90 in toolchain/mfc/run/input.py to replace the broken Cray INLINEALWAYS-only macro with plain #define GPU_ROUTINE(name) !$acc routine seq Also fix uninitialized FT in s_TSat (use huge(1.0_wp) not huge(FT)). See PR #1286.
1 parent 31899ad commit b9da01e

7 files changed

Lines changed: 102 additions & 22 deletions

File tree

CMakeLists.txt

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -397,6 +397,7 @@ HANDLE_SOURCES(simulation ON)
397397
HANDLE_SOURCES(post_process ON)
398398
HANDLE_SOURCES(syscheck OFF)
399399

400+
400401
# MFC_SETUP_TARGET: Given a target (herein <target>), this macro creates a new
401402
# executable <target> with the appropriate sources, compiler definitions, and
402403
# linked libraries (assuming HANDLE_SOURCES was called on <target>).
@@ -633,6 +634,19 @@ if (MFC_SIMULATION)
633634
MFC_SETUP_TARGET(TARGET simulation
634635
SOURCES "${simulation_SRCs}"
635636
MPI FFTW OpenACC OpenMP)
637+
# CCE 19.0.0 IPA workaround: two files trigger IPA crashes:
638+
# m_bubbles_EL: castIsValid assertion (InstCombine/foldIntegerTypedPHI)
639+
# m_phase_change: bring_routine_resident SIGSEGV
640+
# Disabling IPA per-file avoids the crashes while preserving IPA for
641+
# the rest of simulation (needed for thermochem INLINEALWAYS inlining).
642+
# See PR #1286.
643+
if (CMAKE_Fortran_COMPILER_ID STREQUAL "Cray" AND MFC_OpenACC)
644+
set_source_files_properties(
645+
"${CMAKE_BINARY_DIR}/fypp/simulation/m_bubbles_EL.fpp.f90"
646+
"${CMAKE_BINARY_DIR}/fypp/simulation/m_phase_change.fpp.f90"
647+
PROPERTIES COMPILE_FLAGS "-Oipa0"
648+
)
649+
endif()
636650
endif()
637651

638652
if (MFC_POST_PROCESS)

src/common/include/parallel_macros.fpp

Lines changed: 28 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -48,18 +48,44 @@
4848

4949
#:enddef
5050

51-
#:def GPU_ROUTINE(function_name=None, parallelism=None, nohost=False, cray_inline=False, extraAccArgs=None, extraOmpArgs=None)
51+
#:def GPU_ROUTINE(function_name=None, parallelism=None, nohost=False, cray_inline=False, cray_noinline=False, extraAccArgs=None, extraOmpArgs=None)
5252
#:assert isinstance(cray_inline, bool)
53+
#:assert isinstance(cray_noinline, bool)
54+
#:assert not (cray_inline and cray_noinline), "cray_inline and cray_noinline are mutually exclusive"
5355
#:set acc_directive = ACC_ROUTINE(function_name=function_name, parallelism=parallelism, nohost=nohost, extraAccArgs=extraAccArgs)
5456
#:set omp_directive = OMP_ROUTINE(function_name=function_name, nohost=nohost, extraOmpArgs=extraOmpArgs)
5557

56-
#:if cray_inline == True
58+
#:if cray_noinline == True
59+
#:if not isinstance(function_name, str)
60+
#:stop "When using cray_noinline, function name must be given and given as a string"
61+
#:endif
62+
#:set cray_noinline_directive = ('!DIR$ NOINLINE ' + function_name).strip('\n')
63+
#ifdef _CRAYFTN
64+
#if MFC_OpenACC
65+
$:acc_directive
66+
#elif MFC_OpenMP
67+
$:omp_directive
68+
#else
69+
$:cray_noinline_directive
70+
#endif
71+
#elif MFC_OpenACC
72+
$:acc_directive
73+
#elif MFC_OpenMP
74+
$:omp_directive
75+
#endif
76+
#:elif cray_inline == True
5777
#:if not isinstance(function_name, str)
5878
#:stop "When inlining for Cray Compiler, function name must be given and given as a string"
5979
#:endif
6080
#:set cray_directive = ('!DIR$ INLINEALWAYS ' + function_name).strip('\n')
6181
#ifdef _CRAYFTN
82+
#if MFC_OpenACC
83+
$:acc_directive
84+
#elif MFC_OpenMP
85+
$:omp_directive
86+
#else
6287
$:cray_directive
88+
#endif
6389
#elif MFC_OpenACC
6490
$:acc_directive
6591
#elif MFC_OpenMP

src/common/m_chemistry.fpp

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,11 @@ contains
6363

6464
integer :: x, y, z, eqn
6565
real(wp) :: energy, T_in
66-
real(wp), dimension(num_species) :: Ys
66+
#:if USING_CCE
67+
real(wp), dimension(10) :: Ys
68+
#:else
69+
real(wp), dimension(num_species) :: Ys
70+
#:endif
6771

6872
do z = bounds(3)%beg, bounds(3)%end
6973
do y = bounds(2)%beg, bounds(2)%end
@@ -101,7 +105,11 @@ contains
101105
type(int_bounds_info), dimension(1:3), intent(in) :: bounds
102106

103107
integer :: x, y, z, i
104-
real(wp), dimension(num_species) :: Ys
108+
#:if USING_CCE
109+
real(wp), dimension(10) :: Ys
110+
#:else
111+
real(wp), dimension(num_species) :: Ys
112+
#:endif
105113
real(wp) :: mix_mol_weight
106114

107115
do z = bounds(3)%beg, bounds(3)%end
@@ -131,7 +139,7 @@ contains
131139
integer :: eqn
132140
real(wp) :: T
133141
real(wp) :: rho, omega_m
134-
#:if not MFC_CASE_OPTIMIZATION and USING_AMD
142+
#:if (not MFC_CASE_OPTIMIZATION and USING_AMD) or USING_CCE
135143
real(wp), dimension(10) :: Ys
136144
real(wp), dimension(10) :: omega
137145
#:else
@@ -180,7 +188,7 @@ contains
180188
type(int_bounds_info), intent(in) :: irx, iry, irz
181189

182190
integer, intent(in) :: idir
183-
#:if not MFC_CASE_OPTIMIZATION and USING_AMD
191+
#:if (not MFC_CASE_OPTIMIZATION and USING_AMD) or USING_CCE
184192
real(wp), dimension(10) :: Xs_L, Xs_R, Xs_cell, Ys_L, Ys_R, Ys_cell
185193
real(wp), dimension(10) :: mass_diffusivities_mixavg1, mass_diffusivities_mixavg2
186194
real(wp), dimension(10) :: mass_diffusivities_mixavg_Cell, dXk_dxi, h_l, h_r, h_k

src/common/m_phase_change.fpp

Lines changed: 22 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -104,6 +104,16 @@ contains
104104
!< Generic loop iterators
105105
integer :: i, j, k, l
106106

107+
#ifdef _CRAYFTN
108+
#ifdef MFC_OpenACC
109+
! CCE 19 IPA workaround: prevent bring_routine_resident SIGSEGV
110+
!DIR$ NOINLINE s_infinite_pt_relaxation_k
111+
!DIR$ NOINLINE s_infinite_ptg_relaxation_k
112+
!DIR$ NOINLINE s_correct_partial_densities
113+
!DIR$ NOINLINE s_TSat
114+
#endif
115+
#endif
116+
107117
! starting equilibrium solver
108118
$:GPU_PARALLEL_LOOP(collapse=3, private='[i,j,k,l,p_infOV, p_infpT, p_infSL, sk, hk, gk, ek, rhok,pS, pSOV, pSSL, TS, TSOV, TSatOV, TSatSL, TSSL, rhoe, dynE, rhos, rho, rM, m1, m2, MCT, TvF]')
109119
do j = 0, m
@@ -296,7 +306,7 @@ contains
296306
!! @param TS equilibrium temperature at the interface
297307
subroutine s_infinite_pt_relaxation_k(j, k, l, MFL, pS, p_infpT, q_cons_vf, rhoe, TS)
298308
$:GPU_ROUTINE(function_name='s_infinite_pt_relaxation_k', &
299-
& parallelism='[seq]', cray_inline=True)
309+
& parallelism='[seq]', cray_noinline=True)
300310

301311
! initializing variables
302312
integer, intent(in) :: j, k, l, MFL
@@ -411,7 +421,7 @@ contains
411421
!! @param TS equilibrium temperature at the interface
412422
subroutine s_infinite_ptg_relaxation_k(j, k, l, pS, p_infpT, rhoe, q_cons_vf, TS)
413423
$:GPU_ROUTINE(function_name='s_infinite_ptg_relaxation_k', &
414-
& parallelism='[seq]', cray_inline=True)
424+
& parallelism='[seq]', cray_noinline=True)
415425

416426
integer, intent(in) :: j, k, l
417427
real(wp), intent(inout) :: pS
@@ -579,7 +589,8 @@ contains
579589
InvJac = InvJac/(Jac(1, 1)*Jac(2, 2) - Jac(1, 2)*Jac(2, 1))
580590

581591
! calculating correction array for Newton's method
582-
DeltamP = -1.0_wp*(matmul(InvJac, R2D))
592+
DeltamP(1) = -1.0_wp*(InvJac(1, 1)*R2D(1) + InvJac(1, 2)*R2D(2))
593+
DeltamP(2) = -1.0_wp*(InvJac(2, 1)*R2D(1) + InvJac(2, 2)*R2D(2))
583594
584595
! updating two reacting 'masses'. Recall that inert 'masses' do not change during the phase change
585596
! liquid
@@ -638,7 +649,7 @@ contains
638649
!! @param l generic loop iterator for z direction
639650
subroutine s_correct_partial_densities(MCT, q_cons_vf, rM, j, k, l)
640651
$:GPU_ROUTINE(function_name='s_correct_partial_densities', &
641-
& parallelism='[seq]', cray_inline=True)
652+
& parallelism='[seq]', cray_noinline=True)
642653
643654
!> @name variables for the correction of the reacting partial densities
644655
!> @{
@@ -689,7 +700,7 @@ contains
689700
!! @param TSIn equilibrium Temperature
690701
elemental subroutine s_TSat(pSat, TSat, TSIn)
691702
$:GPU_ROUTINE(function_name='s_TSat',parallelism='[seq]', &
692-
& cray_inline=True)
703+
& cray_noinline=True)
693704
694705
real(wp), intent(in) :: pSat
695706
real(wp), intent(out) :: TSat
@@ -716,6 +727,12 @@ contains
716727

717728
! underrelaxation factor
718729
Om = 1.0e-3_wp
730+
731+
! FT must be initialized before the do while condition is evaluated.
732+
! Fortran .or. is not guaranteed to short-circuit: abs(FT) may be evaluated even
733+
! when ns == 0, so FT must have a defined value here.
734+
FT = huge(1.0_wp)
735+
719736
do while ((abs(FT) > ptgalpha_eps) .or. (ns == 0))
720737
! increasing counter
721738
ns = ns + 1

src/common/m_variables_conversion.fpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -116,7 +116,7 @@ contains
116116
!! @param pres_mag Magnetic pressure (optional)
117117
subroutine s_compute_pressure(energy, alf, dyn_p, pi_inf, gamma, rho, qv, rhoYks, pres, T, stress, mom, G, pres_mag)
118118
$:GPU_ROUTINE(function_name='s_compute_pressure',parallelism='[seq]', &
119-
& cray_inline=True)
119+
& cray_noinline=True)
120120

121121
real(stp), intent(in) :: energy, alf
122122
real(wp), intent(in) :: dyn_p
@@ -326,7 +326,7 @@ contains
326326
alpha_K, alpha_rho_K, Re_K, &
327327
G_K, G)
328328
$:GPU_ROUTINE(function_name='s_convert_species_to_mixture_variables_acc', &
329-
& parallelism='[seq]', cray_inline=True)
329+
& parallelism='[seq]', cray_noinline=True)
330330

331331
real(wp), intent(out) :: rho_K, gamma_K, pi_inf_K, qv_K
332332
#:if not MFC_CASE_OPTIMIZATION and USING_AMD
@@ -1335,7 +1335,7 @@ contains
13351335
!> This subroutine computes partial densities and volume fractions
13361336
subroutine s_compute_species_fraction(q_vf, k, l, r, alpha_rho_K, alpha_K)
13371337
$:GPU_ROUTINE(function_name='s_compute_species_fraction', &
1338-
& parallelism='[seq]', cray_inline=True)
1338+
& parallelism='[seq]', cray_noinline=True)
13391339
type(scalar_field), dimension(sys_size), intent(in) :: q_vf
13401340
integer, intent(in) :: k, l, r
13411341
#:if not MFC_CASE_OPTIMIZATION and USING_AMD
@@ -1480,7 +1480,7 @@ contains
14801480
!> @brief Computes the fast magnetosonic wave speed from the sound speed, density, and magnetic field components.
14811481
subroutine s_compute_fast_magnetosonic_speed(rho, c, B, norm, c_fast, h)
14821482
$:GPU_ROUTINE(function_name='s_compute_fast_magnetosonic_speed', &
1483-
& parallelism='[seq]', cray_inline=True)
1483+
& parallelism='[seq]', cray_noinline=True)
14841484

14851485
real(wp), intent(in) :: B(3), rho, c
14861486
real(wp), intent(in) :: h ! only used for relativity

src/simulation/m_bubbles_EL.fpp

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1532,8 +1532,7 @@ contains
15321532
integer(KIND=MPI_OFFSET_KIND) :: disp
15331533
integer :: view
15341534
integer, dimension(2) :: gsizes, lsizes, start_idx_part
1535-
integer, dimension(num_procs) :: part_order, part_ord_mpi
1536-
integer, dimension(num_procs) :: proc_bubble_counts
1535+
integer, allocatable :: proc_bubble_counts(:)
15371536
real(wp), dimension(1:1, 1:lag_io_vars) :: dummy
15381537
dummy = 0._wp
15391538

@@ -1548,6 +1547,8 @@ contains
15481547

15491548
if (.not. parallel_io) return
15501549

1550+
allocate (proc_bubble_counts(num_procs))
1551+
15511552
lsizes(1) = bub_id
15521553
lsizes(2) = lag_io_vars
15531554

@@ -1659,6 +1660,8 @@ contains
16591660
call MPI_FILE_CLOSE(ifile, ierr)
16601661
end if
16611662
1663+
deallocate (proc_bubble_counts)
1664+
16621665
#endif
16631666
16641667
end subroutine s_write_restart_lag_bubbles

toolchain/mfc/run/input.py

Lines changed: 17 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -90,13 +90,25 @@ def generate_fpp(self, target) -> None:
9090
directive_str = None
9191

9292
# Write the generated Fortran code to the m_thermochem.f90 file with the chosen precision
93+
thermochem_code = pyro.FortranCodeGenerator().generate(
94+
"m_thermochem",
95+
self.get_cantera_solution(),
96+
pyro.CodeGenerationOptions(scalar_type = real_type, directive_offload = directive_str)
97+
)
98+
99+
# CCE 19.0.0 workaround: pyrometheus generates !DIR$ INLINEALWAYS for Cray+ACC
100+
# but omits !$acc routine seq, so thermochem routines are not registered as
101+
# OpenACC device routines. Replace with plain !$acc routine seq (no INLINEALWAYS).
102+
if directive_str == 'acc':
103+
thermochem_code = thermochem_code.replace(
104+
"#ifdef _CRAYFTN\n#define GPU_ROUTINE(name) !DIR$ INLINEALWAYS name\n"
105+
"#else\n#define GPU_ROUTINE(name) !$acc routine seq\n#endif",
106+
"#define GPU_ROUTINE(name) !$acc routine seq"
107+
)
108+
93109
common.file_write(
94110
os.path.join(modules_dir, "m_thermochem.f90"),
95-
pyro.FortranCodeGenerator().generate(
96-
"m_thermochem",
97-
self.get_cantera_solution(),
98-
pyro.CodeGenerationOptions(scalar_type = real_type, directive_offload = directive_str)
99-
),
111+
thermochem_code,
100112
True
101113
)
102114

0 commit comments

Comments
 (0)