Skip to content

Commit 81de5d4

Browse files
authored
Merge branch 'master' into fix/deallocate-missing-arrays
2 parents 9c819d4 + f343234 commit 81de5d4

7 files changed

Lines changed: 143 additions & 110 deletions

File tree

src/common/include/acc_macros.fpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -121,8 +121,8 @@
121121
& copyout_val.strip('\n') + create_val.strip('\n') + &
122122
& no_create_val.strip('\n') + present_val.strip('\n') + &
123123
& deviceptr_val.strip('\n') + attach_val.strip('\n')
124-
#:set acc_directive = '!$acc parallel ' + &
125-
& acc_clause_val + extraAccArgs_val.strip('\n')
124+
#:set acc_directive = FOLD_DIRECTIVE('!$acc parallel ' + &
125+
& acc_clause_val + extraAccArgs_val.strip('\n'), '!$acc').strip('\n')
126126
#:set end_acc_directive = '!$acc end parallel'
127127
$:acc_directive
128128
$:code
@@ -153,8 +153,8 @@
153153
& copyout_val.strip('\n') + create_val.strip('\n') + &
154154
& no_create_val.strip('\n') + present_val.strip('\n') + &
155155
& deviceptr_val.strip('\n') + attach_val.strip('\n')
156-
#:set acc_directive = '!$acc parallel loop ' + &
157-
& clause_val + extraAccArgs_val.strip('\n')
156+
#:set acc_directive = FOLD_DIRECTIVE('!$acc parallel loop ' + &
157+
& clause_val + extraAccArgs_val.strip('\n'), '!$acc').strip('\n')
158158
$:acc_directive
159159
#:enddef
160160

src/common/include/omp_macros.fpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -141,7 +141,7 @@
141141
& deviceptr_val.strip('\n') + attach_val.strip('\n')
142142

143143
#:set omp_clause_val = omp_clause_val.strip('\n')
144-
#:set omp_directive = '!$omp target teams ' + omp_clause_val + extraOmpArgs_val.strip('\n')
144+
#:set omp_directive = FOLD_DIRECTIVE('!$omp target teams ' + omp_clause_val + extraOmpArgs_val.strip('\n'), '!$omp').strip('\n')
145145

146146
#:set omp_end_directive = '!$omp end target teams'
147147
$:omp_directive
@@ -186,7 +186,7 @@
186186
#:set omp_start_directive = '!$omp target teams loop defaultmap(firstprivate:scalar) bind(teams,parallel) '
187187
#:endif
188188

189-
#:set omp_directive = omp_start_directive + clause_val + extraOmpArgs_val.strip('\n')
189+
#:set omp_directive = FOLD_DIRECTIVE(omp_start_directive + clause_val + extraOmpArgs_val.strip('\n'), '!$omp').strip('\n')
190190
$:omp_directive
191191
#:enddef
192192

src/common/include/shared_parallel_macros.fpp

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -117,4 +117,29 @@
117117
#:endif
118118
$:extraArgs_val
119119
#:enddef
120+
121+
#:def FOLD_DIRECTIVE(directive, sentinel, width=200)
122+
#! Fold a long GPU directive into free-form continuation lines so that each emitted line
123+
#! stays short enough for nvfortran's ~1000-char source-line limit. The directive is tokenised into
124+
#! name(args) groups and whitespace-free words, which are re-joined with single spaces; when the
125+
#! next token would push the line past 'width', the line is closed with ' &' and the next one opens
126+
#! with the sentinel plus '& ' (e.g. '!$acc& '). fypp's generic folder omits the sentinel, hence this macro.
127+
#! Caveats: the width test ignores the trailing ' &' (a folded line may reach width + 2), a token longer than
128+
#! width is emitted unsplit, and '[^)]*' stops at the first ')', so a clause with nested parentheses may be folded mid-clause.
129+
#:set _toks = re.findall(r'\w+\([^)]*\)|\S+', directive)
130+
#:set _lines = []
131+
#:set _cur = ''
132+
#:for _t in _toks
133+
#:if _cur == ''
134+
#:set _cur = _t
135+
#:elif len(_cur) + 1 + len(_t) > width
136+
#:set _lines = _lines + [_cur + ' &']
137+
#:set _cur = sentinel + '& ' + _t
138+
#:else
139+
#:set _cur = _cur + ' ' + _t
140+
#:endif
141+
#:endfor
142+
#:set _lines = _lines + [_cur]
143+
$:'\n'.join(_lines)
144+
#:enddef
120145
! New line at end of file is required for FYPP

src/simulation/m_riemann_solver_hll.fpp

Lines changed: 46 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -57,53 +57,55 @@ contains
5757
real(wp), dimension(num_species) :: Cp_iL, Cp_iR, Xs_L, Xs_R, Gamma_iL, Gamma_iR
5858
real(wp), dimension(num_species) :: Yi_avg, Phi_avg, h_iL, h_iR, h_avg_2
5959
#:endif
60-
real(wp) :: rho_L, rho_R
61-
real(wp) :: pres_L, pres_R
62-
real(wp) :: E_L, E_R
63-
real(wp) :: H_L, H_R
64-
real(wp) :: Cp_avg, Cv_avg, T_avg, eps, c_sum_Yi_Phi
65-
real(wp) :: T_L, T_R
66-
real(wp) :: Y_L, Y_R
67-
real(wp) :: MW_L, MW_R
68-
real(wp) :: R_gas_L, R_gas_R
69-
real(wp) :: Cp_L, Cp_R
70-
real(wp) :: Cv_L, Cv_R
71-
real(wp) :: Gamm_L, Gamm_R
72-
real(wp) :: gamma_L, gamma_R
73-
real(wp) :: pi_inf_L, pi_inf_R
74-
real(wp) :: qv_L, qv_R
75-
real(wp) :: c_L, c_R
76-
real(wp), dimension(6) :: tau_e_L, tau_e_R
77-
real(wp) :: G_L, G_R
78-
real(wp), dimension(2) :: Re_L, Re_R
79-
real(wp), dimension(3) :: xi_field_L, xi_field_R
80-
real(wp) :: rho_avg
81-
real(wp) :: H_avg
82-
real(wp) :: qv_avg
83-
real(wp) :: gamma_avg
84-
real(wp) :: c_avg
85-
real(wp) :: s_L, s_R, s_M, s_P, s_S
86-
real(wp) :: xi_M, xi_P
87-
real(wp) :: ptilde_L, ptilde_R
88-
real(wp) :: vel_L_rms, vel_R_rms, vel_avg_rms
89-
real(wp) :: vel_L_tmp, vel_R_tmp
90-
real(wp) :: Ms_L, Ms_R, pres_SL, pres_SR
91-
real(wp) :: alpha_L_sum, alpha_R_sum
92-
real(wp) :: zcoef, pcorr !< low Mach number correction
93-
type(riemann_states) :: c_fast, pres_mag
60+
real(wp) :: rho_L, rho_R
61+
real(wp) :: pres_L, pres_R
62+
real(wp) :: E_L, E_R
63+
real(wp) :: H_L, H_R
64+
real(wp) :: Cp_avg, Cv_avg, T_avg, eps, c_sum_Yi_Phi
65+
real(wp) :: T_L, T_R
66+
real(wp) :: Y_L, Y_R
67+
real(wp) :: MW_L, MW_R
68+
real(wp) :: R_gas_L, R_gas_R
69+
real(wp) :: Cp_L, Cp_R
70+
real(wp) :: Cv_L, Cv_R
71+
real(wp) :: Gamm_L, Gamm_R
72+
real(wp) :: gamma_L, gamma_R
73+
real(wp) :: pi_inf_L, pi_inf_R
74+
real(wp) :: qv_L, qv_R
75+
real(wp) :: c_L, c_R
76+
real(wp), dimension(6) :: tau_e_L, tau_e_R
77+
real(wp) :: G_L, G_R
78+
real(wp), dimension(2) :: Re_L, Re_R
79+
real(wp), dimension(3) :: xi_field_L, xi_field_R
80+
real(wp) :: rho_avg
81+
real(wp) :: H_avg
82+
real(wp) :: qv_avg
83+
real(wp) :: gamma_avg
84+
real(wp) :: c_avg
85+
real(wp) :: s_L, s_R, s_M, s_P, s_S
86+
real(wp) :: xi_M, xi_P
87+
real(wp) :: ptilde_L, ptilde_R
88+
real(wp) :: vel_L_rms, vel_R_rms, vel_avg_rms
89+
real(wp) :: vel_L_tmp, vel_R_tmp
90+
real(wp) :: Ms_L, Ms_R, pres_SL, pres_SR
91+
real(wp) :: alpha_L_sum, alpha_R_sum
92+
real(wp) :: zcoef, pcorr !< low Mach number correction
93+
type(riemann_states) :: c_fast, pres_mag
9494
type(riemann_states_vec3) :: B
95-
type(riemann_states) :: Ga !< Gamma (Lorentz factor)
96-
type(riemann_states) :: vdotB, B2
97-
type(riemann_states_vec3) :: b4 !< 4-magnetic field components (spatial: b4x, b4y, b4z)
98-
type(riemann_states_vec3) :: cm !< Conservative momentum variables
99-
integer :: i, j, k, l, q !< Generic loop iterators
95+
type(riemann_states) :: Ga !< Gamma (Lorentz factor)
96+
type(riemann_states) :: vdotB, B2
97+
type(riemann_states_vec3) :: b4 !< 4-magnetic field components (spatial: b4x, b4y, b4z)
98+
type(riemann_states_vec3) :: cm !< Conservative momentum variables
99+
integer :: i, j, k, l, q !< Generic loop iterators
100+
integer :: Re_size_loc1, Re_size_loc2 !< host copy of Re_size; amdflang reads the declare-target original stale cross-TU
100101
! Populating the buffers of the left and right Riemann problem states variables, based on the choice of boundary conditions
101102

102103
call s_populate_riemann_states_variables_buffers(qL_prim_rsx_vf, dqL_prim_dx_vf, dqL_prim_dy_vf, dqL_prim_dz_vf, &
103104
& qR_prim_rsx_vf, dqR_prim_dx_vf, dqR_prim_dy_vf, dqR_prim_dz_vf, norm_dir, ix, iy, iz)
104105

105106
! Reshaping inputted data based on dimensional splitting direction
106107
call s_initialize_riemann_solver(flux_src_vf, norm_dir)
108+
Re_size_loc1 = Re_size(1); Re_size_loc2 = Re_size(2)
107109
#:for NORM_DIR, XYZ, STENCIL_VAR, COORDS, X_BND, Y_BND, Z_BND in &
108110
[(1, 'x', 'j', '{STENCIL_IDX}, k, l', 'is1', 'is2', 'is3'), &
109111
(2, 'y', 'k', 'j, {STENCIL_IDX}, l', 'is2', 'is1', 'is3'), &
@@ -119,7 +121,8 @@ contains
119121
& Y_L, Y_R, MW_L, MW_R, R_gas_L, R_gas_R, Cp_L, Cp_R, Cv_L, Cv_R, Gamm_L, Gamm_R, gamma_L, &
120122
& gamma_R, pi_inf_L, pi_inf_R, qv_L, qv_R, qv_avg, c_L, c_R, G_L, G_R, rho_avg, H_avg, c_avg, &
121123
& gamma_avg, ptilde_L, ptilde_R, vel_L_rms, vel_R_rms, vel_avg_rms, Ms_L, Ms_R, pres_SL, &
122-
& pres_SR, alpha_L_sum, alpha_R_sum, flux_tau_L, flux_tau_R]', copyin='[norm_dir]')
124+
& pres_SR, alpha_L_sum, alpha_R_sum, flux_tau_L, flux_tau_R, s_M, s_P, xi_M, xi_P]', &
125+
& copyin='[norm_dir]', firstprivate='[Re_size_loc1, Re_size_loc2]')
123126
do l = ${Z_BND}$%beg, ${Z_BND}$%end
124127
do k = ${Y_BND}$%beg, ${Y_BND}$%end
125128
do j = ${X_BND}$%beg, ${X_BND}$%end
@@ -216,11 +219,11 @@ contains
216219
Re_L(i) = dflt_real
217220
Re_R(i) = dflt_real
218221

219-
if (Re_size(i) > 0) Re_L(i) = 0._wp
220-
if (Re_size(i) > 0) Re_R(i) = 0._wp
222+
if (merge(Re_size_loc1, Re_size_loc2, i == 1) > 0) Re_L(i) = 0._wp
223+
if (merge(Re_size_loc1, Re_size_loc2, i == 1) > 0) Re_R(i) = 0._wp
221224

222225
$:GPU_LOOP(parallelism='[seq]')
223-
do q = 1, Re_size(i)
226+
do q = 1, merge(Re_size_loc1, Re_size_loc2, i == 1)
224227
Re_L(i) = alpha_L(Re_idx(i, q))/Res_gs(i, q) + Re_L(i)
225228
Re_R(i) = alpha_R(Re_idx(i, q))/Res_gs(i, q) + Re_R(i)
226229
end do

src/simulation/m_riemann_solver_hllc.fpp

Lines changed: 16 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -117,8 +117,9 @@ contains
117117
real(wp) :: rho_Star, E_Star, p_Star, p_K_Star, vel_K_star
118118
real(wp) :: pres_SL, pres_SR, Ms_L, Ms_R
119119
real(wp) :: flux_ene_e
120-
real(wp) :: zcoef, pcorr !< low Mach number correction
121-
integer :: Re_max, i, j, k, l, q !< Generic loop iterators
120+
real(wp) :: zcoef, pcorr !< low Mach number correction
121+
integer :: Re_max, i, j, k, l, q !< Generic loop iterators
122+
integer :: Re_size_loc1, Re_size_loc2 !< host copy of Re_size; amdflang reads the declare-target original stale cross-TU
122123
! Populating the buffers of the left and right Riemann problem states variables, based on the choice of boundary conditions
123124

124125
call s_populate_riemann_states_variables_buffers(qL_prim_rsx_vf, dqL_prim_dx_vf, dqL_prim_dy_vf, dqL_prim_dz_vf, &
@@ -127,6 +128,7 @@ contains
127128
! Reshaping inputted data based on dimensional splitting direction
128129

129130
call s_initialize_riemann_solver(flux_src_vf, norm_dir)
131+
Re_size_loc1 = Re_size(1); Re_size_loc2 = Re_size(2)
130132

131133
#:for NORM_DIR, XYZ, STENCIL_VAR, COORDS, X_BND, Y_BND, Z_BND in &
132134
[(1, 'x', 'j', '{STENCIL_IDX}, k, l', 'is1', 'is2', 'is3'), &
@@ -147,7 +149,7 @@ contains
147149
& rho_avg, H_avg, c_avg, gamma_avg, ptilde_L, ptilde_R, vel_L_rms, vel_R_rms, &
148150
& vel_avg_rms, vel_L_tmp, vel_R_tmp, Ms_L, Ms_R, pres_SL, pres_SR, alpha_L_sum, &
149151
& alpha_R_sum, rho_Star, E_Star, p_Star, p_K_Star, vel_K_star, s_L, s_R, s_M, s_P, s_S, &
150-
& xi_M, xi_P, xi_L, xi_R, xi_L_m1, xi_R_m1, xi_MP, xi_PP]')
152+
& xi_M, xi_P, xi_L, xi_R, xi_L_m1, xi_R_m1, xi_MP, xi_PP]', firstprivate='[Re_size_loc1, Re_size_loc2]')
151153
do l = ${Z_BND}$%beg, ${Z_BND}$%end
152154
do k = ${Y_BND}$%beg, ${Y_BND}$%end
153155
do j = ${X_BND}$%beg, ${X_BND}$%end
@@ -229,10 +231,10 @@ contains
229231
do i = 1, 2
230232
Re_L(i) = dflt_real
231233
Re_R(i) = dflt_real
232-
if (Re_size(i) > 0) Re_L(i) = 0._wp
233-
if (Re_size(i) > 0) Re_R(i) = 0._wp
234+
if (merge(Re_size_loc1, Re_size_loc2, i == 1) > 0) Re_L(i) = 0._wp
235+
if (merge(Re_size_loc1, Re_size_loc2, i == 1) > 0) Re_R(i) = 0._wp
234236
$:GPU_LOOP(parallelism='[seq]')
235-
do q = 1, Re_size(i)
237+
do q = 1, merge(Re_size_loc1, Re_size_loc2, i == 1)
236238
Re_L(i) = qL_prim_rsx_vf(${SF('')}$, eqn_idx%E + Re_idx(i, q))/Res_gs(i, q) + Re_L(i)
237239
Re_R(i) = qR_prim_rsx_vf(${SF(' + 1')}$, eqn_idx%E + Re_idx(i, q))/Res_gs(i, &
238240
& q) + Re_R(i)
@@ -782,7 +784,7 @@ contains
782784
& Ms_L, Ms_R, pres_SL, pres_SR, alpha_L_sum, alpha_R_sum, s_L, s_R, s_M, s_P, s_S, xi_M, &
783785
& xi_P, xi_L, xi_R, xi_L_m1, xi_R_m1, xi_MP, xi_PP, nbub_L, nbub_R, PbwR3Lbar, PbwR3Rbar, &
784786
& R3Lbar, R3Rbar, R3V2Lbar, R3V2Rbar, Ys_L, Ys_R, Cp_iL, Cp_iR, Xs_L, Xs_R, Gamma_iL, &
785-
& Gamma_iR, Yi_avg, Phi_avg, h_iL, h_iR, h_avg_2]')
787+
& Gamma_iR, Yi_avg, Phi_avg, h_iL, h_iR, h_avg_2]', firstprivate='[Re_size_loc1, Re_size_loc2]')
786788
do l = ${Z_BND}$%beg, ${Z_BND}$%end
787789
do k = ${Y_BND}$%beg, ${Y_BND}$%end
788790
do j = ${X_BND}$%beg, ${X_BND}$%end
@@ -851,11 +853,11 @@ contains
851853
Re_L(i) = dflt_real
852854
Re_R(i) = dflt_real
853855

854-
if (Re_size(i) > 0) Re_L(i) = 0._wp
855-
if (Re_size(i) > 0) Re_R(i) = 0._wp
856+
if (merge(Re_size_loc1, Re_size_loc2, i == 1) > 0) Re_L(i) = 0._wp
857+
if (merge(Re_size_loc1, Re_size_loc2, i == 1) > 0) Re_R(i) = 0._wp
856858

857859
$:GPU_LOOP(parallelism='[seq]')
858-
do q = 1, Re_size(i)
860+
do q = 1, merge(Re_size_loc1, Re_size_loc2, i == 1)
859861
Re_L(i) = (1._wp - qL_prim_rsx_vf(${SF('')}$, eqn_idx%E + Re_idx(i, &
860862
& q)))/Res_gs(i, q) + Re_L(i)
861863
Re_R(i) = (1._wp - qR_prim_rsx_vf(${SF(' + 1')}$, eqn_idx%E + Re_idx(i, &
@@ -1179,7 +1181,7 @@ contains
11791181
& vel_R, Re_L, Re_R, alpha_L, alpha_R, s_L, s_R, s_S, vel_avg_rms, pcorr, zcoef, &
11801182
& vel_L_tmp, vel_R_tmp, Ys_L, Ys_R, Xs_L, Xs_R, Gamma_iL, Gamma_iR, Cp_iL, Cp_iR, &
11811183
& tau_e_L, tau_e_R, xi_field_L, xi_field_R, Yi_avg, Phi_avg, h_iL, h_iR, h_avg_2, G_L, &
1182-
& G_R]', copyin='[is1, is2, is3]')
1184+
& G_R, c_sum_Yi_Phi, flux_ene_e]', copyin='[is1, is2, is3]', firstprivate='[Re_size_loc1, Re_size_loc2]')
11831185
do l = ${Z_BND}$%beg, ${Z_BND}$%end
11841186
do k = ${Y_BND}$%beg, ${Y_BND}$%end
11851187
do j = ${X_BND}$%beg, ${X_BND}$%end
@@ -1244,8 +1246,8 @@ contains
12441246
end do
12451247

12461248
Re_max = 0
1247-
if (Re_size(1) > 0) Re_max = 1
1248-
if (Re_size(2) > 0) Re_max = 2
1249+
if (Re_size_loc1 > 0) Re_max = 1
1250+
if (Re_size_loc2 > 0) Re_max = 2
12491251

12501252
if (viscous) then
12511253
$:GPU_LOOP(parallelism='[seq]')
@@ -1254,7 +1256,7 @@ contains
12541256
Re_R(i) = 0._wp
12551257

12561258
$:GPU_LOOP(parallelism='[seq]')
1257-
do q = 1, Re_size(i)
1259+
do q = 1, merge(Re_size_loc1, Re_size_loc2, i == 1)
12581260
Re_L(i) = alpha_L(Re_idx(i, q))/Res_gs(i, q) + Re_L(i)
12591261
Re_R(i) = alpha_R(Re_idx(i, q))/Res_gs(i, q) + Re_R(i)
12601262
end do

0 commit comments

Comments
 (0)