Skip to content
Merged
Show file tree
Hide file tree
Changes from 14 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 10 additions & 3 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -644,7 +644,7 @@ exit 0
target_link_options(${a_target} PRIVATE -fopenmp)
elseif(CMAKE_Fortran_COMPILER_ID STREQUAL "LLVMFlang")
target_compile_options(${a_target} PRIVATE -fopenmp --offload-arch=gfx90a -fopenmp-target-fast -fopenmp-assume-threads-oversubscription -fopenmp-assume-teams-oversubscription)
target_link_options(${a_target} PRIVATE -fopenmp --offload-arch=gfx90a)
target_link_options(${a_target} PRIVATE -fopenmp --offload-arch=gfx90a -flto-partitions=${MFC_BUILD_JOBS})
endif()
endif()

Expand Down Expand Up @@ -710,14 +710,15 @@ exit 0
PRIVATE -DFRONTIER_UNIFIED)
endif()

find_library(HIP_LIB amdhip64
find_library(HIP_LIB amdhip64
HINTS "$ENV{OLCF_AFAR_ROOT}/lib" REQUIRED)
find_library(HIPFORT_AMDGCN_LIB hipfort-amdgcn
HINTS "$ENV{OLCF_AFAR_ROOT}/lib" REQUIRED)
target_include_directories(${a_target} PRIVATE
"$ENV{OLCF_AFAR_ROOT}/include/hipfort/amdgcn")
target_link_libraries(${a_target} PRIVATE
${HIP_LIB} ${HIPFORT_AMDGCN_LIB} flang_rt.hostdevice)
${HIP_LIB} ${HIPFORT_AMDGCN_LIB})

endif()
elseif (CMAKE_Fortran_COMPILER_ID STREQUAL "Cray")
target_compile_options(${a_target} PRIVATE "SHELL:-h noacc" "SHELL:-x acc")
Expand Down Expand Up @@ -790,6 +791,12 @@ if (MFC_POST_PROCESS)

# -O0 is in response to https://github.com/MFlowCode/MFC-develop/issues/95
target_compile_options(post_process PRIVATE -O0)

# Link post_process as a non-PIE executable when building with LLVM Flang.
# flang-23/LLD defaults to PIE, but the SILO and LAPACK static libs on Frontier
# are non-PIC and produce R_X86_64_32 relocations that LLD rejects in PIE mode.
# The generator expression expands to nothing for every other Fortran compiler.
target_link_options(post_process PRIVATE
    $<$<Fortran_COMPILER_ID:LLVMFlang>:-no-pie>)
endif()

if (MFC_SYSCHECK)
Expand Down
2 changes: 1 addition & 1 deletion examples/3D_performance_test/case.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
json.dumps(
{
# Logistics
"run_time_info": "T",
"run_time_info": "F",
# Computational Domain Parameters
"x_domain%beg": 0.0e00,
"x_domain%end": 4.0e-03 / 1.0e-03,
Expand Down
2 changes: 1 addition & 1 deletion src/common/m_chemistry.fpp
Original file line number Diff line number Diff line change
Expand Up @@ -193,7 +193,7 @@ contains

$:GPU_UPDATE(device='[isc1, isc2, isc3]')

if (chemistry .or. dummy) then
if (chemistry) then
! Set offsets based on direction using array indexing
offsets = 0
offsets(idir) = 1
Expand Down
2 changes: 0 additions & 2 deletions src/post_process/m_global_parameters.fpp
Original file line number Diff line number Diff line change
Expand Up @@ -186,7 +186,6 @@ module m_global_parameters
logical :: E_wrt
logical, dimension(num_fluids_max) :: alpha_rho_e_wrt
logical :: fft_wrt
logical :: dummy !< AMDFlang workaround for case-optimization + GPU-kernel bug
logical :: pres_wrt
logical, dimension(num_fluids_max) :: alpha_wrt
logical :: gamma_wrt
Expand Down Expand Up @@ -397,7 +396,6 @@ contains
file_per_process = .false.
E_wrt = .false.
fft_wrt = .false.
dummy = .false.
pres_wrt = .false.
alpha_wrt = .false.
gamma_wrt = .false.
Expand Down
2 changes: 0 additions & 2 deletions src/pre_process/m_global_parameters.fpp
Original file line number Diff line number Diff line change
Expand Up @@ -186,7 +186,6 @@ module m_global_parameters
real(wp) :: Bx0 !< Constant magnetic field in the x-direction (1D)
integer :: buff_size !< Number of ghost cells for boundary condition storage
logical :: fft_wrt
logical :: dummy !< AMDFlang workaround for case-optimization + GPU-kernel bug

contains

Expand Down Expand Up @@ -303,7 +302,6 @@ contains
elliptic_smoothing = .false.

fft_wrt = .false.
dummy = .false.

simplex_perturb = .false.
simplex_params%perturb_vel(:) = .false.
Expand Down
18 changes: 10 additions & 8 deletions src/simulation/m_acoustic_src.fpp
Original file line number Diff line number Diff line change
Expand Up @@ -454,14 +454,16 @@ contains
call s_mpi_abort('Fatal Error: Inconsistent allocation of source_spatials')
end if

$:GPU_UPDATE(device='[source_spatials(ai)%coord]')
$:GPU_UPDATE(device='[source_spatials(ai)%val]')
if (support(ai) >= 5) then
if (dim == 2) then
$:GPU_UPDATE(device='[source_spatials(ai)%angle]')
end if
if (dim == 3) then
$:GPU_UPDATE(device='[source_spatials(ai)%xyz_to_r_ratios]')
if (count > 0) then
$:GPU_UPDATE(device='[source_spatials(ai)%coord]')
$:GPU_UPDATE(device='[source_spatials(ai)%val]')
if (support(ai) >= 5) then
if (dim == 2) then
$:GPU_UPDATE(device='[source_spatials(ai)%angle]')
end if
if (dim == 3) then
$:GPU_UPDATE(device='[source_spatials(ai)%xyz_to_r_ratios]')
end if
end if
end if
end do
Expand Down
4 changes: 2 additions & 2 deletions src/simulation/m_cbc.fpp
Original file line number Diff line number Diff line change
Expand Up @@ -529,7 +529,7 @@ contains
#:for CBC_DIR, XYZ in [(1, 'x'), (2, 'y'), (3, 'z')]
if (cbc_dir == ${CBC_DIR}$ .and. recon_type == WENO_TYPE) then
! PI2 of flux_rs_vf and flux_src_rs_vf at j = 1/2
if (weno_order == 3 .or. dummy) then
if (weno_order == 3) then
call s_convert_primitive_to_flux_variables(q_prim_rs${XYZ}$_vf, F_rs${XYZ}$_vf, F_src_rs${XYZ}$_vf, is1, is2, &
& is3, idwbuff(2)%beg, idwbuff(3)%beg)

Expand Down Expand Up @@ -557,7 +557,7 @@ contains
end if

! PI4 of flux_rs_vf and flux_src_rs_vf at j = 1/2, 3/2
if (weno_order == 5 .or. dummy) then
if (weno_order == 5) then
call s_convert_primitive_to_flux_variables(q_prim_rs${XYZ}$_vf, F_rs${XYZ}$_vf, F_src_rs${XYZ}$_vf, is1, is2, &
& is3, idwbuff(2)%beg, idwbuff(3)%beg)

Expand Down
2 changes: 1 addition & 1 deletion src/simulation/m_data_output.fpp
Original file line number Diff line number Diff line change
Expand Up @@ -220,7 +220,7 @@ contains
#:call GPU_PARALLEL(copyout='[icfl_max_loc]', copyin='[icfl_sf]')
icfl_max_loc = maxval(icfl_sf)
#:endcall GPU_PARALLEL
if (viscous .or. dummy) then
if (viscous) then
#:call GPU_PARALLEL(copyout='[vcfl_max_loc, Rc_min_loc]', copyin='[vcfl_sf,Rc_sf]')
vcfl_max_loc = maxval(vcfl_sf)
Rc_min_loc = minval(Rc_sf)
Expand Down
19 changes: 11 additions & 8 deletions src/simulation/m_fftw.fpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,12 +30,12 @@ module m_fftw

type(c_ptr) :: fwd_plan, bwd_plan
type(c_ptr) :: fftw_real_data, fftw_cmplx_data, fftw_fltr_cmplx_data
integer :: real_size, cmplx_size, x_size, batch_size, Nfq
integer :: real_size, cmplx_size, x_size, batch_size, Nfq, i2
real(c_double), pointer :: data_real(:) !< Real data
complex(c_double_complex), pointer :: data_cmplx(:) !< Complex data in Fourier space
complex(c_double_complex), pointer :: data_fltr_cmplx(:) !< Filtered complex data in Fourier space
#if defined(MFC_GPU)
$:GPU_DECLARE(create='[real_size, cmplx_size, x_size, batch_size, Nfq]')
$:GPU_DECLARE(create='[real_size, cmplx_size, x_size, batch_size, Nfq, i2]')

real(dp), allocatable, target :: data_real_gpu(:)
complex(dp), allocatable, target :: data_cmplx_gpu(:)
Expand Down Expand Up @@ -76,8 +76,8 @@ contains
allocate (gpu_fft_size(1:rank), iembed(1:rank), oembed(1:rank))

gpu_fft_size(1) = real_size
iembed(1) = 0
oembed(1) = 0
iembed(1) = real_size
oembed(1) = cmplx_size
$:GPU_ENTER_DATA(copyin='[real_size, cmplx_size, x_size, sys_size, batch_size, Nfq]')
$:GPU_UPDATE(device='[real_size, cmplx_size, x_size, sys_size, batch_size]')
#else
Expand Down Expand Up @@ -189,6 +189,9 @@ contains
$:END_GPU_PARALLEL_LOOP()

do i = 1, fourier_rings
i2 = i
$:GPU_UPDATE(device='[i2]')

$:GPU_PARALLEL_LOOP(collapse=3)
do k = 1, sys_size
do j = 0, m
Expand All @@ -199,11 +202,11 @@ contains
end do
$:END_GPU_PARALLEL_LOOP()

$:GPU_PARALLEL_LOOP(collapse=3, firstprivate='[i]')
$:GPU_PARALLEL_LOOP(collapse=3)
do k = 1, sys_size
do j = 0, m
do l = 0, p
data_real_gpu(l + j*real_size + 1 + (k - 1)*real_size*x_size) = q_cons_vf(k)%sf(j, i, l)
data_real_gpu(l + j*real_size + 1 + (k - 1)*real_size*x_size) = q_cons_vf(k)%sf(j, i2, l)
end do
end do
end do
Expand Down Expand Up @@ -241,13 +244,13 @@ contains
#endif
#:endcall GPU_HOST_DATA

$:GPU_PARALLEL_LOOP(collapse=3, firstprivate='[i]')
$:GPU_PARALLEL_LOOP(collapse=3)
do k = 1, sys_size
do j = 0, m
do l = 0, p
data_real_gpu(l + j*real_size + 1 + (k - 1)*real_size*x_size) = data_real_gpu(l + j*real_size + 1 + (k &
& - 1)*real_size*x_size)/real(real_size, dp)
q_cons_vf(k)%sf(j, i, l) = data_real_gpu(l + j*real_size + 1 + (k - 1)*real_size*x_size)
q_cons_vf(k)%sf(j, i2, l) = data_real_gpu(l + j*real_size + 1 + (k - 1)*real_size*x_size)
end do
end do
end do
Expand Down
2 changes: 0 additions & 2 deletions src/simulation/m_global_parameters.fpp
Original file line number Diff line number Diff line change
Expand Up @@ -464,7 +464,6 @@ module m_global_parameters
$:GPU_DECLARE(create='[Bx0]')

logical :: fft_wrt
logical :: dummy !< AMDFlang workaround for case-optimization + GPU-kernel bug
!> @name Continuum damage model parameters
!> @{!
real(wp) :: tau_star !< Stress threshold for continuum damage modeling
Expand Down Expand Up @@ -695,7 +694,6 @@ contains
#:endfor

fft_wrt = .false.
dummy = .false.

do j = 1, num_probes_max
acoustic(j)%pulse = dflt_int
Expand Down
2 changes: 1 addition & 1 deletion src/simulation/m_igr.fpp
Original file line number Diff line number Diff line change
Expand Up @@ -302,7 +302,7 @@ contains

call s_populate_F_igr_buffers(bc_type, jac_sf)

if (igr_iter_solver == 1 .or. dummy) then ! Jacobi iteration
if (igr_iter_solver == 1) then ! Jacobi iteration
$:GPU_PARALLEL_LOOP(private='[j, k, l]', collapse=3)
do l = idwbuff(3)%beg, idwbuff(3)%end
do k = idwbuff(2)%beg, idwbuff(2)%end
Expand Down
Loading
Loading