Skip to content

Commit 48d4df1

Browse files
committed
fix: AMD AFAR (LLVMFlang) GPU pointer attachment for derived-type field arrays
AMD AFAR 23.2.1 does not support the OpenMP 5.1 attach() clause on target enter data. Add an OMP_AMD_ATTACH_FIX macro in omp_macros.fpp that performs pointer attachment manually using omp_get_mapped_ptr:
1. Gets the device address of the already-mapped array data.
2. Reconstructs the bounds from the host pointer metadata.
3. Reassigns the device-side pointer in a small !$omp target region.
Key fixes applied:
- OMP_ENTER_DATA: suppress the attach clause on AMD and call OMP_AMD_ATTACH_FIX instead.
- OMP_AMD_ATTACH_FIX: guard the c_loc call with associated() to avoid a null pointer from unassociated aliases (e.g. flux_src_n(i>1) components that alias unallocated flux_src_n(1) members).
- Use bounds-spec syntax (lb:) instead of bounds-remapping (lb:ub) to avoid the 'target must be rank-1 or simply contiguous' error with c_f_pointer results.
Also enables the ACC_SETUP_VFs attach path for MFC_OpenMP builds so that all vector field structs get proper device-side pointer setup on AMD.
1 parent 2874c19 commit 48d4df1

5 files changed

Lines changed: 466 additions & 395 deletions

File tree

CMakeLists.txt

Lines changed: 17 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -710,12 +710,23 @@ exit 0
710710
PRIVATE -DFRONTIER_UNIFIED)
711711
endif()
712712

713-
find_library(HIP_LIB amdhip64
714-
HINTS "$ENV{OLCF_AFAR_ROOT}/lib" REQUIRED)
715-
find_library(HIPFORT_AMDGCN_LIB hipfort-amdgcn
716-
HINTS "$ENV{OLCF_AFAR_ROOT}/lib" REQUIRED)
717-
target_include_directories(${a_target} PRIVATE
718-
"$ENV{OLCF_AFAR_ROOT}/include/hipfort/amdgcn")
713+
# Use direct paths from OLCF_AFAR_ROOT to avoid system ROCm shadowing.
714+
# therock-afar-23.x layout: hipfort-amdgcn at lib/llvm/lib/ and .mod at lib/llvm/include/hipfort/amdgcn/
715+
# rocm-afar-22.x layout: hipfort-amdgcn at lib/ and .mod at include/hipfort/amdgcn/
716+
find_library(HIP_LIB amdhip64
717+
PATHS "$ENV{OLCF_AFAR_ROOT}/lib"
718+
NO_DEFAULT_PATH REQUIRED)
719+
if(EXISTS "$ENV{OLCF_AFAR_ROOT}/lib/llvm/lib/libhipfort-amdgcn.a")
720+
set(HIPFORT_AMDGCN_LIB "$ENV{OLCF_AFAR_ROOT}/lib/llvm/lib/libhipfort-amdgcn.a")
721+
target_include_directories(${a_target} PRIVATE
722+
"$ENV{OLCF_AFAR_ROOT}/lib/llvm/include/hipfort/amdgcn")
723+
else()
724+
find_library(HIPFORT_AMDGCN_LIB hipfort-amdgcn
725+
PATHS "$ENV{OLCF_AFAR_ROOT}/lib"
726+
NO_DEFAULT_PATH REQUIRED)
727+
target_include_directories(${a_target} PRIVATE
728+
"$ENV{OLCF_AFAR_ROOT}/include/hipfort/amdgcn")
729+
endif()
719730
target_link_libraries(${a_target} PRIVATE
720731
${HIP_LIB} ${HIPFORT_AMDGCN_LIB})
721732

src/common/include/macros.fpp

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -83,9 +83,9 @@
8383
deallocate (${allocated_variables}$)
8484
#:enddef DEALLOCATE
8585
86-
! Cray-specific GPU pointer setup for vector fields
86+
! GPU pointer setup for vector fields (Cray and bare LLVMFlang OpenMP target, e.g. AMD AFAR)
8787
#:def ACC_SETUP_VFs(*args)
88-
#ifdef _CRAYFTN
88+
#if defined(_CRAYFTN) || defined(MFC_LLVMFlang)
8989
block
9090
integer :: macros_setup_vfs_i
9191
@@ -99,6 +99,9 @@
9999
if (associated(${arg}$%vf(macros_setup_vfs_i)%sf)) then
100100
$:GPU_ENTER_DATA(copyin=('[' + arg + '%vf(macros_setup_vfs_i)]'))
101101
$:GPU_ENTER_DATA(copyin=('[' + arg + '%vf(macros_setup_vfs_i)%sf]'))
102+
#if defined(MFC_OpenMP)
103+
$:GPU_ENTER_DATA(attach=('[' + arg + '%vf(macros_setup_vfs_i)%sf]'))
104+
#endif
102105
end if
103106
end do
104107
end if
@@ -107,9 +110,9 @@
107110
#endif
108111
#:enddef
109112
110-
! Cray-specific GPU pointer setup for scalar fields
113+
! GPU pointer setup for scalar fields (Cray and bare LLVMFlang OpenMP target, e.g. AMD AFAR)
111114
#:def ACC_SETUP_SFs(*args)
112-
#ifdef _CRAYFTN
115+
#if defined(_CRAYFTN) || defined(MFC_LLVMFlang)
113116
block
114117
@:LOG({'@:ACC_SETUP_SFs(${', '.join(args)}$)'})
115118

src/common/include/omp_macros.fpp

Lines changed: 48 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -293,11 +293,18 @@
293293
#:def OMP_ENTER_DATA(copyin=None, copyinReadOnly=None, create=None, attach=None, extraOmpArgs=None)
294294
#:set copyin_val = OMP_COPYIN_STR(copyin).strip('\n') + OMP_COPYIN_STR(copyinReadOnly).strip('\n')
295295
#:set create_val = OMP_CREATE_STR(create)
296-
#:set attach_val = OMP_MAP_STR('always,to', attach)
296+
#:if USING_AMD
297+
#:set attach_val = ''
298+
$:OMP_AMD_ATTACH_FIX(attach)
299+
#:else
300+
#:set attach_val = GEN_PARENTHESES_CLAUSE('attach', attach)
301+
#:endif
297302
#:set extraOmpArgs_val = GEN_EXTRA_ARGS_STR(extraOmpArgs)
298303
#:set omp_clause_val = copyin_val.strip('\n') + create_val.strip('\n') + attach_val.strip('\n')
299-
#:set omp_directive = '!$omp target enter data ' + omp_clause_val + extraOmpArgs_val.strip('\n')
300-
$:omp_directive
304+
#:if omp_clause_val.strip()
305+
#:set omp_directive = '!$omp target enter data ' + omp_clause_val + extraOmpArgs_val.strip('\n')
306+
$:omp_directive
307+
#:endif
301308
#:enddef
302309

303310
#:def OMP_EXIT_DATA(copyout=None, delete=None, detach=None, extraOmpArgs=None)
@@ -382,4 +389,42 @@
382389
$:code
383390
#:endif
384391
#:enddef
392+
393+
! AMD AFAR (LLVMFlang) does not support the OpenMP 5.1 attach() clause on target enter data.
394+
! This macro performs pointer attachment manually: it gets the device address of the
395+
! already-mapped array data, then in a small target region reassigns the Fortran POINTER
396+
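! member in the device-side struct to that device address. Each attach target is assumed to be a rank-3 real(stp) pointer: three bounds and a (:,:,:) view are hard-coded below.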
397+
#:def OMP_AMD_ATTACH_FIX(attach)
398+
#:if attach is not None
399+
#:set clause_regex = re.compile(',(?![^(]*\\))')
400+
#:set attach_str = re.sub(clause_regex, ';', attach.strip('[]'))
401+
#:set attach_list = [x.strip() for x in attach_str.split(';')]
402+
#:for var_expr in attach_list
403+
block
404+
use iso_c_binding, only: c_ptr, c_loc, c_associated, c_f_pointer
405+
use omp_lib, only: omp_get_mapped_ptr, omp_get_default_device
406+
type(c_ptr) :: amd_dev_ptr
407+
integer :: amd_lb1, amd_lb2, amd_lb3, amd_n1, amd_n2, amd_n3
408+
if (associated(${var_expr}$)) then
409+
amd_lb1 = lbound(${var_expr}$, 1)
410+
amd_lb2 = lbound(${var_expr}$, 2)
411+
amd_lb3 = lbound(${var_expr}$, 3)
412+
amd_n1 = size(${var_expr}$, 1)
413+
amd_n2 = size(${var_expr}$, 2)
414+
amd_n3 = size(${var_expr}$, 3)
415+
amd_dev_ptr = omp_get_mapped_ptr(c_loc(${var_expr}$), omp_get_default_device())
416+
if (c_associated(amd_dev_ptr)) then
417+
!$omp target firstprivate(amd_dev_ptr, amd_lb1, amd_lb2, amd_lb3, amd_n1, amd_n2, amd_n3)
418+
block
419+
real(stp), pointer :: amd_sf_view(:,:,:)
420+
call c_f_pointer(amd_dev_ptr, amd_sf_view, [amd_n1, amd_n2, amd_n3])
421+
${var_expr}$(amd_lb1:,amd_lb2:,amd_lb3:) => amd_sf_view
422+
end block
423+
!$omp end target
424+
end if
425+
end if
426+
end block
427+
#:endfor
428+
#:endif
429+
#:enddef
385430
! New line at end of file is required for FYPP

src/simulation/m_rhs.fpp

Lines changed: 1 addition & 143 deletions
Original file line numberDiff line numberDiff line change
@@ -67,18 +67,6 @@ module m_rhs
6767
type(scalar_field), allocatable, dimension(:) :: tau_Re_vf
6868
$:GPU_DECLARE(create='[tau_Re_vf]')
6969

70-
!> @name The cell-boundary values of the fluxes (src - source, gsrc - geometrical source). These are computed by applying the
71-
!! chosen Riemann problem solver on the left and right cell-boundary values of the primitive variables
72-
!> @{
73-
type(vector_field), allocatable, dimension(:) :: flux_n
74-
type(vector_field), allocatable, dimension(:) :: flux_src_n
75-
type(vector_field), allocatable, dimension(:) :: flux_gsrc_n
76-
77-
#if defined(MFC_OpenACC)
78-
$:GPU_DECLARE(create='[flux_n, flux_src_n, flux_gsrc_n]')
79-
#endif
80-
!> @}
81-
8270
type(vector_field), allocatable, dimension(:) :: qL_prim, qR_prim
8371
#if defined(MFC_OpenACC)
8472
$:GPU_DECLARE(create='[qL_prim, qR_prim]')
@@ -182,79 +170,6 @@ contains
182170
$:GPU_ENTER_DATA(attach='[q_prim_qp%vf(eqn_idx%psi)%sf]')
183171
end if
184172

185-
if (.not. igr) then
186-
@:ALLOCATE(flux_n(1:num_dims))
187-
@:ALLOCATE(flux_src_n(1:num_dims))
188-
@:ALLOCATE(flux_gsrc_n(1:num_dims))
189-
190-
do i = 1, num_dims
191-
@:ALLOCATE(flux_n(i)%vf(1:sys_size))
192-
@:ALLOCATE(flux_src_n(i)%vf(1:sys_size))
193-
@:ALLOCATE(flux_gsrc_n(i)%vf(1:sys_size))
194-
195-
if (i == 1) then
196-
do l = 1, sys_size
197-
@:ALLOCATE(flux_n(i)%vf(l)%sf(idwbuff(1)%beg:idwbuff(1)%end, idwbuff(2)%beg:idwbuff(2)%end, &
198-
& idwbuff(3)%beg:idwbuff(3)%end))
199-
@:ALLOCATE(flux_gsrc_n(i)%vf(l)%sf(idwbuff(1)%beg:idwbuff(1)%end, idwbuff(2)%beg:idwbuff(2)%end, &
200-
& idwbuff(3)%beg:idwbuff(3)%end))
201-
end do
202-
203-
if (viscous .or. surface_tension) then
204-
do l = eqn_idx%mom%beg, eqn_idx%E
205-
@:ALLOCATE(flux_src_n(i)%vf(l)%sf(idwbuff(1)%beg:idwbuff(1)%end, idwbuff(2)%beg:idwbuff(2)%end, &
206-
& idwbuff(3)%beg:idwbuff(3)%end))
207-
end do
208-
end if
209-
210-
@:ALLOCATE(flux_src_n(i)%vf(eqn_idx%adv%beg)%sf(idwbuff(1)%beg:idwbuff(1)%end, idwbuff(2)%beg:idwbuff(2)%end, &
211-
& idwbuff(3)%beg:idwbuff(3)%end))
212-
213-
if (riemann_solver == 1 .or. riemann_solver == 4) then
214-
do l = eqn_idx%adv%beg + 1, eqn_idx%adv%end
215-
@:ALLOCATE(flux_src_n(i)%vf(l)%sf(idwbuff(1)%beg:idwbuff(1)%end, idwbuff(2)%beg:idwbuff(2)%end, &
216-
& idwbuff(3)%beg:idwbuff(3)%end))
217-
end do
218-
end if
219-
220-
if (chemistry) then
221-
do l = eqn_idx%species%beg, eqn_idx%species%end
222-
@:ALLOCATE(flux_src_n(i)%vf(l)%sf(idwbuff(1)%beg:idwbuff(1)%end, idwbuff(2)%beg:idwbuff(2)%end, &
223-
& idwbuff(3)%beg:idwbuff(3)%end))
224-
end do
225-
if (chem_params%diffusion .and. .not. viscous) then
226-
@:ALLOCATE(flux_src_n(i)%vf(eqn_idx%E)%sf(idwbuff(1)%beg:idwbuff(1)%end, &
227-
& idwbuff(2)%beg:idwbuff(2)%end, idwbuff(3)%beg:idwbuff(3)%end))
228-
end if
229-
end if
230-
else
231-
do l = 1, sys_size
232-
@:ALLOCATE(flux_gsrc_n(i)%vf(l)%sf(idwbuff(1)%beg:idwbuff(1)%end, idwbuff(2)%beg:idwbuff(2)%end, &
233-
& idwbuff(3)%beg:idwbuff(3)%end))
234-
end do
235-
end if
236-
237-
@:ACC_SETUP_VFs(flux_n(i))
238-
@:ACC_SETUP_VFs(flux_src_n(i), flux_gsrc_n(i))
239-
240-
if (i == 1) then
241-
if (riemann_solver /= 1) then
242-
do l = eqn_idx%adv%beg + 1, eqn_idx%adv%end
243-
flux_src_n(i)%vf(l)%sf => flux_src_n(i)%vf(eqn_idx%adv%beg)%sf
244-
$:GPU_ENTER_DATA(attach='[flux_src_n(i)%vf(l)%sf]')
245-
end do
246-
end if
247-
else
248-
do l = 1, sys_size
249-
flux_n(i)%vf(l)%sf => flux_n(1)%vf(l)%sf
250-
$:GPU_ENTER_DATA(attach='[flux_n(i)%vf(l)%sf]')
251-
flux_src_n(i)%vf(l)%sf => flux_src_n(1)%vf(l)%sf
252-
$:GPU_ENTER_DATA(attach='[flux_src_n(i)%vf(l)%sf]')
253-
end do
254-
end if
255-
end do
256-
end if
257-
258173
if ((.not. igr)) then
259174
@:ALLOCATE(dq_prim_dx_qp(1:1))
260175
@:ALLOCATE(dq_prim_dy_qp(1:1))
@@ -411,20 +326,6 @@ contains
411326
end if
412327
end do
413328
end if
414-
415-
$:GPU_PARALLEL_LOOP(private='[i, j, k, l, id]', collapse=4)
416-
do id = 1, num_dims
417-
do i = 1, sys_size
418-
do l = idwbuff(3)%beg, idwbuff(3)%end
419-
do k = idwbuff(2)%beg, idwbuff(2)%end
420-
do j = idwbuff(1)%beg, idwbuff(1)%end
421-
flux_gsrc_n(id)%vf(i)%sf(j, k, l) = 0._wp
422-
end do
423-
end do
424-
end do
425-
end do
426-
end do
427-
$:END_GPU_PARALLEL_LOOP()
428329
end if
429330

430331
if (qbmm) then
@@ -716,7 +617,7 @@ contains
716617
call nvtxStartRange("RHS-RIEMANN-SOLVER")
717618
call s_riemann_solver(qR_rsx_vf, dqR_prim_n%x(id)%vf, dqR_prim_n%y(id)%vf, dqR_prim_n%z(id)%vf, qR_prim(id)%vf, &
718619
& qL_rsx_vf, dqL_prim_n%x(id)%vf, dqL_prim_n%y(id)%vf, dqL_prim_n%z(id)%vf, qL_prim(id)%vf, &
719-
& q_prim_qp%vf, flux_n(id)%vf, flux_src_n(id)%vf, flux_gsrc_n(id)%vf, id, irx, iry, irz)
620+
& q_prim_qp%vf, id, irx, iry, irz)
720621
call nvtxEndRange
721622
722623
! Additional physics and source terms RHS addition for advection source
@@ -1805,49 +1706,6 @@ contains
18051706
deallocate (alf_sum%sf)
18061707
end if
18071708
1808-
if (.not. igr) then
1809-
do i = num_dims, 1, -1
1810-
if (i /= 1) then
1811-
do l = 1, sys_size
1812-
nullify (flux_n(i)%vf(l)%sf)
1813-
nullify (flux_src_n(i)%vf(l)%sf)
1814-
@:DEALLOCATE(flux_gsrc_n(i)%vf(l)%sf)
1815-
end do
1816-
else
1817-
do l = 1, sys_size
1818-
@:DEALLOCATE(flux_n(i)%vf(l)%sf)
1819-
@:DEALLOCATE(flux_gsrc_n(i)%vf(l)%sf)
1820-
end do
1821-
1822-
if (viscous) then
1823-
do l = eqn_idx%mom%beg, eqn_idx%E
1824-
@:DEALLOCATE(flux_src_n(i)%vf(l)%sf)
1825-
end do
1826-
end if
1827-
1828-
if (chem_params%diffusion .and. .not. viscous) then
1829-
@:DEALLOCATE(flux_src_n(i)%vf(eqn_idx%E)%sf)
1830-
end if
1831-
1832-
if (riemann_solver == 1 .or. riemann_solver == 4) then
1833-
do l = eqn_idx%adv%beg + 1, eqn_idx%adv%end
1834-
@:DEALLOCATE(flux_src_n(i)%vf(l)%sf)
1835-
end do
1836-
else
1837-
do l = eqn_idx%adv%beg + 1, eqn_idx%adv%end
1838-
nullify (flux_src_n(i)%vf(l)%sf)
1839-
end do
1840-
end if
1841-
1842-
@:DEALLOCATE(flux_src_n(i)%vf(eqn_idx%adv%beg)%sf)
1843-
end if
1844-
1845-
@:DEALLOCATE(flux_n(i)%vf, flux_src_n(i)%vf, flux_gsrc_n(i)%vf)
1846-
end do
1847-
1848-
@:DEALLOCATE(flux_n, flux_src_n, flux_gsrc_n)
1849-
end if
1850-
18511709
end subroutine s_finalize_rhs_module
18521710
18531711
end module m_rhs

0 commit comments

Comments
 (0)