Skip to content

Commit 6a0c621

Browse files
committed
merge upstream/master into MovingBubblesFresh-clean
2 parents 009a826 + 060f752 commit 6a0c621

27 files changed

Lines changed: 1434 additions & 1829 deletions

.github/scripts/prebuild-case-optimization.sh

Lines changed: 13 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,10 @@
11
#!/bin/bash
22

3-
# Pre-builds all benchmark cases with --case-optimization.
4-
# No GPU hardware needed — compilation only.
3+
# Pre-builds all benchmark cases with --case-optimization using --dry-run so
4+
# binaries are cached before the GPU run job. No simulation is executed.
55
# Can run in two modes:
66
# 1. Direct (Frontier login nodes): pass cluster/device/interface as args
7-
# 2. Inside SLURM (Phoenix): uses $job_device/$job_interface from submit-slurm-job.sh
7+
# 2. Inside SLURM (Phoenix/frontier_amd): uses $job_device/$job_interface
88
# Usage: bash prebuild-case-optimization.sh [<cluster> <device> <interface>]
99

1010
set -e
@@ -22,14 +22,18 @@ case "$cluster" in
2222
*) echo "ERROR: Unknown cluster '$cluster'"; exit 1 ;;
2323
esac
2424

25-
source .github/scripts/clean-build.sh
26-
clean_build
25+
# Phoenix starts fresh (no prior dep build); other clusters pre-build deps via
26+
# build.sh first, so we must preserve them and only clean the hash-named MFC target dirs under build/staging and build/install.
27+
if [ "$cluster" = "phoenix" ]; then
28+
source .github/scripts/clean-build.sh
29+
clean_build
30+
else
31+
find build/staging -maxdepth 1 -regex '.*/[0-9a-f]+' -type d -exec rm -rf {} + 2>/dev/null || true
32+
find build/install -maxdepth 1 -regex '.*/[0-9a-f]+' -type d -exec rm -rf {} + 2>/dev/null || true
33+
fi
2734

2835
. ./mfc.sh load -c "$flag" -m g
2936

30-
# Set GPU build flags from interface — this is always a GPU build.
31-
# Don't use gpu-opts.sh since $job_device may be "cpu" when submitted
32-
# to a CPU SLURM partition (no GPU hardware needed for compilation).
3337
case "$job_interface" in
3438
acc) gpu_opts="--gpu acc" ;;
3539
omp) gpu_opts="--gpu mp" ;;
@@ -38,5 +42,5 @@ esac
3842

3943
for case in benchmarks/*/case.py; do
4044
echo "=== Pre-building: $case ==="
41-
./mfc.sh build -i "$case" --case-optimization $gpu_opts -j 8
45+
./mfc.sh run "$case" --case-optimization $gpu_opts -j 8 --dry-run
4246
done

.github/scripts/run_case_optimization.sh

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,13 +23,14 @@ benchmarks=(
2323

2424
# For Frontier: deps were fetched on the login node via --deps-only;
2525
# build case-optimized binaries here on the compute node before running.
26-
# For Phoenix: prebuild-case-optimization.sh already built everything in a prior SLURM job.
26+
# For Phoenix and frontier_amd: prebuild-case-optimization.sh already built
27+
# everything in a prior SLURM job (via --dry-run), so skip the build here.
2728
#
2829
# Clean stale MFC target staging before building. On self-hosted CI runners,
2930
# corrupted intermediate files from a prior failed build (e.g. CCE optcg crash)
3031
# can persist and poison subsequent builds. Each case-opt config gets its own
3132
# hash-named staging dir, but install dirs and other artifacts may be stale.
32-
if [ "$job_cluster" != "phoenix" ]; then
33+
if [ "$job_cluster" != "phoenix" ] && [ "$job_cluster" != "frontier_amd" ]; then
3334
# Clean stale MFC target dirs (hash-named) from prior builds, but
3435
# preserve dependency dirs (hipfort, fftw, etc.) since the compute
3536
# node has no internet to re-fetch them.

.github/workflows/test.yml

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -659,12 +659,16 @@ jobs:
659659
if: matrix.cluster == 'phoenix'
660660
run: bash .github/scripts/submit-slurm-job.sh .github/scripts/prebuild-case-optimization.sh cpu ${{ matrix.interface }} ${{ matrix.cluster }}
661661

662+
- name: Pre-Build (SLURM)
663+
if: matrix.cluster == 'frontier_amd'
664+
run: bash .github/scripts/submit-slurm-job.sh .github/scripts/prebuild-case-optimization.sh gpu ${{ matrix.interface }} ${{ matrix.cluster }}
665+
662666
- name: Build & Run Case-Optimization Tests
663-
if: matrix.cluster != 'phoenix'
667+
if: matrix.cluster != 'phoenix' && matrix.cluster != 'frontier_amd'
664668
run: bash .github/scripts/submit-slurm-job.sh .github/scripts/run_case_optimization.sh ${{ matrix.device }} ${{ matrix.interface }} ${{ matrix.cluster }}
665669

666670
- name: Run Case-Optimization Tests
667-
if: matrix.cluster == 'phoenix'
671+
if: matrix.cluster == 'phoenix' || matrix.cluster == 'frontier_amd'
668672
run: bash .github/scripts/submit-slurm-job.sh .github/scripts/run_case_optimization.sh ${{ matrix.device }} ${{ matrix.interface }} ${{ matrix.cluster }}
669673

670674
- name: Cancel SLURM Jobs

CMakeLists.txt

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -644,7 +644,7 @@ exit 0
644644
target_link_options(${a_target} PRIVATE -fopenmp)
645645
elseif(CMAKE_Fortran_COMPILER_ID STREQUAL "LLVMFlang")
646646
target_compile_options(${a_target} PRIVATE -fopenmp --offload-arch=gfx90a -fopenmp-target-fast -fopenmp-assume-threads-oversubscription -fopenmp-assume-teams-oversubscription)
647-
target_link_options(${a_target} PRIVATE -fopenmp --offload-arch=gfx90a)
647+
target_link_options(${a_target} PRIVATE -fopenmp --offload-arch=gfx90a -flto-partitions=${MFC_BUILD_JOBS})
648648
endif()
649649
endif()
650650

@@ -710,14 +710,15 @@ exit 0
710710
PRIVATE -DFRONTIER_UNIFIED)
711711
endif()
712712

713-
find_library(HIP_LIB amdhip64
713+
find_library(HIP_LIB amdhip64
714714
HINTS "$ENV{OLCF_AFAR_ROOT}/lib" REQUIRED)
715715
find_library(HIPFORT_AMDGCN_LIB hipfort-amdgcn
716716
HINTS "$ENV{OLCF_AFAR_ROOT}/lib" REQUIRED)
717717
target_include_directories(${a_target} PRIVATE
718718
"$ENV{OLCF_AFAR_ROOT}/include/hipfort/amdgcn")
719719
target_link_libraries(${a_target} PRIVATE
720-
${HIP_LIB} ${HIPFORT_AMDGCN_LIB} flang_rt.hostdevice)
720+
${HIP_LIB} ${HIPFORT_AMDGCN_LIB})
721+
721722
endif()
722723
elseif (CMAKE_Fortran_COMPILER_ID STREQUAL "Cray")
723724
target_compile_options(${a_target} PRIVATE "SHELL:-h noacc" "SHELL:-x acc")
@@ -790,6 +791,12 @@ if (MFC_POST_PROCESS)
790791

791792
# -O0 is in response to https://github.com/MFlowCode/MFC-develop/issues/95
792793
target_compile_options(post_process PRIVATE -O0)
794+
795+
# flang-23/LLD defaults to PIE; SILO and LAPACK static libs on Frontier are
796+
# non-PIC, producing R_X86_64_32 relocations that LLD rejects in PIE mode.
797+
if (CMAKE_Fortran_COMPILER_ID STREQUAL "LLVMFlang")
798+
target_link_options(post_process PRIVATE -no-pie)
799+
endif()
793800
endif()
794801

795802
if (MFC_SYSCHECK)

examples/3D_performance_test/case.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
json.dumps(
77
{
88
# Logistics
9-
"run_time_info": "T",
9+
"run_time_info": "F",
1010
# Computational Domain Parameters
1111
"x_domain%beg": 0.0e00,
1212
"x_domain%end": 4.0e-03 / 1.0e-03,

src/common/m_chemistry.fpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -192,7 +192,7 @@ contains
192192

193193
$:GPU_UPDATE(device='[isc1, isc2, isc3]')
194194

195-
if (chemistry .or. dummy) then
195+
if (chemistry) then
196196
! Set offsets based on direction using array indexing
197197
offsets = 0
198198
offsets(idir) = 1

src/post_process/m_global_parameters.fpp

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -207,9 +207,6 @@ module m_global_parameters
207207
logical :: E_wrt
208208
logical, dimension(num_fluids_max) :: alpha_rho_e_wrt
209209
logical :: fft_wrt
210-
!> AMDFlang workaround: keep a dummy logical to avoid a compiler case-optimization bug when a parameter+GPU-kernel conditional
211-
!! is false
212-
logical :: dummy !< AMDFlang workaround for case-optimization + GPU-kernel bug
213210
logical :: pres_wrt
214211
logical, dimension(num_fluids_max) :: alpha_wrt
215212
logical :: gamma_wrt
@@ -431,7 +428,6 @@ contains
431428
file_per_process = .false.
432429
E_wrt = .false.
433430
fft_wrt = .false.
434-
dummy = .false.
435431
pres_wrt = .false.
436432
alpha_wrt = .false.
437433
gamma_wrt = .false.

src/pre_process/m_data_output.fpp

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -133,10 +133,10 @@ contains
133133

134134
if (bubbles_lagrange) then
135135
block
136-
real(stp), allocatable :: beta_ones(:,:,:)
136+
real(stp), allocatable :: beta_ones(:,:,:)
137137
character(LEN=len_trim(t_step_dir) + name_len) :: beta_file_loc
138-
integer :: jj, kk, ll
139-
allocate (beta_ones(0:m, 0:n, 0:p))
138+
integer :: jj, kk, ll
139+
allocate (beta_ones(0:m,0:n,0:p))
140140
do ll = 0, p
141141
do kk = 0, n
142142
do jj = 0, m
@@ -553,8 +553,8 @@ contains
553553
if (bubbles_lagrange) then
554554
block
555555
real(stp), allocatable :: beta_ones(:,:,:)
556-
integer :: jj, kk, ll
557-
allocate (beta_ones(0:m, 0:n, 0:p))
556+
integer :: jj, kk, ll
557+
allocate (beta_ones(0:m,0:n,0:p))
558558
do ll = 0, p
559559
do kk = 0, n
560560
do jj = 0, m
@@ -635,8 +635,8 @@ contains
635635
if (bubbles_lagrange) then
636636
block
637637
real(stp), allocatable :: beta_ones(:,:,:)
638-
integer :: jj, kk, ll
639-
allocate (beta_ones(0:m, 0:n, 0:p))
638+
integer :: jj, kk, ll
639+
allocate (beta_ones(0:m,0:n,0:p))
640640
do ll = 0, p
641641
do kk = 0, n
642642
do jj = 0, m

src/pre_process/m_global_parameters.fpp

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -205,7 +205,6 @@ module m_global_parameters
205205
real(wp) :: Bx0 !< Constant magnetic field in the x-direction (1D)
206206
integer :: buff_size !< Number of ghost cells for boundary condition storage
207207
logical :: fft_wrt
208-
logical :: dummy !< AMDFlang workaround for case-optimization + GPU-kernel bug
209208
210209
! Variables for hardcoded initial conditions that are read from input files
211210
character(LEN=2*path_len) :: interface_file
@@ -326,7 +325,6 @@ contains
326325
elliptic_smoothing = .false.
327326
328327
fft_wrt = .false.
329-
dummy = .false.
330328
331329
simplex_perturb = .false.
332330
simplex_params%perturb_vel(:) = .false.

src/simulation/m_acoustic_src.fpp

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -464,14 +464,16 @@ contains
464464
call s_mpi_abort('Fatal Error: Inconsistent allocation of source_spatials')
465465
end if
466466

467-
$:GPU_UPDATE(device='[source_spatials(ai)%coord]')
468-
$:GPU_UPDATE(device='[source_spatials(ai)%val]')
469-
if (support(ai) >= 5) then
470-
if (dim == 2) then
471-
$:GPU_UPDATE(device='[source_spatials(ai)%angle]')
472-
end if
473-
if (dim == 3) then
474-
$:GPU_UPDATE(device='[source_spatials(ai)%xyz_to_r_ratios]')
467+
if (count > 0) then
468+
$:GPU_UPDATE(device='[source_spatials(ai)%coord]')
469+
$:GPU_UPDATE(device='[source_spatials(ai)%val]')
470+
if (support(ai) >= 5) then
471+
if (dim == 2) then
472+
$:GPU_UPDATE(device='[source_spatials(ai)%angle]')
473+
end if
474+
if (dim == 3) then
475+
$:GPU_UPDATE(device='[source_spatials(ai)%xyz_to_r_ratios]')
476+
end if
475477
end if
476478
end if
477479
end do

0 commit comments

Comments
 (0)