Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
2470 commits
Select commit Hold shift + click to select a range
62382bc
Disable GPU Aware MPI on frontier for now
Aug 15, 2025
672cc78
Ran 1386^3 on a single node with mixed precision with UVM and without…
Aug 15, 2025
9d66c9a
Changing kind=2 to kind=1 for patch_ID
Aug 15, 2025
88c0a11
Remove fortitude from workflow (#986)
sbryngelson Aug 15, 2025
c7f6313
Fix Down-Sampling with UVM on AMD compilers
Aug 15, 2025
060e9d1
Add copyin for idwbuff
Aug 15, 2025
7cb0f9e
Cleanup
Aug 17, 2025
19b7671
Further Cleanup
Aug 17, 2025
42349df
Setup Case for 200T
Aug 17, 2025
f0be342
Deallocate after pre_process
Aug 17, 2025
d290396
Change bc_type and ib_markers to kind1
Aug 17, 2025
e457e04
Change bc_x
Aug 17, 2025
dfdd901
Use parameter arrays with OpenACC
Aug 17, 2025
b6edcf9
add simplex nosie
wilfonba Aug 18, 2025
6ca1db4
CCE build fixes
wilfonba Aug 18, 2025
3ec56b2
Revert to default frontier.mako and add mnt changes
Aug 18, 2025
1bff304
Merge branch 'openmp_rebased' of https://github.com/prathi-wind/MFC-p…
Aug 18, 2025
678777e
Ensure bc_type does not overflow
Aug 19, 2025
34a080e
Update m_boundary_common.fpp
anandrdbz Aug 19, 2025
0a080c2
Change loop bounds to kind=8
Aug 20, 2025
59d11bc
Change to kind8
Aug 20, 2025
6fcdf2b
Revert halo_size
Aug 20, 2025
fe87fc5
Resolving bug with multiple ranks using IBM (#990)
anandrdbz Sep 1, 2025
336ec52
Update PR agent configuration settings (#992)
sbryngelson Sep 2, 2025
0a68648
Revise CFD simulation statistics in README (#993)
sbryngelson Sep 3, 2025
1bf4e9a
Refactor `m_riemann_solvers` Module (HLLC Solver Subroutine) (#912)
Malmahrouqi3 Sep 9, 2025
fbdaecf
Create automated benchmark for OLCF Frontier (#998)
wilfonba Sep 15, 2025
e966781
`--rdma-mpi` flag fix (#996)
Malmahrouqi3 Sep 15, 2025
65eb598
Improved error reporting for failed tolerance checks (#988)
DimAdam-01 Sep 19, 2025
eb61616
remove m_eigen_solver (#1000)
hyeoksu-lee Sep 19, 2025
e8a47a9
Reduce multiple Weno Recon. (#1005)
DimAdam-01 Sep 28, 2025
eb152c5
Multicomponent diffusion fluxes, thermal conduction, and mixture visc…
DimAdam-01 Sep 28, 2025
c86fdd9
Add moving imersed boundaries (#1006)
danieljvickers Oct 5, 2025
1fbaaf8
Add Python 3.13 setup step to test workflow (#1016)
sbryngelson Oct 12, 2025
5c9d069
Refactor time_stepper (#1012)
hyeoksu-lee Oct 13, 2025
1609707
Add rotating mibms (#1014)
danieljvickers Oct 17, 2025
d91cb68
I/O Improvements for run time info and EL Bubbles (#1015)
wilfonba Oct 18, 2025
4f8fb91
MPI FFTW (#997)
anandrdbz Oct 21, 2025
199aaeb
MFC Containerization (#971)
Malmahrouqi3 Oct 23, 2025
2da0daf
Resolve nvhpc 25 3 (#1020)
danieljvickers Oct 31, 2025
531a46f
Add OpenMP support for Nvidia hardware (#999)
prathi-wind Nov 1, 2025
bfd732c
Mibm gpu optimization and io bugs (#1019)
danieljvickers Nov 4, 2025
ee4e831
corrected link (#1021)
Malmahrouqi3 Nov 4, 2025
7982c58
docs: Update README and CITATION.cff, add issue templates (#1022)
sbryngelson Nov 4, 2025
23bbea6
Merge branch 'openmp_rebased' into openmp_cce
Nov 4, 2025
e84464d
Merge branch 'openmp_rebased' into openmp_cce
Nov 6, 2025
416f021
IGR tests pass (CPUs)
Nov 6, 2025
767fb94
520/529 cases pass
Nov 6, 2025
9d2f746
change down_sample
Nov 6, 2025
56e1269
bug fix
Nov 6, 2025
be6fced
All but one case pass on CPUs
Nov 6, 2025
9baca98
All test cases pass on CPUs
Nov 6, 2025
42806e3
All test cases pass with parallel_io=T
Nov 6, 2025
70ec48e
fix the breaking change
Nov 7, 2025
d4dc3b3
Add additional test
Nov 7, 2025
b6022a8
Merge branch 'openmp_cce' of https://github.com/prathi-wind/MFC-prath…
Nov 7, 2025
68033a1
fix ts1 and all tests pass on frontier
Nov 7, 2025
08a9024
OpenMP compile bug
Nov 7, 2025
b0697c5
1D cases pass on CCE + OpenMP (except adap_dt and chemistry)
Nov 9, 2025
a36a225
Homebrew formula (#1024)
sbryngelson Nov 9, 2025
8a84e1b
Probe WRT and adap_dt fixed
Nov 9, 2025
4f0704f
Looks like the LD library path is bad (#1031)
danieljvickers Nov 10, 2025
b983483
1D/2D cases pass
Nov 10, 2025
622c06c
Passing most 3D cases
Nov 10, 2025
b9db549
replace comments with appropriate ifdefs
Nov 10, 2025
d759c6f
fix ifdef
Nov 10, 2025
8d40011
fix ifdef
Nov 10, 2025
c4d2603
fix macros
Nov 10, 2025
5a97a92
line length cleanup
anandrdbz Nov 10, 2025
cab026e
Fixed Cylindrical on OpenMP Frontier
Nov 10, 2025
e9a5a93
Added OMP Frontier CI
Nov 10, 2025
6d9b0fb
Merge remote-tracking branch 'upstream/master' into openmp_cce
Nov 10, 2025
f5b64a0
Ran formatter
Nov 10, 2025
5f73950
Ran formatter
Nov 10, 2025
5ab057b
Case Opt Works
Nov 10, 2025
409c367
Ran spellcheck
Nov 10, 2025
5e70305
Fixed lint issues on toolchain
Nov 10, 2025
1ebb536
Remove extra print
Nov 10, 2025
cacc5d5
remove duplicates in private
Nov 10, 2025
268808e
IGR tests pass with AMD compiler
Nov 10, 2025
216261f
fix format
Nov 10, 2025
84f85a5
fix bug in CBC + OpenMP
anandrdbz Nov 11, 2025
530638a
fix bounds in post_process
Nov 11, 2025
eb96476
Homebrew formula (#1037)
sbryngelson Nov 12, 2025
e1d9044
Increase bench time
Nov 12, 2025
d5e7059
Fix badge links and update Homebrew command (#1038)
sbryngelson Nov 12, 2025
a774051
fix post_process benchmark
Nov 12, 2025
87fae79
fix benchmarking on frontier
Nov 12, 2025
9027349
Homebrew formula (#1039)
sbryngelson Nov 12, 2025
771797b
fix benchmarking on frontier
Nov 12, 2025
9f40b5e
Improve README formatting and add emojis (#1041)
sbryngelson Nov 12, 2025
f1afee3
example case fix ups
wilfonba Nov 13, 2025
e08bcae
CMakeLists.txt fix
wilfonba Nov 13, 2025
b2ad78a
Addressing more PR comments
wilfonba Nov 13, 2025
88cbac0
fix boundary condition patches and simplex noise
wilfonba Nov 13, 2025
7ae0a65
more bug fixes
wilfonba Nov 13, 2025
78ff9c6
Merge remote-tracking branch 'upstream/master' into openmp_cce
wilfonba Nov 13, 2025
4f2f579
format
wilfonba Nov 13, 2025
56097f8
fix IB markers seg fault
wilfonba Nov 13, 2025
8a66941
Fix for benchmarking on frontier
Nov 13, 2025
d7d2ec6
Remove release badge from README (#1044)
sbryngelson Nov 14, 2025
76b6620
Smooth MHD Convergence Cases (#1040)
danieljvickers Nov 14, 2025
f5ac529
fix to make benchmark work on frontier
Nov 14, 2025
5fa68fa
format
Nov 14, 2025
7169711
Merge pull request #1035 from prathi-wind/openmp_cce
anandrdbz Nov 14, 2025
c701821
Add journal information to citation (#1045)
sbryngelson Nov 14, 2025
9333c68
fix homebrew a bit (#1047)
sbryngelson Nov 15, 2025
7386504
readme (#1049)
sbryngelson Nov 15, 2025
37560f1
Modify badges in README.md (#1050)
sbryngelson Nov 15, 2025
22af239
Add line numbering to gpu loops (#1029)
danieljvickers Nov 19, 2025
7bd0efa
Complete form of the SG EoS
JRChreim Nov 22, 2025
62d430e
Update m_variables_conversion.fpp
JRChreim Nov 22, 2025
c00ba3d
Update m_data_output.fpp
JRChreim Nov 23, 2025
9607e33
Update m_variables_conversion.fpp
JRChreim Nov 23, 2025
50fdb51
fix debug build
wilfonba Nov 23, 2025
ec8834a
correct roe average of qv
wilfonba Nov 23, 2025
d7fd48f
fix acc build
wilfonba Nov 23, 2025
98ce554
Update m_riemann_solvers.fpp
JRChreim Nov 23, 2025
7e030b8
Update m_riemann_solvers.fpp
JRChreim Nov 24, 2025
76d6a5c
Merge pull request #1057 from JRChreim/SGEoS
JRChreim Nov 24, 2025
11375f3
CI test (#1054)
sbryngelson Nov 25, 2025
364a237
Adding internal energies to the output of the conservative variables …
JRChreim Nov 27, 2025
e29d8e1
reduce redundancy in bubbles (#1062)
hyeoksu-lee Nov 27, 2025
cb856f7
Make fluid variables uniform through the code (#1063)
JRChreim Nov 27, 2025
76250b7
Three new examples cases (#1060)
Cowsreal Nov 27, 2025
e25dbc4
Update cantera and pyrometheus dependencies (#1053)
sbryngelson Nov 28, 2025
77ddd1e
cfl dt bug fix (#1027)
wilfonba Nov 28, 2025
ec01fb7
Enable automatic versioning from Git tags (#1064)
sbryngelson Nov 28, 2025
56d8a24
Add example case for convergence test in 1D (#1030)
wilfonba Nov 29, 2025
6daae96
fix
sbryngelson Nov 29, 2025
987888c
Revert "fix"
sbryngelson Nov 29, 2025
e2ce07d
Add dimension-aware long-running test notifications (#1067)
sbryngelson Nov 29, 2025
ba8a820
Case checking moved to Python! (#1066)
sbryngelson Nov 30, 2025
9d283e5
removing duplicated codes in mixture rules for EE bubbles (#1071)
hyeoksu-lee Dec 1, 2025
d8d9e25
Constraints (#1072)
sbryngelson Dec 3, 2025
ab9a1fd
fix (#1074)
sbryngelson Dec 3, 2025
6c380cd
Update GPU condition checks in frontier.mako (#1076)
sbryngelson Dec 5, 2025
8d9a83b
fix gpu templates for real though (#1077)
sbryngelson Dec 5, 2025
c91edb9
Oscar Mako and module update (#1082)
mrodrig6 Dec 10, 2025
b220ebf
benchmarks hardening! (#1078)
sbryngelson Dec 11, 2025
9a3093b
Invicid two-way fluid-structure interaction (#1075)
danieljvickers Dec 12, 2025
8ae42aa
Fixed compilation with post_processing and removed the last of the ol…
danieljvickers Dec 13, 2025
2902000
Refactor subgrid bubble models (#1085)
hyeoksu-lee Dec 16, 2025
46ffb97
homebrew: fix toolchain install without git metadata (#1096)
sbryngelson Dec 19, 2025
49d4ae9
update python in docker files (#1097)
sbryngelson Dec 19, 2025
32bd630
homebrew: fix toolchain install without git metadata (#1098)
sbryngelson Dec 20, 2025
df782b5
fix swapped vapor gas properties (#1093)
hyeoksu-lee Dec 21, 2025
6576e36
fix table (#1099)
hyeoksu-lee Dec 27, 2025
639c313
5 Equation model and relaxation (#1094)
JRChreim Jan 9, 2026
31c938f
Unity Lewis Implementation (#1084)
DimAdam-01 Jan 9, 2026
b02e7f2
Viscous stress and ellipse ib (#1102)
danieljvickers Jan 16, 2026
cbcedd6
Tuo modules and template (#1103)
wilfonba Jan 16, 2026
cb59e13
This change will fix configuration issues on HiperGator (#1112)
mrvandenboom Jan 20, 2026
6264d3f
Pretty Comments (#1113)
danieljvickers Jan 21, 2026
944aa2f
Analytic mibm velocities and airfoil centroid (#1111)
danieljvickers Jan 24, 2026
d987bdb
Fixes mako file for Hipergator CPUs (#1115)
mrvandenboom Jan 28, 2026
bac27a5
AMDFlang compiler for MFC (#1116)
anandrdbz Feb 1, 2026
a6d3f37
MHD Hyperbolic Divergence Cleaning (#1086)
ChrisZYJ Feb 3, 2026
b70292f
Fix Homebrew CI and add automated release workflow (#1120)
sbryngelson Feb 3, 2026
3cb0cc9
Fix Docker build on ARM by pre-installing numpy (#1121)
sbryngelson Feb 4, 2026
1d3286a
Quality of life improvements for MFC toolchain (#1118)
sbryngelson Feb 5, 2026
7b35b59
Add CI lint gate and local precheck command (#1122)
sbryngelson Feb 5, 2026
0945009
Shell completion auto-install and pre-commit hook improvements (#1124)
sbryngelson Feb 9, 2026
1c96f96
Remove .cursor directory (#1128)
sbryngelson Feb 9, 2026
3f59e87
Update README: theming, capabilities, and cleanup (#1129)
sbryngelson Feb 9, 2026
1578018
Update MFC 5.0 citation to published CPC article (#1131)
sbryngelson Feb 9, 2026
545f3a9
Simplify landing page JS and fix broken scaling section (#1132)
sbryngelson Feb 10, 2026
afc11b6
Auto-generate description labels and extend DEPENDENCIES schema (#1133)
sbryngelson Feb 11, 2026
0ba6c02
Add developer guide, streamline PR template, AI reviewers (#1134)
sbryngelson Feb 11, 2026
c756f42
Modify .pr_agent.toml for PR commands and settings (#1137)
sbryngelson Feb 12, 2026
56bff68
Add comprehensive equations documentation (#1136)
sbryngelson Feb 13, 2026
3639574
Fix empty bibliography in CI docs build (#1142)
sbryngelson Feb 13, 2026
21347c1
Fix 1D multi-rank MPI_GATHERV bug in post-process silo output (#1138)
sbryngelson Feb 13, 2026
ef6998d
Levelset refactor (#1123)
danieljvickers Feb 14, 2026
b104aea
Add physics constraint validation and auto-generated docs (#1149)
sbryngelson Feb 15, 2026
4c52155
Document dimensions, stored-parameter conventions, and strengthen doc…
sbryngelson Feb 15, 2026
8034f57
Restructure README for impact and fix broken doc links (#1151)
sbryngelson Feb 16, 2026
598df2c
Refresh README, docs landing page, and scaling plots (#1152)
sbryngelson Feb 17, 2026
c5493c1
Fix Homebrew install crash: namelist parser gracefully handles missin…
sbryngelson Feb 17, 2026
4073a4a
Fix homebrew-release workflow for retags and fork URLs (#1155)
sbryngelson Feb 17, 2026
9b1e64f
Refresh README, docs, and YouTube integration (#1153)
sbryngelson Feb 17, 2026
df22a69
Add simulation gallery entries and computer links (#1160)
sbryngelson Feb 18, 2026
d048c4b
Add custom 404 page and verify essential site files in CI (#1162)
sbryngelson Feb 18, 2026
8b0c3f6
fix ibm benchmark on frontier + amd (#1159)
anandrdbz Feb 19, 2026
4ff0bef
Exclude SC22 proceedings from lychee (returns 415 to crawlers)
sbryngelson Feb 19, 2026
5674a63
Auto-update copyright year and add NNSA to acknowledgements (#1164)
sbryngelson Feb 19, 2026
9e216d8
Extract simulation data to simulations.json
sbryngelson Feb 19, 2026
eaaebec
Include simulations.json in docs install
sbryngelson Feb 19, 2026
84dc476
Widen scaling plot card and add proper link bar (#1165)
sbryngelson Feb 19, 2026
e9d15f3
Merge branch 'master' into simulations-json
sbryngelson Feb 19, 2026
5a8dd03
Restore inline sims array in index.html
sbryngelson Feb 19, 2026
05b26f7
Link to Computational Physics Group website (#1168)
sbryngelson Feb 19, 2026
c612b6a
Improve API documentation discoverability (#1167)
sbryngelson Feb 19, 2026
202033e
Add unified API landing page and fix footer layout (#1169)
sbryngelson Feb 19, 2026
ad35e0e
Fix docs CI: run linkcheck before publish step (#1172)
sbryngelson Feb 19, 2026
356b61f
Add CI build caching and improve benchmark workflow (#1148)
sbryngelson Feb 20, 2026
3781b98
Harden benchmark workflow: retry builds, proactive clean, robust moni…
sbryngelson Feb 21, 2026
84c46e0
Skip benchmark workflow for bot review events (#1192)
sbryngelson Feb 21, 2026
98efa30
Remove debug print left in post-process production code (#1185)
sbryngelson Feb 22, 2026
c9a3274
Add 502 to accepted status codes in .lychee.toml (#1236) (#1237)
sbryngelson Feb 22, 2026
df28255
Remove debug print left in QBMM production code (#1226)
sbryngelson Feb 22, 2026
c8279d3
Add Claude Code GitHub Workflow (#1238)
sbryngelson Feb 22, 2026
2cb8b34
Fix claude pr (#1243)
sbryngelson Feb 22, 2026
e25be02
Fix claude again (#1245)
sbryngelson Feb 22, 2026
b310c43
Update Claude Code Review workflow to include checkout (#1247)
sbryngelson Feb 22, 2026
1fcb45a
Update excluded paths in .fortlsrc (#1248)
sbryngelson Feb 22, 2026
8a77d33
Fix claude 01 (#1250)
sbryngelson Feb 22, 2026
5e08029
Simplify execution of main.py by removing command checks (#1251)
sbryngelson Feb 23, 2026
a028536
Claude fix02 (#1252)
sbryngelson Feb 23, 2026
e36eb61
Add step to delete previous Claude review comments (#1253)
sbryngelson Feb 23, 2026
38f0457
Add 504 to accepted status codes in .lychee.toml (#1254)
sbryngelson Feb 23, 2026
7997663
Add CLAUDE.md and .claude/rules/ for Claude Code guidance (#1255)
sbryngelson Feb 24, 2026
b528867
Fix GPU example, compiler matrix, and AMD flang consistency (#1256)
sbryngelson Feb 24, 2026
c0da7ca
Refactor Claude Code Review workflow (#1257)
sbryngelson Feb 24, 2026
34c13b7
Claude fix (#1259)
sbryngelson Feb 24, 2026
b08eac0
Increase max turns from 10 to 30 in workflow (#1260)
sbryngelson Feb 24, 2026
d5123c3
Add fixes for Delta PrgEnv-nvidia (#1258)
wilfonba Feb 24, 2026
c89a491
Modify CLAUDE code review workflow settings (#1261)
sbryngelson Feb 24, 2026
29f0da5
Skip self-hosted tests and benchmarks for draft PRs (#1262)
sbryngelson Feb 25, 2026
97a6dac
Enhance workflow with local bin setup and tool updates (#1263)
sbryngelson Feb 25, 2026
7658b43
Modify Claude Code Review workflow settings (#1264)
sbryngelson Feb 25, 2026
0ce8382
Exclude 404.html from lychee link checks (#1265)
sbryngelson Feb 25, 2026
9652f08
Fix claude maybe (#1266)
sbryngelson Feb 25, 2026
052868f
Remove push trigger settings from .pr_agent.toml
sbryngelson Feb 26, 2026
3df5d5d
Add auto review settings and update Fortran instructions
sbryngelson Feb 26, 2026
5e28e4f
Delete .codeant directory
sbryngelson Feb 26, 2026
78a9172
Update .coderabbit.yaml with review settings
sbryngelson Feb 26, 2026
fe08007
Add AI review trigger commands to PR template (#1268)
sbryngelson Feb 26, 2026
0779aa0
Update .coderabbit.yaml with new review settings
sbryngelson Feb 26, 2026
e20e260
Increase max turns from 10 to 90 in workflow
sbryngelson Feb 26, 2026
1412eb2
Fix 8 HPC-sensitive bugs: GPU kernels, MPI broadcast, domain decompos…
sbryngelson Feb 26, 2026
5a2a8a4
Trigger CI on draft-to-ready PR conversion (#1271)
sbryngelson Feb 27, 2026
b62b08b
Fix post_process non-MPI builds never reading input file (#1272)
sbryngelson Feb 27, 2026
33ab04e
Update .coderabbit.yaml for improved configuration
sbryngelson Feb 27, 2026
35b2134
Fix serial I/O missing beta for bubbles_lagrange in post_process (#1274)
sbryngelson Feb 27, 2026
be93258
remove .png
wilfonba Feb 27, 2026
3728b8d
fix up merge errors and test suite
wilfonba Feb 27, 2026
28fc258
Add test sharding, proactive clean, and retry logic for self-hosted C…
sbryngelson Feb 28, 2026
ab5082e
Fix SIGILL crashes on GitHub runners via CPU-aware build cache keys (…
sbryngelson Feb 28, 2026
b5486c9
Add CLI visualization command (./mfc.sh viz) (#1233)
sbryngelson Mar 1, 2026
021322c
Exclude OLCF Summit URL from link checker (#1279)
sbryngelson Mar 1, 2026
4ee892c
Skip coverage CI for draft PRs (#1280)
sbryngelson Mar 1, 2026
7c806be
IBM Feature Improvements and Speedup (#1157)
danieljvickers Mar 2, 2026
fd96fec
Merge remote-tracking branch 'upstream/master' into MovingBubblesFresh
wilfonba Mar 2, 2026
6fc430a
Merge remote-tracking branch 'upstream/master' into MovingBubblesFresh
wilfonba Mar 2, 2026
4166fc2
precheck and bug fixes
wilfonba Mar 2, 2026
5014df0
Fix docs: remove duplicate simulations.json, fix broken YouTube link …
sbryngelson Mar 3, 2026
472a0de
Support Python 3.9 (#1287)
sbryngelson Mar 3, 2026
ce98373
Consolidate CI infrastructure and add NFS-resilient build cache (#1285)
sbryngelson Mar 3, 2026
5032526
Merge remote-tracking branch 'upstream/master' into MovingBubblesFresh
wilfonba Mar 3, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .github/file-filter.yml
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ yml: &yml
- '.github/workflows/phoenix/**'
- '.github/workflows/frontier/**'
- '.github/workflows/frontier_amd/**'
- '.github/scripts/**'
- '.github/workflows/bench.yml'
- '.github/workflows/test.yml'
- '.github/workflows/formatting.yml'
Expand Down
15 changes: 15 additions & 0 deletions .github/scripts/bench-preamble.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
#!/bin/bash
# Common setup sourced by the benchmark scripts.
# Reads:  $job_device (compared against "gpu")
# Sets:   $gpu_opts, $build_opts, $device_opts, $n_ranks, $ngpus, $gpu_ids
# Usage:  source .github/scripts/bench-preamble.sh

# detect-gpus.sh provides $ngpus/$gpu_ids; gpu-opts.sh provides $gpu_opts.
source .github/scripts/detect-gpus.sh
source .github/scripts/gpu-opts.sh

build_opts="$gpu_opts"
case "$job_device" in
    gpu)
        # GPU jobs: one rank per detected GPU, and hand the GPU id list to -g.
        n_ranks=$ngpus
        device_opts="$gpu_opts -g $gpu_ids"
        ;;
    *)
        # Any other device: fixed rank count and no device options.
        n_ranks=12
        device_opts=""
        ;;
esac
42 changes: 42 additions & 0 deletions .github/scripts/check_case_optimization_output.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
#!/usr/bin/env python3

"""Case-optimization output check: scan a case's D/*.dat data for NaN/Inf using the packer."""

import math
import os
import sys

if len(sys.argv) != 2:
    print(f"Usage: {sys.argv[0]} <case_directory>", file=sys.stderr)
    sys.exit(1)

# Put the repository root (two levels above this script) on the import path
# so that the `toolchain` package can be imported below.
sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..', '..'))

from toolchain.mfc.packer.pack import compile as pack_compile

# The argument may be the case directory itself or a file located inside it.
case_dir = sys.argv[1]
if os.path.isfile(case_dir):
    case_dir = os.path.dirname(case_dir)

pack, err = pack_compile(case_dir)
if err is not None:
    print(f"ERROR: {err}")
    sys.exit(1)

if not pack.entries:
    print(f"ERROR: No data found in {case_dir}/D/")
    sys.exit(1)

if pack.has_bad_values():
    print("ERROR: NaN or Inf detected in output:")
    for name, entry in pack.entries.items():
        # Only the first offending value of each entry is reported.
        first_bad = next(
            (
                (idx, value)
                for idx, value in enumerate(entry.doubles)
                if math.isnan(value) or math.isinf(value)
            ),
            None,
        )
        if first_bad is not None:
            idx, value = first_bad
            label = 'NaN' if math.isnan(value) else 'Inf'
            print(f" {label} at index {idx} in {name}")
    sys.exit(1)

value_count = sum(len(entry.doubles) for entry in pack.entries.values())
print(f"OK: {len(pack.entries)} files, {value_count} values — no NaN/Inf found")
13 changes: 13 additions & 0 deletions .github/scripts/detect-gpus.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
#!/bin/bash
# Detects GPUs (NVIDIA or AMD), sets $ngpus and $gpu_ids.
# Usage: source .github/scripts/detect-gpus.sh
#
# Sets:
#   $ngpus   - number of GPUs found (0 when none are detected)
#   $gpu_ids - space-separated GPU indices ("" when none are detected)

# Probe nvidia-smi first, then rocm-smi, and publish the result in the
# globals $ngpus and $gpu_ids. Runs once when this file is sourced; kept as a
# function so that detection can be re-run.
_detect_gpus() {
    ngpus=0
    gpu_ids=""

    if command -v nvidia-smi &>/dev/null; then
        # Count only device lines ("GPU <n>: ..."). Any other output, such as
        # a diagnostic printed when the driver is unusable, must not be
        # counted as a GPU. `|| true` keeps grep's exit status 1 (zero
        # matches) from aborting callers that run under `set -e`.
        ngpus=$(nvidia-smi -L 2>/dev/null | grep -c '^GPU ' || true)
        ngpus=${ngpus:-0}
        if [ "$ngpus" -gt 0 ]; then
            gpu_ids=$(seq -s ' ' 0 $((ngpus - 1)))
        fi
    fi

    # Fall through to ROCm when no NVIDIA GPU was found, so a non-functional
    # nvidia-smi on the PATH does not hide AMD GPUs.
    if [ "$ngpus" -eq 0 ] && command -v rocm-smi &>/dev/null; then
        # First column of `rocm-smi --showid`, reduced to its numeric part.
        gpu_ids=$(rocm-smi --showid | awk '{print $1}' | grep -Eo '[0-9]+' | uniq | tr '\n' ' ')
        ngpus=$(echo "$gpu_ids" | wc -w)
    fi
}

_detect_gpus
13 changes: 13 additions & 0 deletions .github/scripts/gpu-opts.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
#!/bin/bash
# Derives $gpu_opts from $job_device and $job_interface.
# Usage: source .github/scripts/gpu-opts.sh
#
# $gpu_opts stays empty unless $job_device is "gpu". For GPU jobs it is
# "--gpu", followed by "mp" when $job_interface is "omp" or by "acc" when it
# is "acc"; any other interface leaves it as a bare "--gpu".

gpu_opts=""
if [ "$job_device" = "gpu" ]; then
    case "$job_interface" in
        omp) gpu_opts="--gpu mp" ;;
        acc) gpu_opts="--gpu acc" ;;
        *)   gpu_opts="--gpu" ;;
    esac
fi
30 changes: 30 additions & 0 deletions .github/scripts/prebuild-case-optimization.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
#!/bin/bash

# Pre-builds every benchmark case with --case-optimization.
# Two invocation modes:
#   1. Direct (Frontier login nodes): cluster/device/interface given as arguments
#   2. Inside SLURM (Phoenix): $job_device/$job_interface come from submit.sh
# Usage: bash prebuild-case-optimization.sh [<cluster> <device> <interface>]

set -e

# Positional arguments take precedence; otherwise fall back to the environment
# (the cluster falls back to $job_cluster and finally to "phoenix").
cluster="${1:-${job_cluster:-phoenix}}"
job_device="${2:-$job_device}"
job_interface="${3:-$job_interface}"

# Translate the cluster name into the flag passed to `./mfc.sh load -c`.
if [ "$cluster" = "phoenix" ]; then
    flag="p"
elif [ "$cluster" = "frontier" ]; then
    flag="f"
elif [ "$cluster" = "frontier_amd" ]; then
    flag="famd"
else
    echo "ERROR: Unknown cluster '$cluster'"
    exit 1
fi

. ./mfc.sh load -c "$flag" -m g
source .github/scripts/gpu-opts.sh

for case_file in benchmarks/*/case.py; do
    echo "=== Pre-building: $case_file ==="
    # $gpu_opts is left unquoted on purpose: it may hold several words
    # (e.g. "--gpu acc") that must reach mfc.sh as separate arguments.
    ./mfc.sh build -i "$case_file" --case-optimization $gpu_opts -j 8
done
60 changes: 60 additions & 0 deletions .github/scripts/retry-build.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
#!/bin/bash
# Provides retry_build(): 3-attempt loop with configurable cleanup.
# Set RETRY_CLEAN_CMD to override cleanup (default: rm -rf build/staging build/install build/lock.yaml).
# Set RETRY_VALIDATE_CMD to run a post-build validation; failure triggers a retry.
# Usage: source .github/scripts/retry-build.sh
# retry_build ./mfc.sh build -j 8 --gpu acc

# Run the given cleanup command; when it fails, escalate.
# Arguments: $1 - cleanup command string (evaluated with eval, stderr discarded)
# Escalation: calls _cache_nuke when such a command is available, otherwise
#             falls back to a best-effort rm of the default build artifacts.
# Returns:   0 when the cleanup command succeeds or after the best-effort rm;
#            the status of _cache_nuke when that path is taken.
_retry_clean() {
    local clean_cmd="$1"

    eval "$clean_cmd" 2>/dev/null && return 0

    echo " Normal cleanup failed."
    if ! type _cache_nuke > /dev/null 2>&1; then
        echo " _cache_nuke not available, best-effort rm."
        rm -rf build/staging build/install build/lock.yaml 2>/dev/null || true
        return
    fi

    echo " Escalating to NFS cache nuke..."
    _cache_nuke
}

# retry_build CMD [ARGS...]
# Runs CMD until it succeeds (and passes the optional validation) or the
# attempt budget is used up. Between attempts the build tree is cleaned via
# _retry_clean.
# Environment:
#   RETRY_CLEAN_CMD     cleanup command run between attempts
#                       (default: rm -rf build/staging build/install build/lock.yaml)
#   RETRY_VALIDATE_CMD  optional command evaluated after a successful build;
#                       a failure counts as a failed attempt
#   RETRY_MAX_ATTEMPTS  number of attempts (default: 3; values that are not
#                       positive integers fall back to 3)
# Returns: 0 on success, 1 when every attempt failed, 2 when no command was given.
retry_build() {
    # Without this guard an empty "$@" expands to an empty command, which
    # "succeeds" and would be reported as a successful build.
    if [ "$#" -eq 0 ]; then
        echo "retry_build: no build command given." >&2
        return 2
    fi

    local clean_cmd="${RETRY_CLEAN_CMD:-rm -rf build/staging build/install build/lock.yaml}"
    local validate_cmd="${RETRY_VALIDATE_CMD:-}"
    local max_attempts="${RETRY_MAX_ATTEMPTS:-3}"
    local attempt delay

    # Accept only positive decimal integers for the attempt budget.
    case "$max_attempts" in
        '' | *[!0-9]*) max_attempts=3 ;;
        *) max_attempts=$((10#$max_attempts)) ;;
    esac
    [ "$max_attempts" -ge 1 ] || max_attempts=3

    for ((attempt = 1; attempt <= max_attempts; attempt++)); do
        echo "Build attempt $attempt of $max_attempts..."
        if "$@"; then
            if [ -z "$validate_cmd" ] || eval "$validate_cmd"; then
                echo "Build succeeded on attempt $attempt."
                return 0
            fi
            echo "Post-build validation failed on attempt $attempt."
            if [ "$attempt" -ge "$max_attempts" ]; then
                echo "Validation still failing after $max_attempts attempts."
                return 1
            fi
            echo "Cleaning and retrying in 5s..."
            delay=5
        else
            if [ "$attempt" -ge "$max_attempts" ]; then
                echo "Build failed after $max_attempts attempts."
                return 1
            fi
            echo "Build failed on attempt $attempt. Retrying in 30s..."
            delay=30
        fi

        # Shared retry path: clean up, back off, then try again.
        _retry_clean "$clean_cmd"
        sleep "$delay"
    done

    # Not reached: the final attempt always returns from inside the loop.
    return 1
}
34 changes: 34 additions & 0 deletions .github/scripts/run-tests-with-retry.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
#!/bin/bash
# Wrapper around `./mfc.sh test`: runs it with all provided arguments, then
# re-runs the failed tests when only a few (at most 5) failed, treating those
# as sporadic. Exits non-zero on real failures.
# Usage: bash .github/scripts/run-tests-with-retry.sh [mfc test args...]

# The retry builds its own argument list around --only, so flags that must
# also apply to the retry are collected here.
PASSTHROUGH=""
for arg in "$@"; do
    if [ "$arg" = "--test-all" ]; then
        PASSTHROUGH="$PASSTHROUGH --test-all"
    fi
done

rm -f tests/failed_uuids.txt
TEST_EXIT=0
/bin/bash mfc.sh test "$@" || TEST_EXIT=$?

# No failed UUIDs recorded: nothing to retry, so just propagate the status of
# the first run.
if [ ! -s tests/failed_uuids.txt ]; then
    if [ "$TEST_EXIT" -ne 0 ]; then
        exit $TEST_EXIT
    fi
    exit 0
fi

# A large number of failures is not treated as sporadic.
NUM_FAILED=$(wc -l < tests/failed_uuids.txt)
if ! [ "$NUM_FAILED" -le 5 ]; then
    echo "Too many failures ($NUM_FAILED) to retry — likely a real issue."
    exit 1
fi

FAILED=$(tr '\n' ' ' < tests/failed_uuids.txt)
echo ""
echo "=== Retrying $NUM_FAILED failed test(s): $FAILED ==="
echo ""
# $FAILED and $PASSTHROUGH are unquoted on purpose so that each UUID / flag
# becomes its own argument.
/bin/bash mfc.sh test -v --max-attempts 3 -j "$(nproc)" --only $FAILED $PASSTHROUGH || exit $?
75 changes: 75 additions & 0 deletions .github/scripts/run_case_optimization.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
#!/bin/bash

# CI driver for the case-optimization tests.
# Meant to run inside a SLURM job; $job_device and $job_interface are expected
# to be provided by submit.sh.

set -e

source .github/scripts/detect-gpus.sh
source .github/scripts/gpu-opts.sh

# In GPU mode, assume a single GPU when detection reported none.
if [ "$job_device" = "gpu" ] && [ "$ngpus" -eq 0 ]; then
    ngpus=1
fi

# The venv Python interpreter (created by ./mfc.sh build) is needed for the
# output validation step below.
if [ ! -x build/venv/bin/python3 ]; then
    echo "ERROR: build/venv/bin/python3 not found."
    echo "The MFC build venv may not have been created. Was the pre-build step successful?"
    exit 1
fi

benchmarks=(
    benchmarks/5eq_rk3_weno3_hllc/case.py
    benchmarks/viscous_weno5_sgb_acoustic/case.py
    benchmarks/hypo_hll/case.py
    benchmarks/ibm/case.py
    benchmarks/igr/case.py
)

passed=0
failed=0
failed_cases=""

# Remove the output directories of a case.
# $1 - case directory
clean_case_output() {
    rm -rf "$1/D" "$1/p_all" "$1/restart_data"
}

# Print the FAIL line for a case and add it to the failure tally.
# $1 - case name, $2 - reason shown in parentheses
record_failure() {
    echo "FAIL: $1 ($2)"
    failed=$((failed + 1))
    failed_cases="$failed_cases $1"
}

for case in "${benchmarks[@]}"; do
    case_dir="$(dirname "$case")"
    case_name="$(basename "$case_dir")"
    echo ""
    echo "========================================"
    echo "Case-optimization test: $case_name"
    echo "========================================"

    # Start from a clean slate.
    clean_case_output "$case_dir"

    # Build + run with --case-optimization, small grid, 10 timesteps, then
    # validate the output. $gpu_opts is unquoted on purpose (may hold
    # several words).
    if ! ./mfc.sh run "$case" --case-optimization $gpu_opts -n "$ngpus" -j "$(nproc)" -- --gbpp 1 --steps 10; then
        record_failure "$case_name" "build or run error"
    elif ! build/venv/bin/python3 .github/scripts/check_case_optimization_output.py "$case_dir"; then
        record_failure "$case_name" "validation error"
    else
        echo "PASS: $case_name"
        passed=$((passed + 1))
    fi

    # Do not leave output behind for the next case.
    clean_case_output "$case_dir"
done

echo ""
echo "========================================"
echo "Case-optimization summary: $passed passed, $failed failed"
if [ $failed -gt 0 ]; then
    echo "Failed cases:$failed_cases"
fi
echo "========================================"

if [ $failed -eq 0 ]; then
    exit 0
fi
exit 1
66 changes: 64 additions & 2 deletions .github/scripts/setup-build-cache.sh
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#!/bin/bash
# Sets up a persistent build cache for self-hosted CI runners.
# Creates a symlink: ./build -> /storage/coda1/d-coc/0/sbryngelson3/.mfc-ci-cache/<key>/build
# Creates a symlink: ./build -> <cache_root>/<key>/build
#
# Each runner gets its own cache keyed by (cluster, device, interface, runner).
# This avoids cross-runner path issues entirely — CMake's absolute paths are
Expand All @@ -13,8 +13,58 @@ _cache_device="${2:?}"
_cache_interface="${3:-none}"
_cache_runner="${RUNNER_NAME:?RUNNER_NAME not set}"

# Select cache root based on cluster (each HPC system has its own persistent storage).
# Clusters without a configured root print a notice and skip cache setup.
case "$_cache_cluster" in
phoenix)
_cache_root="/storage/coda1/d-coc/0/sbryngelson3/.mfc-ci-cache" ;;
frontier|frontier_amd)
_cache_root="/lustre/orion/cfd154/scratch/sbryngelson/.mfc-ci-cache" ;;
*)
echo "=== Build Cache Setup ==="
echo " No cache root configured for cluster '$_cache_cluster' — skipping."
echo "========================="
# `return` succeeds when this file is sourced; when the file is executed
# directly `return` fails (error message discarded) and `exit 0` is used instead.
return 0 2>/dev/null || exit 0 ;;
esac

_cache_key="${_cache_cluster}-${_cache_device}-${_cache_interface}-${_cache_runner}"
_cache_base="/storage/coda1/d-coc/0/sbryngelson3/.mfc-ci-cache/${_cache_key}/build"
_cache_base="${_cache_root}/${_cache_key}/build"

# Report whether a cache directory is usable: it can be listed, an existing
# lock.yaml can be stat'ed, and a probe file can be created and removed
# (intended to catch stale file handles before a build is attempted).
# Arguments: $1 - directory to check
# Outputs:   one " Health check FAILED: ..." line on stdout for the first failing check
# Returns:   0 when every check passes, 1 otherwise
_cache_healthy() {
    local dir="$1"
    local probe="$dir/.nfs_probe.$$"

    ls "$dir" > /dev/null 2>&1 || {
        echo " Health check FAILED: cannot list $dir"
        return 1
    }

    if [ -e "$dir/lock.yaml" ]; then
        stat "$dir/lock.yaml" > /dev/null 2>&1 || {
            echo " Health check FAILED: cannot stat $dir/lock.yaml"
            return 1
        }
    fi

    # Write test: the probe file must be creatable and removable.
    if touch "$probe" 2>/dev/null && rm -f "$probe" 2>/dev/null; then
        return 0
    fi

    echo " Health check FAILED: cannot write/remove probe in $dir"
    rm -f "$probe" 2>/dev/null
    return 1
}

# Last-resort recovery: park the existing cache directory under a timestamped
# ".stale.<epoch>" name and create an empty directory in its place.
# mv is tried first because it works on the parent directory entry and can
# succeed even when children have stale file handles that defeat rm -rf;
# rm -rf is only the fallback when the rename fails.
# Arguments: $1 - cache directory to replace (defaults to $_cache_base)
_cache_nuke() {
    local base="${1:-$_cache_base}"
    local parked="${base}.stale.$(date +%s)"

    echo " NFS cache nuke: parking stale dir -> $parked"
    if ! mv "$base" "$parked" 2>/dev/null; then
        echo " NFS cache nuke: mv failed, trying rm -rf as fallback"
        rm -rf "$base" 2>/dev/null || true
    else
        echo " NFS cache nuke: renamed successfully"
    fi

    mkdir -p "$base"
    echo " NFS cache nuke: fresh cache created at $base"
}

mkdir -p "$_cache_base"
_cache_dir="$(cd "$_cache_base" && pwd -P)"
Expand All @@ -23,6 +73,13 @@ echo "=== Build Cache Setup ==="
echo " Cache key: $_cache_key"
echo " Cache dir: $_cache_dir"

# Pre-flight: detect stale NFS handles before wasting a build attempt.
if ! _cache_healthy "$_cache_dir"; then
echo " Stale NFS cache detected — nuking and recreating."
_cache_nuke "$_cache_base"
_cache_dir="$(cd "$_cache_base" && pwd -P)"
fi

# Replace any existing build/ (real dir or stale symlink) with a symlink
# to our runner-specific cache directory.
# Use unlink for symlinks to avoid rm -rf following the link and deleting
Expand All @@ -36,4 +93,9 @@ fi
ln -s "$_cache_dir" "build"

echo " Symlink: build -> $_cache_dir"

# Garbage-collect stale cache dirs parked by _cache_nuke more than 7 days ago.
_cache_parent="$(dirname "$_cache_base")"
find "$_cache_parent" -maxdepth 1 -name "*.stale.*" -mtime +7 -exec rm -rf {} + 2>/dev/null || true

echo "========================="
Loading
Loading