Concurrent Frontier CI: parallel test+bench, consolidated SLURM jobs #4171
Workflow file for this run
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Test Suite workflow: top-level name, triggers and concurrency policy.
name: Test Suite

# Runs on pushes to master, on every pull request, and on manual dispatch.
on:
  push:
    branches:
      - master
  pull_request:
  workflow_dispatch:

# One live run per workflow + ref; starting a new run cancels the one
# already in progress for the same group.
concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: true

jobs:
# Job lint-gate: formatting, spelling and source-lint checks on a hosted runner.
  lint-gate:
    name: Lint Gate
    runs-on: ubuntu-latest
    steps:
      - name: Clone
        uses: actions/checkout@v4

      - name: Setup Python
        uses: actions/setup-python@v5
        with:
          python-version: "3.12"

      - name: Initialize MFC
        run: ./mfc.sh init

      # Re-run the formatter, then fail if it modified any tracked file.
      - name: Check Formatting
        run: |
          ./mfc.sh format -j $(nproc)
          if ! git diff --exit-code; then
            echo "::error::Code is not formatted. Run './mfc.sh format' locally."
            exit 1
          fi

      - name: Spell Check
        run: ./mfc.sh spelling

      - name: Lint Toolchain
        run: ./mfc.sh lint

      # Each grep-based lint below is negated with `!`: the step fails when
      # grep finds a match and passes when it finds none.

      # Raw `!$acc` / `!$omp` directives may only appear in the excluded
      # macro files and syscheck.fpp.
      - name: Lint Source - No Raw Directives
        run: |
          ! grep -iR '!\$acc\|!\$omp' --exclude="parallel_macros.fpp" --exclude="acc_macros.fpp" --exclude="omp_macros.fpp" --exclude="shared_parallel_macros.fpp" --exclude="syscheck.fpp" ./src/*

      # Rejects precision-specific spellings (dsqrt, dble, real(8), `d0`, ...)
      # outside the syscheck directory and the nvtx / precision_select files.
      - name: Lint Source - No Double Precision Intrinsics
        run: |
          ! grep -iR 'double_precision\|dsqrt\|dexp\|dlog\|dble\|dabs\|double\ precision\|real(8)\|real(4)\|dprod\|dmin\|dmax\|dfloat\|dreal\|dcos\|dsin\|dtan\|dsign\|dtanh\|dsinh\|dcosh\|d0' --exclude-dir=syscheck --exclude="*nvtx*" --exclude="*precision_select*" ./src/*

      # Rejects `...`, `---` and `===` anywhere under ./src.
      - name: Lint Source - No Junk Code
        run: |
          ! grep -iR -e '\.\.\.' -e '\-\-\-' -e '===' ./src/*
# Job file-changes: runs the path filter and publishes its `checkall` output.
  file-changes:
    name: Detect File Changes
    runs-on: ubuntu-latest
    outputs:
      # Re-exported so that other jobs can read it through `needs`.
      checkall: ${{ steps.changes.outputs.checkall }}
    steps:
      - name: Clone
        uses: actions/checkout@v4

      # Filter definitions are read from the repository file named below.
      - name: Detect Changes
        id: changes
        uses: dorny/paths-filter@v3
        with:
          filters: .github/file-filter.yml
# Job github: build + test matrix on GitHub-hosted Ubuntu and macOS runners.
  github:
    name: Github
    needs: [lint-gate, file-changes]
    # Only runs when the path filter reported `checkall`.
    if: needs.file-changes.outputs.checkall == 'true'
    # A failure in this job does not fail the workflow run.
    continue-on-error: true
    strategy:
      # Let every matrix combination run to completion.
      fail-fast: false
      matrix:
        os: ['ubuntu', 'macos']
        mpi: ['mpi']
        # Empty string = no precision flag is passed to mfc.sh.
        precision: ['']
        debug: ['debug', 'no-debug']
        intel: [true, false]
        exclude:
          # No Intel toolchain combination on macOS.
          - os: macos
            intel: true
        include:
          # One extra combination: single precision without MPI.
          - os: ubuntu
            mpi: no-mpi
            precision: single
            debug: no-debug
            intel: false
    runs-on: ${{ matrix.os }}-latest
    steps:
      - name: Clone
        uses: actions/checkout@v4

      # Homebrew dependencies; FC and BOOST_INCLUDE are exported to later steps.
      - name: Setup MacOS
        if: matrix.os == 'macos'
        run: |
          brew update
          brew upgrade
          brew install coreutils python fftw hdf5 gcc@15 boost open-mpi lapack
          echo "FC=gfortran-15" >> $GITHUB_ENV
          echo "BOOST_INCLUDE=/opt/homebrew/include/" >> $GITHUB_ENV

      - name: Setup Ubuntu
        if: matrix.os == 'ubuntu' && matrix.intel == false
        run: |
          sudo apt update -y
          sudo apt install -y cmake gcc g++ python3 python3-dev hdf5-tools \
            libfftw3-dev libhdf5-dev openmpi-bin libopenmpi-dev \
            libblas-dev liblapack-dev

      # Installs the oneAPI Fortran compiler and MPI from Intel's apt
      # repository, sources setvars.sh, then appends the whole resulting
      # environment to GITHUB_ENV so that later steps inherit it.
      - name: Setup Ubuntu (Intel)
        if: matrix.os == 'ubuntu' && matrix.intel == true
        run: |
          wget https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB
          sudo apt-key add GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB
          sudo add-apt-repository "deb https://apt.repos.intel.com/oneapi all main"
          sudo apt-get update
          sudo apt-get install -y intel-oneapi-compiler-fortran intel-oneapi-mpi intel-oneapi-mpi-devel
          source /opt/intel/oneapi/setvars.sh
          printenv >> $GITHUB_ENV

      - name: Set up Python 3.14
        uses: actions/setup-python@v5
        with:
          python-version: '3.14'

      # Dry run of the test command with the matrix's build flags.
      # The precision flag is assembled in an expression so that an empty
      # matrix.precision contributes nothing to the command line; plain
      # `--${{ matrix.precision }}` would expand to a bare `--`, the
      # end-of-options marker, directly in front of $TEST_ALL.
      - name: Build
        run: |
          /bin/bash mfc.sh test -v --dry-run -j $(nproc) --${{ matrix.debug }} --${{ matrix.mpi }} $PRECISION $TEST_ALL
        env:
          PRECISION: ${{ matrix.precision != '' && format('--{0}', matrix.precision) || '' }}
          TEST_ALL: ${{ matrix.mpi == 'mpi' && '--test-all' || '' }}

      # `--test-all` is passed for MPI builds only; debug builds additionally
      # pass `-% 20` (presumably a 20% sample of the suite; confirm against
      # `mfc.sh test --help`).
      - name: Test
        run: |
          /bin/bash mfc.sh test -v --max-attempts 3 -j $(nproc) $TEST_ALL $TEST_PCT
        env:
          TEST_ALL: ${{ matrix.mpi == 'mpi' && '--test-all' || '' }}
          TEST_PCT: ${{ matrix.debug == 'debug' && '-% 20' || '' }}
# Job self: test suite on the self-hosted Phoenix and Frontier runners.
  self:
    # Rendered as "<cluster> (<device>[-<interface>])"; the interface suffix is
    # dropped when the interface is 'none'.
    name: "${{ matrix.cluster_name }} (${{ matrix.device }}${{ matrix.interface != 'none' && format('-{0}', matrix.interface) || '' }})"
    needs: [lint-gate, file-changes]
    # Restricted to the MFlowCode/MFC repository, and only when the path filter
    # reported `checkall`.
    if: github.repository == 'MFlowCode/MFC' && needs.file-changes.outputs.checkall == 'true'
    continue-on-error: false
    timeout-minutes: 480
    strategy:
      matrix:
        include:
          # Phoenix (GT) — build+test combined in SLURM job
          - runner: gt
            cluster: phoenix
            cluster_name: 'Georgia Tech | Phoenix'
            device: gpu
            interface: acc
          - runner: gt
            cluster: phoenix
            cluster_name: 'Georgia Tech | Phoenix'
            device: gpu
            interface: omp
          - runner: gt
            cluster: phoenix
            cluster_name: 'Georgia Tech | Phoenix'
            device: cpu
            interface: none
          # Frontier (ORNL) — all configs consolidated into one 5-node SLURM job
          - runner: frontier
            cluster: frontier_all
            cluster_name: 'Oak Ridge | Frontier'
            device: all
            interface: configs
    runs-on:
      group: phoenix
      labels: ${{ matrix.runner }}
    env:
      # Node heap cap is set on Phoenix only; empty elsewhere.
      NODE_OPTIONS: ${{ matrix.cluster == 'phoenix' && '--max-old-space-size=2048' || '' }}
      ACTIONS_RUNNER_FORCE_ACTIONS_NODE_VERSION: node16
      ACTIONS_ALLOW_USE_UNSECURE_NODE_VERSION: true
    steps:
      - name: Clone
        uses: actions/checkout@v4

      # Separate build step; its condition excludes phoenix and frontier_all,
      # which are the only clusters in the matrix above.
      - name: Build
        if: matrix.cluster != 'phoenix' && matrix.cluster != 'frontier_all'
        run: bash .github/workflows/${{ matrix.cluster }}/build.sh ${{ matrix.device }} ${{ matrix.interface }}

      # Per-cluster submit.sh is handed the cluster's test.sh plus device and
      # interface.
      - name: Test
        if: matrix.cluster != 'frontier_all'
        run: bash .github/workflows/${{ matrix.cluster }}/submit.sh .github/workflows/${{ matrix.cluster }}/test.sh ${{ matrix.device }} ${{ matrix.interface }}

      - name: Build & Test (Frontier All Configs)
        if: matrix.cluster == 'frontier_all'
        run: bash .github/scripts/run_frontier_all_tests.sh

      # Best effort: missing log files are not an error.
      - name: Print Logs
        if: always()
        run: |
          cat test-*.out 2>/dev/null || true

      # Logs are uploaded for every cluster except phoenix.
      - name: Archive Logs
        if: always() && matrix.cluster != 'phoenix'
        uses: actions/upload-artifact@v4
        with:
          name: logs-${{ matrix.cluster }}
          path: test-*.out
# Job bench: benchmarks the checked-out change against master on the self-hosted runners.
  bench:
    # Rendered as "Benchmark | <name> (<device>[-<interface>])"; the interface
    # suffix is dropped when the interface is 'none'.
    name: "Benchmark | ${{ matrix.name }} (${{ matrix.device }}${{ matrix.interface != 'none' && format('-{0}', matrix.interface) || '' }})"
    needs: [lint-gate, file-changes]
    # Restricted to the MFlowCode/MFC repository, and only when the path filter
    # reported `checkall`.
    if: github.repository == 'MFlowCode/MFC' && needs.file-changes.outputs.checkall == 'true'
    continue-on-error: false
    timeout-minutes: 480
    strategy:
      fail-fast: true
      matrix:
        include:
          - cluster: phoenix
            name: 'Georgia Tech | Phoenix (NVHPC)'
            group: phoenix
            labels: gt
            flag: p
            device: cpu
            interface: none
            build_script: ""
          - cluster: phoenix
            name: 'Georgia Tech | Phoenix (NVHPC)'
            group: phoenix
            labels: gt
            flag: p
            device: gpu
            interface: acc
            build_script: ""
          - cluster: phoenix
            name: 'Georgia Tech | Phoenix (NVHPC)'
            group: phoenix
            labels: gt
            flag: p
            device: gpu
            interface: omp
            build_script: ""
          # Frontier — all configs consolidated into one 6-node SLURM job
          - cluster: frontier_all
            name: 'Oak Ridge | Frontier'
            group: phoenix
            labels: frontier
            device: all
            interface: configs
            build_script: ""
    runs-on:
      group: ${{ matrix.group }}
      labels: ${{ matrix.labels }}
    env:
      ACTIONS_RUNNER_FORCE_ACTIONS_NODE_VERSION: node16
      ACTIONS_ALLOW_USE_UNSECURE_NODE_VERSION: true
    steps:
      # Two side-by-side checkouts: the triggering ref in ./pr and upstream
      # master in ./master.
      - name: Clone - PR
        uses: actions/checkout@v4
        with:
          path: pr

      - name: Clone - Master
        uses: actions/checkout@v4
        with:
          repository: MFlowCode/MFC
          ref: master
          path: master

      # Runs the matrix build script in both checkouts in parallel and waits
      # for both. Every current matrix entry has an empty build_script, so
      # this step is skipped.
      - name: Setup & Build
        if: matrix.build_script != '' && matrix.cluster != 'frontier_all'
        run: |
          (cd pr && ${{ matrix.build_script }}) &
          (cd master && ${{ matrix.build_script }}) &
          wait %1 && wait %2

      - name: Bench (Master v. PR)
        if: matrix.cluster != 'frontier_all'
        run: bash pr/.github/scripts/run_parallel_benchmarks.sh ${{ matrix.device }} ${{ matrix.interface }} ${{ matrix.cluster }}

      - name: Bench All Configs (Frontier)
        if: matrix.cluster == 'frontier_all'
        run: bash pr/.github/scripts/run_frontier_all_benchmarks.sh

      # `mfc.sh load` is sourced in the same shell that then runs bench_diff.
      # Sourcing it inside its own `( ... )` subshell would throw the loaded
      # environment away before bench_diff starts.
      - name: Generate & Post Comment
        if: matrix.cluster != 'frontier_all'
        run: |
          cd pr
          . ./mfc.sh load -c ${{ matrix.flag }} -m g
          ./mfc.sh bench_diff ../master/bench-${{ matrix.device }}-${{ matrix.interface }}.yaml ../pr/bench-${{ matrix.device }}-${{ matrix.interface }}.yaml

      - name: Generate & Post Comments (Frontier)
        if: matrix.cluster == 'frontier_all'
        run: bash pr/.github/scripts/frontier_bench_post.sh

      # Best effort: missing files are not an error.
      - name: Print Logs
        if: always()
        run: |
          cat pr/bench-*.* master/bench-*.* 2>/dev/null || true
          cat pr-*/bench-*.* master-*/bench-*.* 2>/dev/null || true

      # Results are uploaded for every cluster except phoenix.
      - name: Archive Logs
        if: always() && matrix.cluster != 'phoenix'
        uses: actions/upload-artifact@v4
        with:
          name: bench-${{ matrix.cluster }}
          path: |
            pr*/bench-*.*
            master*/bench-*.*
# Job cancel-on-test-failure: cancels the whole workflow run when the `self` job fails.
  cancel-on-test-failure:
    name: Cancel on Test Failure
    needs: [self]
    # failure() is true when a job listed in `needs` has failed.
    if: failure()
    runs-on: ubuntu-latest
    # Cancelling a run requires write access to Actions. Request exactly that
    # scope instead of depending on the repository's default token permissions.
    permissions:
      actions: write
    steps:
      - name: Cancel Workflow Run
        run: gh run cancel ${{ github.run_id }} --repo ${{ github.repository }}
        env:
          # The gh CLI authenticates with the job's token.
          GH_TOKEN: ${{ github.token }}