Skip to content

Commit 68b035b

Browse files
committed
Add gcov-based test pruning with file-level coverage cache
1 parent 007b6a9 commit 68b035b

15 files changed

Lines changed: 1840 additions & 28 deletions

File tree

.github/file-filter.yml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ yml: &yml
2525
- '.github/workflows/phoenix/**'
2626
- '.github/workflows/frontier/**'
2727
- '.github/workflows/frontier_amd/**'
28+
- '.github/workflows/common/**'
2829
- '.github/scripts/**'
2930
- '.github/workflows/bench.yml'
3031
- '.github/workflows/test.yml'
@@ -37,3 +38,6 @@ checkall: &checkall
3738
- *tests
3839
- *scripts
3940
- *yml
41+
42+
cases_py:
43+
- 'toolchain/mfc/test/cases.py'

.github/scripts/submit-slurm-job.sh

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -185,6 +185,7 @@ job_device="$device"
185185
job_interface="$interface"
186186
job_shard="$shard"
187187
job_cluster="$cluster"
188+
export GITHUB_EVENT_NAME="$GITHUB_EVENT_NAME"
188189
189190
. ./mfc.sh load -c $compiler_flag -m $module_mode
190191
Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
#!/bin/bash
2+
set -e
3+
4+
# Number of parallel jobs: use SLURM allocation or default to 24.
5+
# Cap at 64 to avoid overwhelming OpenMPI daemons and OS process limits with concurrent launches.
6+
NJOBS="${SLURM_CPUS_ON_NODE:-24}"
7+
if [ "$NJOBS" -gt 64 ]; then NJOBS=64; fi
8+
9+
# Clean stale build artifacts: the self-hosted runner may have a cached
10+
# GPU build (e.g. --gpu mp) whose CMake flags are incompatible with gcov.
11+
./mfc.sh clean
12+
13+
# Source retry_build() for NFS stale file handle resilience (3 attempts).
14+
source .github/scripts/retry-build.sh
15+
16+
# Build MFC with gcov coverage instrumentation (CPU-only, gfortran).
17+
retry_build ./mfc.sh build --gcov -j 8
18+
19+
# Run all tests in parallel, collecting per-test coverage data.
20+
# Each test gets an isolated GCOV_PREFIX directory so .gcda files
21+
# don't collide. Coverage is collected per-test after all tests finish.
22+
# --gcov is required so the internal build step preserves instrumentation.
23+
./mfc.sh test --build-coverage-cache --gcov -j "$NJOBS"

.github/workflows/common/test.sh

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -82,4 +82,10 @@ if [ -n "${job_shard:-}" ]; then
8282
shard_opts="--shard $job_shard"
8383
fi
8484

85-
./mfc.sh test -v --max-attempts 3 -a -j $n_test_threads $rdma_opts $device_opts $build_opts $shard_opts -- -c $job_cluster
85+
# Only prune tests on PRs; master pushes must run the full suite.
86+
prune_flag=""
87+
if [ "${GITHUB_EVENT_NAME:-}" = "pull_request" ]; then
88+
prune_flag="--only-changes"
89+
fi
90+
91+
./mfc.sh test -v --max-attempts 3 $prune_flag -a -j $n_test_threads $rdma_opts $device_opts $build_opts $shard_opts -- -c $job_cluster

.github/workflows/test.yml

Lines changed: 132 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -56,22 +56,122 @@ jobs:
5656
file-changes:
5757
name: Detect File Changes
5858
runs-on: 'ubuntu-latest'
59-
outputs:
59+
outputs:
6060
checkall: ${{ steps.changes.outputs.checkall }}
61+
cases_py: ${{ steps.changes.outputs.cases_py }}
62+
dep_changed: ${{ steps.dep-check.outputs.dep_changed }}
6163
steps:
6264
- name: Clone
6365
uses: actions/checkout@v4
6466

6567
- name: Detect Changes
6668
uses: dorny/paths-filter@v3
6769
id: changes
68-
with:
70+
with:
6971
filters: ".github/file-filter.yml"
7072

73+
- name: Check for Fortran dependency changes
74+
id: dep-check
75+
env:
76+
GH_TOKEN: ${{ github.token }}
77+
run: |
78+
# Detect added/removed use/include statements that change the
79+
# Fortran dependency graph, which would make the coverage cache stale.
80+
PR_NUMBER="${{ github.event.pull_request.number }}"
81+
BEFORE="${{ github.event.before }}"
82+
AFTER="${{ github.event.after }}"
83+
if [ "${{ github.event_name }}" = "pull_request" ]; then
84+
# Default to dep_changed=true if gh pr diff fails (safe fallback).
85+
DIFF=$(gh pr diff "$PR_NUMBER" 2>/dev/null) || {
86+
echo "gh pr diff failed — defaulting to dep_changed=true for safety."
87+
echo "dep_changed=true" >> "$GITHUB_OUTPUT"
88+
exit 0
89+
}
90+
elif [ "${{ github.event_name }}" = "push" ]; then
# The Clone step above uses the default shallow checkout, so the pre-push
# commit is not available locally. Fetch it first; otherwise the diff below
# always fails and every push falls through to the dep_changed=true fallback.
91+
DIFF=$({ git fetch --quiet --no-tags --depth=1 origin "$BEFORE" && git diff "$BEFORE".."$AFTER"; } 2>/dev/null) || {
92+
echo "git diff failed for push event — defaulting to dep_changed=true for safety."
93+
echo "dep_changed=true" >> "$GITHUB_OUTPUT"
94+
exit 0
95+
}
96+
else
97+
DIFF=""
98+
fi
99+
if echo "$DIFF" | \
100+
grep -qE '^[+-][[:space:]]*(use[[:space:],]+[a-zA-Z_]|#:include[[:space:]]|include[[:space:]]+['"'"'"])'; then
101+
echo "dep_changed=true" >> "$GITHUB_OUTPUT"
102+
echo "Fortran dependency change detected — will rebuild coverage cache."
103+
else
104+
echo "dep_changed=false" >> "$GITHUB_OUTPUT"
105+
fi
106+
107+
rebuild-cache:
108+
name: Rebuild Coverage Cache
109+
needs: [lint-gate, file-changes]
110+
if: >-
111+
github.repository == 'MFlowCode/MFC' &&
112+
(
113+
(github.event_name == 'pull_request' &&
114+
(needs.file-changes.outputs.cases_py == 'true' ||
115+
needs.file-changes.outputs.dep_changed == 'true')) ||
116+
(github.event_name == 'push' &&
117+
(needs.file-changes.outputs.cases_py == 'true' ||
118+
needs.file-changes.outputs.dep_changed == 'true')) ||
119+
github.event_name == 'workflow_dispatch'
120+
)
121+
timeout-minutes: 240
122+
runs-on:
123+
group: phoenix
124+
labels: gt
125+
permissions:
126+
contents: write  # Required by the "Commit Cache to Master" step (push and workflow_dispatch runs on master)
127+
steps:
128+
- name: Clone
129+
uses: actions/checkout@v4
130+
with:
131+
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
132+
clean: false
133+
134+
- name: Rebuild Cache via SLURM
135+
run: bash .github/scripts/submit-slurm-job.sh .github/workflows/common/rebuild-cache.sh cpu none phoenix
136+
137+
- name: Print Logs
138+
if: always()
139+
run: cat rebuild-cache-cpu-none.out
140+
141+
- name: Upload Cache Artifact
142+
if: github.event_name == 'pull_request'
143+
uses: actions/upload-artifact@v4
144+
with:
145+
name: coverage-cache
146+
path: toolchain/mfc/test/test_coverage_cache.json.gz
147+
retention-days: 1
148+
149+
- name: Commit Cache to Master
150+
if: (github.event_name == 'push' || github.event_name == 'workflow_dispatch') && github.ref == 'refs/heads/master'
151+
run: |
152+
git config user.name "github-actions[bot]"
153+
git config user.email "github-actions[bot]@users.noreply.github.com"
154+
git add toolchain/mfc/test/test_coverage_cache.json.gz
155+
if git diff --cached --quiet; then
156+
echo "Coverage cache unchanged."
157+
else
158+
git commit -m "Regenerate gcov coverage cache [skip ci]"
159+
# Rebase onto latest master in case it advanced during the build
160+
# (which can take 20-240 minutes). Only test_coverage_cache.json.gz
161+
# is changed, so rebase conflicts are essentially impossible.
162+
git fetch origin master
163+
git rebase origin/master
164+
git push origin HEAD:refs/heads/master
165+
fi
166+
71167
github:
72168
name: Github
73-
if: needs.file-changes.outputs.checkall == 'true'
74169
needs: [lint-gate, file-changes]
170+
if: >-
171+
!cancelled() &&
172+
needs.lint-gate.result == 'success' &&
173+
needs.file-changes.result == 'success' &&
174+
needs.file-changes.outputs.checkall == 'true'
75175
strategy:
76176
matrix:
77177
os: ['ubuntu', 'macos']
@@ -98,6 +198,19 @@ jobs:
98198
- name: Clone
99199
uses: actions/checkout@v4
100200

201+
- name: Fetch master for coverage diff
202+
run: |
203+
git fetch origin master:master --depth=1
204+
git fetch --deepen=200
205+
continue-on-error: true
206+
207+
- name: Download Coverage Cache
208+
uses: actions/download-artifact@v4
209+
with:
210+
name: coverage-cache
211+
path: toolchain/mfc/test
212+
continue-on-error: true
213+
101214
- name: Setup MacOS
102215
if: matrix.os == 'macos'
103216
run: |
@@ -140,15 +253,22 @@ jobs:
140253

141254
- name: Test
142255
run: |
143-
/bin/bash mfc.sh test -v --max-attempts 3 -j $(nproc) $TEST_ALL $TEST_PCT
256+
/bin/bash mfc.sh test -v --max-attempts 3 -j $(nproc) $ONLY_CHANGES $TEST_ALL $TEST_PCT
144257
env:
145258
TEST_ALL: ${{ matrix.mpi == 'mpi' && '--test-all' || '' }}
146259
TEST_PCT: ${{ matrix.debug == 'debug' && '-% 20' || '' }}
260+
ONLY_CHANGES: ${{ github.event_name == 'pull_request' && '--only-changes' || '' }}
147261

148262
self:
149263
name: "${{ matrix.cluster_name }} (${{ matrix.device }}${{ matrix.interface != 'none' && format('-{0}', matrix.interface) || '' }}${{ matrix.shard != '' && format(' [{0}]', matrix.shard) || '' }})"
150-
if: github.repository == 'MFlowCode/MFC' && needs.file-changes.outputs.checkall == 'true' && github.event.pull_request.draft != true
151264
needs: [lint-gate, file-changes]
265+
if: >-
266+
!cancelled() &&
267+
needs.lint-gate.result == 'success' &&
268+
needs.file-changes.result == 'success' &&
269+
github.repository == 'MFlowCode/MFC' &&
270+
needs.file-changes.outputs.checkall == 'true' &&
271+
github.event.pull_request.draft != true
152272
# Frontier CCE compiler is periodically broken by toolchain updates (e.g.
153273
# cpe/25.03 introduced an IPA SIGSEGV in CCE 19.0.0). Allow Frontier to
154274
# fail without blocking PR merges; Phoenix remains a hard gate.
@@ -237,6 +357,13 @@ jobs:
237357
- name: Clean stale output files
238358
run: rm -f *.out
239359

360+
- name: Download Coverage Cache
361+
uses: actions/download-artifact@v4
362+
with:
363+
name: coverage-cache
364+
path: toolchain/mfc/test
365+
continue-on-error: true
366+
240367
- name: Build (login node)
241368
if: matrix.cluster != 'phoenix'
242369
timeout-minutes: 60

.gitignore

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,9 @@ __pycache__
2222
# Auto-generated version file
2323
toolchain/mfc/_version.py
2424

25+
# Raw coverage cache — legacy, not tracked (the .json.gz version IS committed)
26+
toolchain/mfc/test/test_coverage_cache.json
27+
2528
# Auto-generated toolchain files (regenerate with: ./mfc.sh generate)
2629
toolchain/completions/mfc.bash
2730
toolchain/completions/_mfc

CMakeLists.txt

Lines changed: 17 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -113,6 +113,8 @@ endif()
113113
# debug builds. These include optimization and debug flags, as well as some that
114114
# are required for a successful build of MFC.
115115

116+
set(FYPP_GCOV_OPTS "")
117+
116118
if (CMAKE_Fortran_COMPILER_ID STREQUAL "GNU")
117119
add_compile_options(
118120
$<$<COMPILE_LANGUAGE:Fortran>:-ffree-line-length-none>
@@ -131,13 +133,20 @@ if (CMAKE_Fortran_COMPILER_ID STREQUAL "GNU")
131133
add_compile_options(
132134
$<$<COMPILE_LANGUAGE:Fortran>:-fprofile-arcs>
133135
$<$<COMPILE_LANGUAGE:Fortran>:-ftest-coverage>
134-
$<$<COMPILE_LANGUAGE:Fortran>:-O1>
135-
)
136+
)
136137

137138
add_link_options(
138139
$<$<COMPILE_LANGUAGE:Fortran>:-lgcov>
139140
$<$<COMPILE_LANGUAGE:Fortran>:--coverage>
140141
)
142+
143+
# Override Release -O3 with -O1 for gcov: coverage instrumentation is
144+
# inaccurate at -O3, and aggressive codegen (e.g. AVX-512 FP16 on
145+
# Granite Rapids) can emit instructions that older assemblers reject.
146+
# Set as a normal (non-cache) variable: it still overrides the Release flags
# for this configure, but is not written to CMakeCache.txt, so a later
# non-gcov configure of the same build directory is not stuck at -O1.
set(CMAKE_Fortran_FLAGS_RELEASE "-O1 -DNDEBUG")
147+
148+
# Use gfortran5 line markers so gcov can map coverage to .fpp sources.
149+
set(FYPP_GCOV_OPTS "--line-marker-format=gfortran5")
141150
endif()
142151

143152
if (CMAKE_BUILD_TYPE STREQUAL "Debug")
@@ -245,8 +254,11 @@ if (CMAKE_BUILD_TYPE STREQUAL "Release")
245254
endif()
246255
endif()
247256

248-
# Enable LTO/IPO if supported
249-
if (CMAKE_Fortran_COMPILER_ID STREQUAL "NVHPC")
257+
# Enable LTO/IPO if supported (skip for gcov — LTO interferes with coverage
258+
# instrumentation and can trigger assembler errors on newer architectures).
259+
if (MFC_GCov)
260+
message(STATUS "LTO/IPO disabled for gcov build")
261+
elseif (CMAKE_Fortran_COMPILER_ID STREQUAL "NVHPC")
250262
if (MFC_Unified)
251263
message(STATUS "LTO/IPO is not available with NVHPC using Unified Memory")
252264
elseif (CMAKE_Fortran_COMPILER_VERSION VERSION_GREATER "24.11" AND CMAKE_Fortran_COMPILER_VERSION VERSION_LESS "25.9")
@@ -392,6 +404,7 @@ macro(HANDLE_SOURCES target useCommon)
392404
--no-folding
393405
--line-length=999
394406
--line-numbering-mode=nocontlines
407+
${FYPP_GCOV_OPTS}
395408
"${fpp}" "${f90}"
396409
DEPENDS "${fpp};${${target}_incs}"
397410
COMMENT "Preprocessing (Fypp) ${fpp_filename}"

src/simulation/m_bubbles.fpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ module m_bubbles
1616
use m_variables_conversion !< State variables type conversion procedures
1717

1818
use m_helper_basic !< Functions to compare floating point numbers
19+
use m_helper_basic !< TEMP: duplicate use to force dep_changed=true in CI — remove before merge
1920

2021
implicit none
2122

toolchain/mfc/cli/commands.py

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -444,6 +444,27 @@
444444
type=str,
445445
default=None,
446446
),
447+
Argument(
448+
name="build-coverage-cache",
449+
help="Run all tests with gcov instrumentation to build the file-level coverage cache. Pass --gcov to enable coverage instrumentation in the internal build step.",
450+
action=ArgAction.STORE_TRUE,
451+
default=False,
452+
dest="build_coverage_cache",
453+
),
454+
Argument(
455+
name="only-changes",
456+
help="Only run tests whose covered files overlap with files changed since branching from master (uses file-level gcov coverage cache).",
457+
action=ArgAction.STORE_TRUE,
458+
default=False,
459+
dest="only_changes",
460+
),
461+
Argument(
462+
name="changes-branch",
463+
help="Branch to compare against for --only-changes (default: master).",
464+
type=str,
465+
default="master",
466+
dest="changes_branch",
467+
),
447468
],
448469
mutually_exclusive=[
449470
MutuallyExclusiveGroup(
@@ -476,13 +497,17 @@
476497
Example("./mfc.sh test -j 4", "Run with 4 parallel jobs"),
477498
Example("./mfc.sh test --only 3D", "Run only 3D tests"),
478499
Example("./mfc.sh test --generate", "Regenerate golden files"),
500+
Example("./mfc.sh test --only-changes -j 4", "Run tests affected by changed files"),
501+
# --gcov is repeated on the test command so that its internal build step keeps the coverage instrumentation.
Example("./mfc.sh build --gcov -j 8 && ./mfc.sh test --build-coverage-cache --gcov", "One-time: build file-coverage cache"),
479502
],
480503
key_options=[
481504
("-j, --jobs N", "Number of parallel test jobs"),
482505
("-o, --only PROP", "Run tests matching property"),
483506
("-f, --from UUID", "Start from specific test"),
484507
("--generate", "Generate/update golden files"),
485508
("--no-build", "Skip rebuilding MFC"),
509+
("--build-coverage-cache", "Build file-level gcov coverage cache (one-time)"),
510+
("--only-changes", "Run tests affected by changed files (requires cache)"),
486511
],
487512
)
488513

0 commit comments

Comments
 (0)