Skip to content

Commit 6b72dd9

Browse files
committed
Added watchdog workflow for failed CI jobs
1 parent 597fe77 commit 6b72dd9

7 files changed

Lines changed: 134 additions & 1 deletion

File tree

.github/workflows/binary_tarballs.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,7 @@ jobs:
8686
name: ${{ matrix.image }}:${{ matrix.image_tag }} ${{ matrix.arch }}
8787
needs: [ discover, release-tarballs ]
8888
runs-on: ${{ matrix.runner }}
89+
timeout-minutes: 120
8990
strategy:
9091
fail-fast: false
9192
matrix: ${{ fromJSON(needs.discover.outputs.matrix_build) }}

.github/workflows/deploy.yml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,7 @@ jobs:
6868
name: ${{ matrix.image }}/${{ matrix.image_tag }} ${{ matrix.arch }}
6969
needs: [ overview, discover ]
7070
runs-on: ${{ matrix.runner }}
71+
timeout-minutes: 360
7172
strategy:
7273
fail-fast: false
7374
matrix: ${{ fromJSON(needs.discover.outputs.matrix_build) }}
@@ -199,6 +200,7 @@ jobs:
199200
name: ${{ matrix.image }}/${{ matrix.image_tag }}
200201
needs: [ build_arch, discover ]
201202
runs-on: ubuntu-latest
203+
timeout-minutes: 60
202204
# If arm64 was skipped (e.g., archlinux), still publish amd64-only manifest.
203205
strategy:
204206
fail-fast: false
Lines changed: 117 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,117 @@
1+
name: Retry Failed Matrix Jobs
2+
3+
permissions:
4+
actions: write
5+
contents: read
6+
7+
env:
8+
FORCE_JAVASCRIPT_ACTIONS_TO_NODE24: true
9+
10+
on:
11+
workflow_run:
12+
workflows: [ "Test", "Sanitize", "Deploy", "Binary Tarballs" ]
13+
types: [ completed ]
14+
15+
jobs:
16+
rerun-failed:
17+
name: Retry failed matrix jobs
18+
if: >-
19+
${{
20+
github.event.workflow_run.head_repository.full_name == github.repository &&
21+
github.event.workflow_run.run_attempt == 1 &&
22+
(
23+
github.event.workflow_run.conclusion == 'failure' ||
24+
github.event.workflow_run.conclusion == 'timed_out'
25+
)
26+
}}
27+
runs-on: ubuntu-latest
28+
steps:
29+
- name: Re-run failed jobs when most matrix jobs passed
30+
uses: actions/github-script@v8
31+
with:
32+
script: |
33+
const run = context.payload.workflow_run;
34+
const owner = context.repo.owner;
35+
const repo = context.repo.repo;
36+
const workflowName = run.name;
37+
38+
const excludedByWorkflow = {
39+
"Test": [
40+
"Create job matrix",
41+
"Test - summary",
42+
],
43+
"Sanitize": [
44+
"Create job matrix",
45+
"Sanitize - summary",
46+
],
47+
"Deploy": [
48+
"Workflow Overview",
49+
"Create Job Matrices",
50+
"Mirror images to Docker Hub",
51+
"Update Docker Hub description",
52+
"Deploy - summary",
53+
],
54+
"Binary Tarballs": [
55+
"Attach GEMC tarballs to dev release",
56+
"Create Job Matrix",
57+
],
58+
};
59+
60+
const excluded = new Set(
61+
(excludedByWorkflow[workflowName] || []).map((name) => name.toLowerCase()),
62+
);
63+
64+
const jobs = await github.paginate(github.rest.actions.listJobsForWorkflowRun, {
65+
owner,
66+
repo,
67+
run_id: run.id,
68+
per_page: 100,
69+
});
70+
71+
const countedConclusions = new Set([
72+
"success",
73+
"failure",
74+
"cancelled",
75+
"timed_out",
76+
"action_required",
77+
]);
78+
79+
const matrixJobs = jobs.filter((job) => {
80+
const name = job.name.trim();
81+
return job.status === "completed"
82+
&& countedConclusions.has(job.conclusion)
83+
&& !excluded.has(name.toLowerCase());
84+
});
85+
86+
const successCount = matrixJobs.filter((job) => job.conclusion === "success").length;
87+
const failedCount = matrixJobs.length - successCount;
88+
const successRatio = matrixJobs.length === 0 ? 0 : successCount / matrixJobs.length;
89+
90+
core.info(`Workflow: ${workflowName}`);
91+
core.info(`Run: ${run.html_url}`);
92+
core.info(`Matrix jobs: ${matrixJobs.length}`);
93+
core.info(`Matrix successes: ${successCount}`);
94+
core.info(`Matrix failed/cancelled/timed out: ${failedCount}`);
95+
96+
if (matrixJobs.length === 0) {
97+
core.notice("No matrix jobs were found for this workflow; not retrying.");
98+
return;
99+
}
100+
101+
if (failedCount === 0) {
102+
core.notice("No failed matrix jobs were found; not retrying.");
103+
return;
104+
}
105+
106+
if (successRatio < 0.5) {
107+
core.notice("Fewer than 50% of matrix jobs succeeded; not retrying failed jobs.");
108+
return;
109+
}
110+
111+
await github.rest.actions.reRunWorkflowFailedJobs({
112+
owner,
113+
repo,
114+
run_id: run.id,
115+
});
116+
117+
core.notice("Requested a re-run of failed jobs for this workflow run.");

.github/workflows/sanitize.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,7 @@ jobs:
6464
fail-fast: false
6565
matrix: ${{ fromJSON(needs.discover.outputs.matrix_sanitize) }}
6666
runs-on: ${{ matrix.runner }}
67+
timeout-minutes: 180
6768
container: ${{ matrix.container }}
6869

6970
steps:

.github/workflows/test.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,7 @@ jobs:
6868
name: ${{ matrix.image }}/${{ matrix.image_tag }} ${{ matrix.arch }}
6969
needs: [ discover ]
7070
runs-on: ${{ matrix.runner }}
71+
timeout-minutes: 180
7172
strategy:
7273
fail-fast: false
7374
matrix: ${{ fromJSON(needs.discover.outputs.matrix_build) }}

CITATION.cff

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ preferred-citation:
1717
authors:
1818
- family-names: Ungaro
1919
given-names: Maurizio
20+
orcid: "https://orcid.org/0000-0001-6982-3310"
2021
collection-title: "European Physical Journal Web of Conferences"
2122
volume: 295
2223
year: 2024

releases/0.4.md

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ This version includes:
66
- A `-global_field=<name>` option to attach a configured field to the ROOT world volume.
77
- A `-no_field=<volume|list|all>` option to remove field associations; unused field plugins and maps
88
are no longer loaded.
9+
- A `-max_field_step=<length>` option to cap the largest Geant4 magnetic-field propagation step.
910
- A new `asciimap` field plugin (`type: asciimap`) that loads ASCII field maps from data-only files
1011
with the grid defined in YAML — the GEMC3 successor of the clas12 `asciiField`.
1112
- Field-only runs now initialize against the default ROOT world when no detector system is selected.
@@ -35,6 +36,9 @@ This version includes:
3536
gvolume name, a whitespace- or comma-separated list of names, or the special value `all`, which
3637
resets every per-volume field and also clears `-global_field`. Fields left unused after the reset
3738
are skipped: their plugins and field maps are no longer loaded.
39+
- Added `-max_field_step=<length>` to set Geant4's largest acceptable magnetic-field propagation
40+
step through `G4PropagatorInField::SetLargestAcceptableStep()`. The default `0*mm` leaves the
41+
Geant4 propagator default unchanged.
3842
- Field-line visualization now applies `/vis/set/extentForField` from the configured ROOT `G4Box`
3943
half-lengths before issuing `/vis/scene/add/magneticField`, avoiding misleading field-line
4044
sampling when the scene extent is dominated by detector geometry.
@@ -98,6 +102,8 @@ This version includes:
98102
- Added gfield tests for `-fieldAt` and `-fieldMapPoints` query workflows.
99103
- Added `asciimap` load and `-fieldAt` tests for the dipole, solenoid, and phi-segmented torus
100104
example maps.
105+
- Updated the `gemc_gfield_verbose` integration test to install a global field and exercise
106+
`-max_field_step=5*mm`.
101107

102108
<br/>
103109

@@ -107,7 +113,7 @@ This version includes:
107113
- [Issue #28](https://github.com/gemc/src/issues/28): add buttons to g4display
108114
- [Issue #31](https://github.com/gemc/src/issues/31): improve field ASCII and add SQLITE factory and apply it to gemc3
109115
- [Issue #42](https://github.com/gemc/src/issues/42): output options for digitized and true infos
110-
- [Issue #57](https://github.com/gemc/src/issues/57): min and max field steps definitions
116+
- [Issue #57](https://github.com/gemc/src/issues/57): add max field step definition
111117
- [Issue #59](https://github.com/gemc/src/issues/59): TSGQt plotter see B5 example from presentation
112118
- [Issue #75](https://github.com/gemc/src/issues/75): add histo tab with g4histos
113119
- [Issue #76](https://github.com/gemc/src/issues/76): refined CAD interface, add SQLITE like gemc2
@@ -175,6 +181,10 @@ Both x86_64 and ARM64 platforms are supported.
175181
name and the fully-qualified Geant4 name, an unmatched name emits a warning, and `all` additionally
176182
suppresses the global field. `GMagneto`'s constructor gained an optional `required_fields` set
177183
(empty meaning "load everything", preserving the field-query path).
184+
- Added the `-max_field_step=<length>` option (defined in the gfields module, consumed in
185+
`GMagneto`). Positive values are parsed with GEMC's Geant4-unit parser and passed to
186+
`G4PropagatorInField::SetLargestAcceptableStep()`, while the default `0*mm` leaves Geant4's
187+
propagation limit unchanged. Per-field `minimum_step` still controls the field integration driver.
178188
- The GUI field-line toggle now parses the configured ROOT `G4Box` dimensions and sends
179189
`/vis/set/extentForField -dx dx -dy dy -dz dz mm` before adding or refreshing the magnetic-field
180190
model. Non-`G4Box` roots keep Geant4's default scene-extent behavior.

0 commit comments

Comments
 (0)