|
| 1 | +# ------------------------------------------------------------------------------------ |
| 2 | +# Cancel Workflow On Failure (Composite Action) (GoFortress) |
| 3 | +# |
| 4 | +# Purpose: Provide a reusable action that cancels the entire workflow run when |
| 5 | +# a critical job fails. This prevents wasting CI resources on parallel jobs |
| 6 | +# that will ultimately be invalidated by an earlier failure. |
| 7 | +# |
| 8 | +# This action handles: |
| 9 | +# - Cancelling the current workflow run via GitHub API |
| 10 | +# - Optional grace period before cancellation |
| 11 | +# - Detailed logging of the cancellation reason |
| 12 | +# - Skipping cancellation in specific scenarios (e.g., release tags) |
| 13 | +# - Safe handling of API errors and edge cases |
| 14 | +# |
| 15 | +# Usage: Add this as the LAST step in critical jobs with `if: failure()` |
| 16 | +# |
| 17 | +# - name: 🚨 Cancel workflow on failure |
| 18 | +# if: failure() |
| 19 | +# uses: ./.github/actions/cancel-workflow-on-failure |
| 20 | +# with: |
| 21 | +# reason: "Code quality checks failed" |
| 22 | +# |
| 23 | +# Permission Requirements: |
| 24 | +# The calling job needs `actions: write` permission to cancel workflows. |
| 25 | +# Add this to your job's permissions block: |
| 26 | +# |
| 27 | +# permissions: |
| 28 | +# contents: read |
| 29 | +# actions: write # Required for workflow cancellation |
| 30 | +# |
| 31 | +# Security Considerations: |
| 32 | +# - Uses GitHub's built-in GITHUB_TOKEN (no external secrets required) |
| 33 | +# - API calls are scoped to the current repository only |
| 34 | +# - Cancellation only affects the current workflow run |
| 35 | +# - The only network call is to the GitHub REST API; relies solely on bash and common CLI tools (curl, sed, tail) |
| 36 | +# |
| 37 | +# Maintainer: @mrz1836 |
| 38 | +# |
| 39 | +# ------------------------------------------------------------------------------------ |
| 40 | + |
name: "Cancel Workflow On Failure"
description: "Cancels the entire workflow run when a critical job fails to save CI resources"

# ---- Inputs (all optional) ---------------------------------------------------
inputs:
  # Free-text explanation; printed in the step log and in the job summary table.
  reason:
    required: false
    default: "A critical job failed"
    description: "Reason for cancellation (displayed in logs and job summary)"

  # When "true", runs whose ref is a tag are left alone.
  skip-on-tags:
    required: false
    default: "false"
    description: "Skip cancellation for tag pushes (useful for release workflows)"

  # Whole seconds to sleep before the cancel request is sent.
  grace-period:
    required: false
    default: "2"
    description: "Seconds to wait before cancelling (allows logs to flush)"

  # Left empty by default; the step then falls back to github.token.
  github-token:
    required: false
    default: ""
    description: "GitHub token with actions:write permission (defaults to GITHUB_TOKEN)"

# ---- Outputs (all produced by the step with id `cancel`) ----------------------
outputs:
  cancelled:
    description: "Whether the workflow cancellation was requested"
    value: ${{ steps.cancel.outputs.cancelled }}
  skipped:
    description: "Whether cancellation was skipped (e.g., tag push with skip-on-tags)"
    value: ${{ steps.cancel.outputs.skipped }}
  skip-reason:
    description: "Reason why cancellation was skipped (if applicable)"
    value: ${{ steps.cancel.outputs.skip-reason }}
| 72 | + |
runs:
  using: "composite"
  steps:
    - name: 🚨 Cancel workflow run
      id: cancel
      shell: bash
      # Every ${{ }} expression is resolved here and handed to the script as an
      # environment variable, so the script body itself contains no expressions.
      env:
        # Action inputs
        CANCEL_REASON: ${{ inputs.reason }}
        SKIP_ON_TAGS: ${{ inputs.skip-on-tags }}
        GRACE_PERIOD: ${{ inputs.grace-period }}
        # Token: the caller-supplied value wins, otherwise github.token is used.
        # The fallback lives here because composite action input defaults cannot
        # contain expressions such as github.token.
        GH_TOKEN: ${{ inputs.github-token || github.token }}
        # Run context used for logging and for building the API URL
        GITHUB_API_URL: ${{ github.api_url }}
        GITHUB_REPOSITORY: ${{ github.repository }}
        GITHUB_RUN_ID: ${{ github.run_id }}
        GITHUB_REF: ${{ github.ref }}
        GITHUB_WORKFLOW: ${{ github.workflow }}
        GITHUB_JOB: ${{ github.job }}
      run: |
| 92 | + # Note: Using set -uo pipefail (but not -e): |
| 93 | + # -e is avoided so we can handle errors (like curl failures and HTTP status codes) explicitly |
| 94 | + # -o pipefail causes a pipeline to return a failure exit status if any command in the pipeline fails, |
| 95 | + # but it does not by itself cause the script to exit without -e; exit behavior is controlled explicitly. |
| 96 | + # When adding new pipelines, ensure each one either handles failures explicitly (e.g., curl ... || CURL_EXIT=$?) |
| 97 | + # or that you deliberately check and react to the pipeline's exit status if early exit is desired. |
| 98 | + set -uo pipefail |
| 99 | +
|
| 100 | + echo "🚨 Workflow Cancellation Triggered" |
| 101 | + echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━" |
| 102 | + echo "" |
| 103 | + echo "📋 Context:" |
| 104 | + echo " • Workflow: $GITHUB_WORKFLOW" |
| 105 | + echo " • Job: $GITHUB_JOB" |
| 106 | + echo " • Run ID: $GITHUB_RUN_ID" |
| 107 | + echo " • Ref: $GITHUB_REF" |
| 108 | + printf ' • Reason: %s\n' "$CANCEL_REASON" |
| 109 | + echo "" |
| 110 | +
|
| 111 | + # Initialize outputs |
| 112 | + echo "cancelled=false" >> "$GITHUB_OUTPUT" |
| 113 | + echo "skipped=false" >> "$GITHUB_OUTPUT" |
| 114 | + echo "skip-reason=" >> "$GITHUB_OUTPUT" |
| 115 | +
|
| 116 | + # Check if we should skip cancellation for tags |
| 117 | + if [[ "$SKIP_ON_TAGS" == "true" && "$GITHUB_REF" =~ ^refs/tags/.+ ]]; then |
| 118 | + echo "⏭️ Skipping cancellation: Running on a tag and skip-on-tags is enabled" |
| 119 | + echo " This allows release workflows to complete even if non-critical jobs fail" |
| 120 | + echo "" |
| 121 | + echo "skipped=true" >> "$GITHUB_OUTPUT" |
| 122 | + echo "skip-reason=tag-push-with-skip-enabled" >> "$GITHUB_OUTPUT" |
| 123 | + exit 0 |
| 124 | + fi |
| 125 | +
|
| 126 | + # Validate grace period is a number |
| 127 | + if ! [[ "$GRACE_PERIOD" =~ ^[0-9]+$ ]]; then |
| 128 | + echo "⚠️ Invalid grace period '$GRACE_PERIOD', using default of 2 seconds" |
| 129 | + GRACE_PERIOD=2 |
| 130 | + fi |
| 131 | +
|
| 132 | + # Grace period to allow logs to flush |
| 133 | + if [[ "$GRACE_PERIOD" -gt 0 ]]; then |
| 134 | + echo "⏳ Waiting ${GRACE_PERIOD}s grace period before cancellation..." |
| 135 | + sleep "$GRACE_PERIOD" |
| 136 | + fi |
| 137 | +
|
| 138 | + echo "🛑 Cancelling workflow run $GITHUB_RUN_ID..." |
| 139 | + echo "" |
| 140 | +
|
| 141 | + # Cancel the workflow run using curl for better control and error handling |
| 142 | + # This avoids dependency on gh CLI being installed |
| 143 | + # Added timeouts to prevent hanging on network issues |
| 144 | + # Note: stderr is NOT redirected to avoid interfering with HTTP status code parsing |
| 145 | + CURL_EXIT=0 |
| 146 | + HTTP_RESPONSE=$(curl -s -w "\n%{http_code}" \ |
| 147 | + --connect-timeout 10 \ |
| 148 | + --max-time 30 \ |
| 149 | + -X POST \ |
| 150 | + -H "Accept: application/vnd.github+json" \ |
| 151 | + -H "Authorization: Bearer $GH_TOKEN" \ |
| 152 | + -H "X-GitHub-Api-Version: 2022-11-28" \ |
| 153 | + "${GITHUB_API_URL}/repos/${GITHUB_REPOSITORY}/actions/runs/${GITHUB_RUN_ID}/cancel") || CURL_EXIT=$? |
| 154 | +
|
| 155 | + # Handle curl-level errors (network issues, timeouts) |
| 156 | + if [[ $CURL_EXIT -ne 0 ]]; then |
| 157 | + echo "" |
| 158 | + echo "⚠️ Network error during cancel request (curl exit code: $CURL_EXIT)" |
| 159 | + echo " This could be a temporary network issue." |
| 160 | + echo "" |
| 161 | + echo "skipped=true" >> "$GITHUB_OUTPUT" |
| 162 | + echo "skip-reason=network-error-$CURL_EXIT" >> "$GITHUB_OUTPUT" |
| 163 | + # Don't exit with error - we don't want to mask the original failure |
| 164 | + # The workflow will fail anyway due to the job that triggered this |
| 165 | + exit 0 |
| 166 | + fi |
| 167 | +
|
| 168 | + # Extract HTTP status code (last line) and response body (everything except last line) |
| 169 | + # Note: This spawns subshells, but prioritizes readability and portability over micro-optimization. |
| 170 | + # This code only runs on job failures (rare), and the performance difference is negligible (milliseconds). |
| 171 | + # Alternative approaches using mapfile/arrays add complexity and bash 4+ dependency for minimal gain. |
| 172 | + HTTP_STATUS=$(echo "$HTTP_RESPONSE" | tail -n1) |
| 173 | + RESPONSE_BODY=$(echo "$HTTP_RESPONSE" | sed '$d') |
| 174 | +
|
| 175 | + # Validate HTTP_STATUS is non-empty (malformed response protection) |
| 176 | + if [[ -z "$HTTP_STATUS" ]]; then |
| 177 | + echo "" |
| 178 | + echo "⚠️ Unable to determine HTTP status from cancel API response" |
| 179 | + if [[ -n "$RESPONSE_BODY" ]]; then |
| 180 | + printf ' Raw response (sanitized): %q\n' "$RESPONSE_BODY" |
| 181 | + fi |
| 182 | + echo "" |
| 183 | + echo "skipped=true" >> "$GITHUB_OUTPUT" |
| 184 | + echo "skip-reason=api-error-empty-status" >> "$GITHUB_OUTPUT" |
| 185 | + exit 0 |
| 186 | + fi |
| 187 | +
|
| 188 | + echo "📡 API Response Status: $HTTP_STATUS" |
| 189 | +
|
| 190 | + case "$HTTP_STATUS" in |
| 191 | + 202) |
| 192 | + echo "" |
| 193 | + echo "✅ Workflow cancellation request accepted" |
| 194 | + echo "" |
| 195 | + echo "📝 Note: Other jobs may complete their current step before stopping." |
| 196 | + echo " This is expected GitHub Actions behavior." |
| 197 | + echo "" |
| 198 | + echo "cancelled=true" >> "$GITHUB_OUTPUT" |
| 199 | + ;; |
| 200 | + 403) |
| 201 | + echo "" |
| 202 | + echo "⚠️ Permission denied (403)" |
| 203 | + echo " The job may be missing 'actions: write' permission." |
| 204 | + echo " Add this to the job's permissions block:" |
| 205 | + echo "" |
| 206 | + echo " permissions:" |
| 207 | + echo " contents: read" |
| 208 | + echo " actions: write" |
| 209 | + echo "" |
| 210 | + echo "skipped=true" >> "$GITHUB_OUTPUT" |
| 211 | + echo "skip-reason=permission-denied" >> "$GITHUB_OUTPUT" |
| 212 | + ;; |
| 213 | + 404) |
| 214 | + echo "" |
| 215 | + echo "⚠️ Workflow run not found (404)" |
| 216 | + echo " The workflow run may have already completed." |
| 217 | + echo "" |
| 218 | + echo "skipped=true" >> "$GITHUB_OUTPUT" |
| 219 | + echo "skip-reason=run-not-found" >> "$GITHUB_OUTPUT" |
| 220 | + ;; |
| 221 | + 409) |
| 222 | + echo "" |
| 223 | + echo "⚠️ Conflict (409) - Workflow is already being cancelled or has completed" |
| 224 | + echo "" |
| 225 | + echo "skipped=true" >> "$GITHUB_OUTPUT" |
| 226 | + echo "skip-reason=already-cancelled-or-completed" >> "$GITHUB_OUTPUT" |
| 227 | + ;; |
| 228 | + *) |
| 229 | + echo "" |
| 230 | + echo "⚠️ Unexpected response (HTTP $HTTP_STATUS)" |
| 231 | + if [[ -n "$RESPONSE_BODY" ]]; then |
| 232 | + printf ' Response (sanitized): %q\n' "$RESPONSE_BODY" |
| 233 | + fi |
| 234 | + echo "" |
| 235 | + echo "skipped=true" >> "$GITHUB_OUTPUT" |
| 236 | + echo "skip-reason=api-error-$HTTP_STATUS" >> "$GITHUB_OUTPUT" |
| 237 | + ;; |
| 238 | + esac |
| 239 | +
|
| 240 | + # Escape markdown special characters that could break tables or formatting |
| 241 | + # Note: Multiple parameter expansions are used intentionally over sed for: |
| 242 | + # - Pure bash (no external process overhead) |
| 243 | + # - Explicit, self-documenting escapes |
| 244 | + # - Easier maintenance and debugging |
| 245 | + # This only runs on failures, so micro-optimization is not a priority. |
| 246 | + CANCEL_REASON_MD=$CANCEL_REASON |
| 247 | + CANCEL_REASON_MD=${CANCEL_REASON_MD//\\/\\\\} |
| 248 | + CANCEL_REASON_MD=${CANCEL_REASON_MD//|/\\|} |
| 249 | + CANCEL_REASON_MD=${CANCEL_REASON_MD//\`/\\\`} |
| 250 | + CANCEL_REASON_MD=${CANCEL_REASON_MD//\*/\\*} |
| 251 | + CANCEL_REASON_MD=${CANCEL_REASON_MD//_/\\_} |
| 252 | + CANCEL_REASON_MD=${CANCEL_REASON_MD//[/\\[} |
| 253 | + CANCEL_REASON_MD=${CANCEL_REASON_MD//]/\\]} |
| 254 | + CANCEL_REASON_MD=${CANCEL_REASON_MD//</\\<} |
| 255 | + CANCEL_REASON_MD=${CANCEL_REASON_MD//>/\\>} |
| 256 | +
|
| 257 | + # Determine status cell value for job summary |
| 258 | + if [[ "$HTTP_STATUS" == "202" ]]; then |
| 259 | + STATUS_CELL_VALUE="✅ Accepted" |
| 260 | + else |
| 261 | + STATUS_CELL_VALUE="⚠️ HTTP $HTTP_STATUS" |
| 262 | + fi |
| 263 | +
|
| 264 | + # Add to job summary (always, regardless of success/failure) |
| 265 | + { |
| 266 | + echo "## 🚨 Workflow Cancellation" |
| 267 | + echo "" |
| 268 | + echo "| Detail | Value |" |
| 269 | + echo "|--------|-------|" |
| 270 | + echo "| **Reason** | $CANCEL_REASON_MD |" |
| 271 | + echo "| **Triggered by** | \`$GITHUB_JOB\` |" |
| 272 | + echo "| **Run ID** | $GITHUB_RUN_ID |" |
| 273 | + echo "| **Status** | $STATUS_CELL_VALUE |" |
| 274 | + echo "" |
| 275 | + if [[ "$HTTP_STATUS" == "202" ]]; then |
| 276 | + echo "This cancellation was triggered to save CI resources after a critical failure." |
| 277 | + elif [[ "$HTTP_STATUS" == "403" ]]; then |
| 278 | + echo "⚠️ **Permission Issue**: Add \`actions: write\` permission to enable cancellation." |
| 279 | + elif [[ "$HTTP_STATUS" == "409" ]]; then |
| 280 | + echo "ℹ️ **Already Cancelling**: Another job already triggered cancellation." |
| 281 | + fi |
| 282 | + } >> "$GITHUB_STEP_SUMMARY" |
0 commit comments