Monitor Weekly Test Results #1
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # This workflow monitors a weekly run of the elevate CI. | |
| # It runs weekly on Fridays at 4:00 PM UTC (10:00 AM CST, 11:00 AM CDT) to check the status of tests | |
| # that were triggered by weekly-testing.yml at 9:00 AM UTC (3:00 AM CST, 4:00 AM CDT). | |
| # | |
| # IMPORTANT: When running on schedule, tests that are still "in progress" after 5+ hours | |
| # are treated as failures/timeouts since this indicates a problem with the test run. | |
| # | |
| # The workflow: | |
| # 1. Checks the most recent testsuite.yml workflow run | |
| # 2. Checks each of the openstack-*.yml workflow runs | |
| # 3. Reports detailed status | |
| # 4. Creates a summary report and fails if any tests failed | |
| # | |
| # Manual triggering is supported with a configurable lookback period. | |
| name: Monitor Weekly Test Results | |
| on: | |
| schedule: | |
| - cron: '0 16 * * 5' | |
| workflow_dispatch: # Allow manual runs for testing; hours_back sets the lookback window and the check step falls back to 24 when it is empty | |
| inputs: | |
| hours_back: | |
| description: 'Hours to look back for workflow runs' | |
| required: false | |
| default: '24' | |
| type: string | |
| jobs: | |
| monitor-weekly-results: | |
| name: Check weekly Elevate CI results | |
| runs-on: ubuntu-latest | |
| permissions: | |
| actions: read | |
| contents: read | |
| env: | |
| # TODO: get this Slack webhook secret from Todd (unless he adds it himself), or just reuse the ulc one; the Slack step is skipped while it is empty | |
| SLACK_WEBHOOK_FOR_ELEVATE: ${{ secrets.SLACK_WEBHOOK_FOR_ELEVATE }} | |
| steps: | |
| - name: Check workflow runs | |
| id: check-runs | |
| env: | |
| # Handed over through the environment instead of being interpolated into the script, so a manual input value is never parsed as shell code | |
| HOURS_BACK: ${{ github.event.inputs.hours_back || '24' }} | |
| run: | | |
| # Inspects the newest testsuite.yml run and the newest run of every openstack-*.yml workflow on main. | |
| # Step outputs: table_rows (markdown table rows) and overall_status ("success" or "failure"). | |
| TABLE_ROWS="" | |
| OVERALL_STATUS="success" | |
| API_BASE="${{ github.api_url }}/repos/${{ github.repository }}/actions" | |
| COMMIT_BASE="${{ github.server_url }}/${{ github.repository }}/commit" | |
| echo "Looking back $HOURS_BACK hours for workflow runs" | |
| # Start of the lookback window in UTC with a literal Z suffix. The --iso-8601 form ends in "+00:00", | |
| # and an unencoded "+" inside a query string can be decoded as a space. | |
| SINCE=$(date -u -d "$HOURS_BACK hours ago" '+%Y-%m-%dT%H:%M:%SZ') | |
| # check_workflow FILE LABEL | |
| # Looks up the newest workflow_dispatch run of workflow FILE on main inside the lookback window, | |
| # appends one row to TABLE_ROWS and downgrades OVERALL_STATUS unless that run concluded "success". | |
| # LABEL is the name used in the "no runs" and "still running" texts. | |
| # Sets RUNS_FOUND to the number of matching runs reported by the API. | |
| check_workflow() { | |
| local file=$1 label=$2 | |
| local response status url id created sha link when | |
| response=$(curl -s \ | |
| -H "Accept: application/vnd.github+json" \ | |
| -H "Authorization: Bearer ${{ github.token }}" \ | |
| -H "X-GitHub-Api-Version: 2022-11-28" \ | |
| "$API_BASE/workflows/$file/runs?branch=main&event=workflow_dispatch&created=>=$SINCE&per_page=1") | |
| # A missing total_count (for example an API error payload) counts as zero runs instead of | |
| # slipping through the numeric test and being reported as a timeout. | |
| RUNS_FOUND=$(jq -r '.total_count // 0' <<< "$response") | |
| if [ "$RUNS_FOUND" -eq 0 ]; then | |
| echo "🔴 FAILED: No recent $label runs found for branch main" | |
| TABLE_ROWS="${TABLE_ROWS}| main | 🔴 **FAILED** | No recent $label runs found | - | - | - |"$'\n' | |
| OVERALL_STATUS="failure" | |
| return 0 | |
| fi | |
| # Details of the most recent run; every value comes from this workflow's own response | |
| status=$(jq -r '.workflow_runs[0].conclusion' <<< "$response") | |
| url=$(jq -r '.workflow_runs[0].html_url' <<< "$response") | |
| id=$(jq -r '.workflow_runs[0].id' <<< "$response") | |
| created=$(jq -r '.workflow_runs[0].created_at' <<< "$response") | |
| sha=$(jq -r '.workflow_runs[0].head_sha' <<< "$response") | |
| link="[\`${sha:0:12}\`]($COMMIT_BASE/$sha)" | |
| when=$(TZ='America/Chicago' date -d "$created" '+%Y-%m-%d %H:%M %Z') | |
| echo "Run ID: $id" | |
| echo "Status: $status" | |
| echo "Created: $created" | |
| echo "URL: $url" | |
| # A null conclusion means the run has not finished yet, which this monitor treats as a timeout | |
| if [[ "$status" == "null" || -z "$status" ]]; then | |
| echo "$file timed out" | |
| TABLE_ROWS="${TABLE_ROWS}| main | 🔴 **TIMEOUT** | $label job still running after 5+ hours | [View Run]($url) | $link | $when |"$'\n' | |
| OVERALL_STATUS="failure" | |
| elif [[ "$status" == "failure" ]]; then | |
| echo "$file failed" | |
| TABLE_ROWS="${TABLE_ROWS}| main | 🔴 **FAILED** | Failed: $file | [View Run]($url) | $link | $when |"$'\n' | |
| OVERALL_STATUS="failure" | |
| elif [[ "$status" == "success" ]]; then | |
| echo "$file succeeded" | |
| TABLE_ROWS="${TABLE_ROWS}| main | 🟢 **SUCCESS** | Success: $file | [View Run]($url) | $link | $when |"$'\n' | |
| else | |
| # Any other conclusion (cancelled, skipped, ...) still fails the check and now gets its own row | |
| echo "$file had unknown status" | |
| TABLE_ROWS="${TABLE_ROWS}| main | ⚪ **UNKNOWN** | Unexpected status '$status': $file | [View Run]($url) | $link | $when |"$'\n' | |
| OVERALL_STATUS="failure" | |
| fi | |
| } | |
| # Get the most recent testsuite.yml run | |
| check_workflow "testsuite.yml" "testsuite" | |
| # The openstack workflows are only inspected when a testsuite run was found | |
| if [ "$RUNS_FOUND" -gt 0 ]; then | |
| # Get all the openstack-*.yml workflow file names of this repository (100 is the largest page size) | |
| WORKFLOWS=$(curl -s \ | |
| -H "Accept: application/vnd.github+json" \ | |
| -H "Authorization: Bearer ${{ github.token }}" \ | |
| -H "X-GitHub-Api-Version: 2022-11-28" \ | |
| "$API_BASE/workflows?per_page=100") | |
| OPENSTACK_WORKFLOWS=$(jq -r '.workflows[].path | split("/") | last | select(test("^openstack-"))' <<< "$WORKFLOWS") | |
| # Iterate through each openstack workflow and apply the same check | |
| while IFS= read -r file; do | |
| [[ -n "$file" ]] || continue | |
| check_workflow "$file" "$file" | |
| done <<< "$OPENSTACK_WORKFLOWS" | |
| fi | |
| # Save table rows and the overall verdict for the following steps | |
| { | |
| echo "table_rows<<EOF" | |
| echo "$TABLE_ROWS" | |
| echo "EOF" | |
| echo "overall_status=$OVERALL_STATUS" | |
| } >> "$GITHUB_OUTPUT" | |
| - name: Create summary | |
| env: | |
| # Step outputs are handed over as environment variables so their content is never parsed as shell code | |
| OVERALL_STATUS: ${{ steps.check-runs.outputs.overall_status }} | |
| TABLE_ROWS: ${{ steps.check-runs.outputs.table_rows }} | |
| run: | | |
| # Renders the markdown report into the job summary | |
| if [[ "$OVERALL_STATUS" == "success" ]]; then | |
| STATUS_TEXT='🟢 **All tests passing**' | |
| else | |
| STATUS_TEXT='🔴 **Some tests failed**' | |
| fi | |
| # The heredoc delimiter is deliberately unquoted: with a quoted delimiter the $(date ...) call below | |
| # is written out literally instead of the generation time. Values of expanded variables are not | |
| # evaluated again, so the backticks inside TABLE_ROWS stay plain text. | |
| cat << EOF >> "$GITHUB_STEP_SUMMARY" | |
| # Elevate cPanel Weekly Test Results Summary | |
| **Overall Status**: ${STATUS_TEXT} | |
| ## Branch Results | |
| | Branch | Status | Details | Run Link | Commit | Houston Time | | |
| |--------|--------|---------|----------|--------|--------------| | |
| ${TABLE_ROWS} | |
| --- | |
| *Report generated at $(date -u '+%Y-%m-%d %H:%M:%S UTC')* | |
| **Legend:** | |
| - 🟢 **SUCCESS**: All matrix jobs passed | |
| - 🔴 **FAILED**: One or more matrix jobs failed (shows which platforms) | |
| - 🔴 **TIMEOUT**: Tests still running after 5+ hours (scheduled runs only) | |
| - ⚪ **UNKNOWN**: No recent runs found or unexpected status | |
| EOF | |
| - name: Post table to Slack | |
| if: ${{ env.SLACK_WEBHOOK_FOR_ELEVATE != '' }} | |
| run: | | |
| # Build Slack-friendly mrkdwn from the GitHub table rows. | |
| # The quoted heredoc delimiter keeps the backticks inside the rows from being executed. | |
| ROWS=$(cat << 'EOT' | |
| ${{ steps.check-runs.outputs.table_rows }} | |
| EOT | |
| ) | |
| # Transform pipe-separated rows into bullet lines; link Branch and "View Run" only | |
| LINES=$(awk -F '|' ' | |
| function trim(s){ sub(/^ +/,"",s); sub(/ +$/,"",s); return s } | |
| NF>1 { | |
| # $1 is the empty text before the leading pipe, so the table columns start at $2; | |
| # the timestamp column ($7) is dropped to avoid wrapping | |
| b=trim($2); s=trim($3); d=trim($4); r=trim($5); c=trim($6); | |
| if (match(r, /\(([^)]+)\)/)) { | |
| # Extract URL from markdown link in r (Run Link) | |
| run_url=substr(r, RSTART+1, RLENGTH-2) | |
| # Link Branch to the run; keep Details as plain text; keep an explicit View Run link | |
| print "• <" run_url "|*" b "*> — " s " — " d " — <" run_url "|View Run> — " c | |
| } else { | |
| # Rows without a run (Run Link is "-") have nothing to link to: print them as plain text | |
| # rather than as a broken <-|...> link | |
| print "• *" b "* — " s " — " d | |
| } | |
| } | |
| ' <<< "$ROWS") | |
| # Convert [text](url) -> <url|text> and **bold** -> *bold* | |
| LINES=$(printf "%s\n" "$LINES" | sed -E 's/\[([^]]+)\]\(([^)]+)\)/<\2|\1>/g' | sed -E 's/\*\*([^*]+)\*\*/*\1*/g') | |
| # Build Slack blocks payload; jq takes care of the JSON escaping of the message text | |
| PAYLOAD=$(jq -n \ | |
| --arg status "${{ steps.check-runs.outputs.overall_status }}" \ | |
| --arg text "$LINES" \ | |
| '{ | |
| blocks: [ | |
| { type: "header", text: { type: "plain_text", text: ("Elevate cPanel Weekly Test Results: " + (if $status == "success" then "All tests passing 🟢" else "Issues detected 🔴" end)), emoji: true } }, | |
| { type: "divider" }, | |
| { type: "section", text: { type: "mrkdwn", text: $text } } | |
| ] | |
| }') | |
| curl -sS -X POST -H 'Content-type: application/json' \ | |
| --data "$PAYLOAD" \ | |
| "$SLACK_WEBHOOK_FOR_ELEVATE" | |
| - name: Send notification on failure | |
| if: steps.check-runs.outputs.overall_status != 'success' | |
| run: | | |
| # Emits a warning annotation and appends follow-up instructions to the job summary | |
| echo "::warning::One or more weekly test runs failed or are missing" | |
| echo "Check the summary above for details on which branches failed" | |
| # Create a more detailed error message; the quoted delimiter keeps the text literal | |
| cat << 'EOF' >> "$GITHUB_STEP_SUMMARY" | |
| ## Action Required | |
| Some weekly tests have failed, timed out, or are missing. Please: | |
| 1. Check the individual workflow runs linked above | |
| 2. Investigate any failures or timeouts in the failing branches | |
| 3. For TIMEOUT status: Check if tests are stuck or infrastructure issues exist | |
| 4. Consider re-running failed tests if they appear to be flaky | |
| 5. Update relevant teams if there are persistent issues | |
| EOF | |
| - name: Fail if any tests failed | |
| if: steps.check-runs.outputs.overall_status != 'success' | |
| run: | | |
| # Turns a non-success verdict from the check step into a failed job | |
| echo "::error::One or more weekly test runs failed or are missing" | |
| exit 1 | |