Skip to content

Monitor Weekly Test Results #1

Monitor Weekly Test Results

Monitor Weekly Test Results #1

# This workflow monitors a weekly run of the elevate CI.
# It runs weekly on Fridays at 4:00 PM UTC (10:00 AM CST, 11:00 AM CDT) to check the status of tests
# that were triggered by weekly-testing.yml at 9:00 AM UTC (3:00 AM CST, 4:00 AM CDT).
#
# IMPORTANT: When running on schedule, tests that are still "in progress" after 5+ hours
# are treated as failures/timeouts since this indicates a problem with the test run.
#
# The workflow:
# 1. Checks the most recent testsuite.yml workflow run
# 2. Checks each of the openstack-*.yml workflow runs
# 3. Reports detailed status
# 4. Creates a summary report and fails if any tests failed
#
# Manual triggering is supported with a configurable lookback period.
name: Monitor Weekly Test Results

on:
  # Scheduled run: minute 0, hour 16 (UTC), every Friday.
  schedule:
    - cron: "0 16 * * 5"
  # Allow manual triggering for testing; the lookback window is configurable.
  workflow_dispatch:
    inputs:
      hours_back:
        description: "Hours to look back for workflow runs"
        type: string
        required: false
        default: "24"
jobs:
  # Single job: query the GitHub API for the latest testsuite.yml and
  # openstack-*.yml runs on main, publish a Markdown table to the step summary,
  # optionally mirror it to Slack, and fail when anything is not a success.
  monitor-weekly-results:
    name: Check weekly Elevate CI results
    runs-on: ubuntu-latest
    permissions:
      actions: read
      contents: read
    env:
      # TODO: get this webhook secret from Todd (unless he adds it himself), or
      # reuse the existing ulc one. While the secret is unset this value is
      # empty and the "Post table to Slack" step is skipped.
      SLACK_WEBHOOK_FOR_ELEVATE: ${{ secrets.SLACK_WEBHOOK_FOR_ELEVATE }}
    steps:
      - name: Check workflow runs
        id: check-runs
        env:
          # Passed through the environment instead of being interpolated into
          # the script text, so the values are never parsed as shell code.
          GH_TOKEN: ${{ github.token }}
          HOURS_BACK: ${{ github.event.inputs.hours_back || '24' }}
        run: |
          TABLE_ROWS=""
          OVERALL_STATUS="success"
          # Set by check_workflow: "true" when the last checked workflow had a run.
          RUN_FOUND="false"

          if ! [[ "$HOURS_BACK" =~ ^[0-9]+$ ]]; then
            echo "::error::hours_back must be a whole number of hours, got '$HOURS_BACK'"
            exit 1
          fi
          echo "Looking back $HOURS_BACK hours for workflow runs"

          # Look for runs from the specified hours back to catch the weekly run.
          # A UTC "Z" timestamp is used on purpose: a "+00:00" offset contains a
          # "+", which can be decoded as a space inside a URL query string.
          SINCE=$(date -u -d "$HOURS_BACK hours ago" '+%Y-%m-%dT%H:%M:%SZ')

          # GET a path below this repository's REST API root.
          # GITHUB_API_URL and GITHUB_REPOSITORY are default runner variables.
          github_api() {
            curl -s \
              -H "Accept: application/vnd.github+json" \
              -H "Authorization: Bearer $GH_TOKEN" \
              -H "X-GitHub-Api-Version: 2022-11-28" \
              "$GITHUB_API_URL/repos/$GITHUB_REPOSITORY/$1"
          }

          # Append one Markdown table row.
          # Args: status, details, run link, commit link, timestamp.
          add_row() {
            TABLE_ROWS="${TABLE_ROWS}| main | $1 | $2 | $3 | $4 | $5 |"$'\n'
          }

          # Report the most recent workflow_dispatch run of one workflow file.
          # Args: workflow file name, optional label for the "no runs" and
          # "timeout" messages (defaults to the file name).
          # Updates TABLE_ROWS, OVERALL_STATUS and RUN_FOUND.
          check_workflow() {
            local file="$1" label="${2:-$1}"
            local run count conclusion url id created sha link when

            RUN_FOUND="false"
            # %3E%3D is the URL-encoded ">=" of the created filter.
            run=$(github_api "actions/workflows/$file/runs?branch=main&event=workflow_dispatch&created=%3E%3D$SINCE&per_page=1")

            # "// 0" maps a missing total_count (API error body) to "no runs"
            # instead of letting it fall through and be reported as a timeout.
            count=$(echo "$run" | jq -r '.total_count // 0')
            if [ "${count:-0}" -eq 0 ]; then
              echo "🔴 FAILED: No recent $label runs found for branch main"
              add_row "🔴 **FAILED**" "No recent $label runs found" "-" "-" "-"
              OVERALL_STATUS="failure"
              return 0
            fi
            RUN_FOUND="true"

            # Details of the most recent run.
            conclusion=$(echo "$run" | jq -r '.workflow_runs[0].conclusion')
            url=$(echo "$run" | jq -r '.workflow_runs[0].html_url')
            id=$(echo "$run" | jq -r '.workflow_runs[0].id')
            created=$(echo "$run" | jq -r '.workflow_runs[0].created_at')
            sha=$(echo "$run" | jq -r '.workflow_runs[0].head_sha')
            link="[\`${sha:0:12}\`]($GITHUB_SERVER_URL/$GITHUB_REPOSITORY/commit/$sha)"
            when=$(TZ='America/Chicago' date -d "$created" '+%Y-%m-%d %H:%M %Z')

            echo "Run ID: $id"
            echo "Status: $conclusion"
            echo "Created: $created"
            echo "URL: $url"

            case "$conclusion" in
              null|"")
                # No conclusion yet: the run has not finished.
                echo "$file timed out"
                add_row "🔴 **TIMEOUT**" "$label job still running after 5+ hours" "[View Run]($url)" "$link" "$when"
                OVERALL_STATUS="failure"
                ;;
              failure)
                echo "$file failed"
                add_row "🔴 **FAILED**" "Failed: $file" "[View Run]($url)" "$link" "$when"
                OVERALL_STATUS="failure"
                ;;
              success)
                echo "$file succeeded"
                add_row "🟢 **SUCCESS**" "Success: $file" "[View Run]($url)" "$link" "$when"
                ;;
              *)
                # Any other conclusion (e.g. cancelled) still fails the report,
                # and gets a row so the failure is explained in the table.
                echo "$file had unknown status: $conclusion"
                add_row "⚪ **UNKNOWN**" "Unexpected conclusion '$conclusion': $file" "[View Run]($url)" "$link" "$when"
                OVERALL_STATUS="failure"
                ;;
            esac
            return 0
          }

          # 1. The testsuite workflow.
          check_workflow testsuite.yml testsuite

          # 2. Every openstack-*.yml workflow; only checked when a testsuite
          #    run was found in the lookback window.
          if [ "$RUN_FOUND" = "true" ]; then
            # per_page=100 is the API maximum, so a repository with more
            # workflows than the default page size does not hide any.
            WORKFLOWS=$(github_api "actions/workflows?per_page=100")
            OPENSTACK_WORKFLOWS=$(echo "$WORKFLOWS" | jq -r '.workflows[].path | split("/") | last | select(test("^openstack-"))')
            # Unquoted on purpose: one workflow file name per word.
            for file in $OPENSTACK_WORKFLOWS; do
              check_workflow "$file"
            done
          fi

          # Save table rows and the overall verdict for the following steps.
          {
            echo "table_rows<<EOF"
            echo "$TABLE_ROWS"
            echo "EOF"
            echo "overall_status=$OVERALL_STATUS"
          } >> "$GITHUB_OUTPUT"

      - name: Create summary
        env:
          OVERALL_STATUS: ${{ steps.check-runs.outputs.overall_status }}
          TABLE_ROWS: ${{ steps.check-runs.outputs.table_rows }}
        run: |
          if [ "$OVERALL_STATUS" = "success" ]; then
            STATUS_TEXT="🟢 **All tests passing**"
          else
            STATUS_TEXT="🔴 **Some tests failed**"
          fi

          # Written with echo/printf rather than a quoted heredoc so that the
          # $(date ...) timestamp is actually expanded; the table rows come
          # from the environment and are printed verbatim.
          {
            echo "# Elevate cPanel Weekly Test Results Summary"
            echo
            echo "**Overall Status**: $STATUS_TEXT"
            echo
            echo "## Branch Results"
            echo
            echo "| Branch | Status | Details | Run Link | Commit | Houston Time |"
            echo "|--------|--------|---------|----------|--------|--------------|"
            printf '%s\n' "$TABLE_ROWS"
            echo
            echo "---"
            echo
            echo "*Report generated at $(date -u '+%Y-%m-%d %H:%M:%S UTC')*"
            echo
            echo "**Legend:**"
            echo
            echo "- 🟢 **SUCCESS**: All matrix jobs passed"
            echo "- 🔴 **FAILED**: One or more jobs failed, or no recent run was found"
            echo "- 🔴 **TIMEOUT**: Tests still running after 5+ hours"
            echo "- ⚪ **UNKNOWN**: Run ended with an unexpected conclusion (e.g. cancelled)"
          } >> "$GITHUB_STEP_SUMMARY"

      - name: Post table to Slack
        if: ${{ env.SLACK_WEBHOOK_FOR_ELEVATE != '' }}
        env:
          OVERALL_STATUS: ${{ steps.check-runs.outputs.overall_status }}
          TABLE_ROWS: ${{ steps.check-runs.outputs.table_rows }}
        run: |
          # Transform the pipe-separated GitHub table rows into Slack bullet
          # lines. The timestamp column is dropped to avoid wrapping.
          # (Not named LINES: that is a bash special variable.)
          SLACK_LINES=$(awk -F '|' '
            function trim(s) { sub(/^ +/, "", s); sub(/ +$/, "", s); return s }
            NF > 1 {
              b = trim($2); s = trim($3); d = trim($4); r = trim($5); c = trim($6)
              if (match(r, /\([^)]+\)/)) {
                # Run Link holds a markdown link: link the branch and add an
                # explicit "View Run" link, both pointing at the run URL.
                run_url = substr(r, RSTART + 1, RLENGTH - 2)
                print "• <" run_url "|*" b "*> — " s " — " d " — <" run_url "|View Run> — " c
              } else {
                # Rows without a run (Run Link is "-") have nothing to link to.
                print "• *" b "* — " s " — " d
              }
            }
          ' <<< "$TABLE_ROWS")

          # Convert [text](url) -> <url|text> and **bold** -> *bold*
          SLACK_LINES=$(printf "%s\n" "$SLACK_LINES" | sed -E \
            -e 's/\[([^]]+)\]\(([^)]+)\)/<\2|\1>/g' \
            -e 's/\*\*([^*]+)\*\*/*\1*/g')

          # Build Slack blocks payload; jq handles all JSON escaping.
          PAYLOAD=$(jq -n \
            --arg status "$OVERALL_STATUS" \
            --arg text "${SLACK_LINES:-No workflow results were reported.}" \
            '{
              blocks: [
                { type: "header", text: { type: "plain_text", text: ("Elevate cPanel Weekly Test Results: " + (if $status == "success" then "All tests passing 🟢" else "Issues detected 🔴" end)), emoji: true } },
                { type: "divider" },
                { type: "section", text: { type: "mrkdwn", text: $text } }
              ]
            }')

          curl -sS -X POST -H 'Content-type: application/json' \
            --data "$PAYLOAD" \
            "$SLACK_WEBHOOK_FOR_ELEVATE"

      - name: Send notification on failure
        if: steps.check-runs.outputs.overall_status != 'success'
        run: |
          echo "::warning::One or more weekly test runs failed or are missing"
          echo "Check the summary above for details on which branches failed"
          # Append follow-up instructions to the step summary.
          cat << 'EOF' >> "$GITHUB_STEP_SUMMARY"

          ## Action Required

          Some weekly tests have failed, timed out, or are missing. Please:

          1. Check the individual workflow runs linked above
          2. Investigate any failures or timeouts in the failing branches
          3. For TIMEOUT status: Check if tests are stuck or infrastructure issues exist
          4. Consider re-running failed tests if they appear to be flaky
          5. Update relevant teams if there are persistent issues
          EOF

      - name: Fail if any tests failed
        if: steps.check-runs.outputs.overall_status != 'success'
        run: |
          echo "::error::One or more weekly test runs failed or are missing"
          exit 1