|
| 1 | +""" |
| 2 | +Show runner job status across all workflows in llvm/offload-test-suite. |
| 3 | +
|
| 4 | +Includes queued/in-progress runs and recently completed runs so you |
| 5 | +can see what is (or was) occupying the runners. |
| 6 | +
|
| 7 | +Requires the GitHub CLI (`gh`) to be installed and authenticated |
| 8 | +(`gh auth login`). API calls are issued through `gh api`. |
| 9 | +
|
| 10 | +Usage: |
| 11 | + python runner_status.py [vendor] |
| 12 | +
|
| 13 | + vendor: intel | amd | nvidia | qc (omit to show all vendors) |
| 14 | +""" |
| 15 | + |
| 16 | +import sys |
| 17 | +import os |
| 18 | +import json |
| 19 | +import subprocess |
| 20 | +from concurrent.futures import ThreadPoolExecutor, as_completed |
| 21 | +from datetime import datetime, timezone, timedelta |
| 22 | + |
# Repository whose workflow runs and runners are queried.
OWNER = "llvm"
REPO = "offload-test-suite"

# Vendors accepted on the command line, in the order their tables print.
VALID_VENDORS = ("intel", "amd", "nvidia", "qc")

# Completed runs last updated longer ago than this (in hours) are ignored.
COMPLETED_WINDOW_HOURS = 3

# ANSI escape sequences: one color per vendor, plus the reset code.
RESET = "\033[0m"
VENDOR_COLORS = dict(
    intel="\033[34m",  # blue
    amd="\033[31m",  # red
    nvidia="\033[32m",  # green
    qc="\033[90m",  # gray
)

# Keyword searched for in a workflow name to tie that workflow to a single
# vendor. Each vendor's keyword is currently just the vendor name itself.
VENDOR_WORKFLOW_KEYWORDS = {name: name for name in VALID_VENDORS}
| 44 | + |
| 45 | + |
def runner_label(vendor):
    """Return the runner label string associated with *vendor*."""
    return "hlsl-windows-{}".format(vendor)
| 48 | + |
| 49 | + |
def colorize(vendor, text):
    """Return *text* wrapped in the ANSI color registered for *vendor*.

    When VENDOR_COLORS has no (or an empty) entry for the vendor, the text
    is handed back untouched, without a trailing reset code.
    """
    color = VENDOR_COLORS.get(vendor, "")
    if not color:
        return text
    return "{}{}{}".format(color, text, RESET)
| 54 | + |
| 55 | + |
def api_get(path):
    """Run ``gh api`` against *path* and decode the JSON it prints.

    Raises subprocess.CalledProcessError when `gh` exits non-zero
    (e.g. on a 403 response).
    """
    command = ["gh", "api", "-H", "Accept: application/vnd.github+json", path]
    completed = subprocess.run(
        command,
        capture_output=True,
        text=True,
        check=True,
    )
    return json.loads(completed.stdout)
| 68 | + |
| 69 | + |
def get_runners(label):
    """Return the repository's runners that carry *label*.

    A single page of up to 100 runners is requested. If the `gh api` call
    exits non-zero, None is returned instead of a list.
    """
    endpoint = f"/repos/{OWNER}/{REPO}/actions/runners?per_page=100"
    try:
        payload = api_get(endpoint)
    except subprocess.CalledProcessError:
        return None

    matching = []
    for runner in payload.get("runners", []):
        label_names = [entry["name"] for entry in runner.get("labels", [])]
        if label in label_names:
            matching.append(runner)
    return matching
| 78 | + |
| 79 | + |
def run_could_match_vendor(run, vendor):
    """Quick heuristic: can this run possibly have jobs for the given vendor?

    Args:
        run: Workflow-run dict; only its "name" entry is inspected.
        vendor: Vendor key to test against VENDOR_WORKFLOW_KEYWORDS.

    Returns:
        True when the run should be kept for *vendor*:
        - the workflow name contains "execution testing" or "hlsl test", or
        - the name mentions no vendor keyword at all (ambiguous), or
        - *vendor* is among the vendors whose keyword the name mentions.
        False only when the name mentions other vendors but not this one.
    """
    # A missing or null workflow name is treated as ambiguous, not an error.
    name_lower = (run.get("name") or "").lower()

    # "Execution Testing" (PR matrix) always includes intel, sometimes others
    if "execution testing" in name_lower or "hlsl test" in name_lower:
        return True

    # Collect every vendor the name mentions. Deciding on the first keyword
    # hit alone would wrongly reject a workflow that names several vendors.
    mentioned = [
        v for v, kw in VENDOR_WORKFLOW_KEYWORDS.items() if kw in name_lower
    ]
    return not mentioned or vendor in mentioned
| 96 | + |
| 97 | + |
def get_runs(vendors):
    """Collect queued, in-progress and recently completed workflow runs.

    Completed runs are kept only if their "updated_at" timestamp falls
    within the last COMPLETED_WINDOW_HOURS hours. Runs are de-duplicated by
    id, keeping the first occurrence. When exactly one vendor is requested,
    runs that run_could_match_vendor rejects for it are dropped so their
    jobs never need to be fetched.
    """
    base = f"/repos/{OWNER}/{REPO}/actions/runs"

    collected = []
    for status in ("queued", "in_progress"):
        page = api_get(f"{base}?status={status}&per_page=100")
        collected.extend(page["workflow_runs"])

    # Recently completed runs: anything updated at or after the cutoff.
    cutoff = datetime.now(timezone.utc) - timedelta(hours=COMPLETED_WINDOW_HOURS)
    completed = api_get(f"{base}?status=completed&per_page=50")["workflow_runs"]
    collected.extend(
        run
        for run in completed
        if datetime.fromisoformat(run["updated_at"].replace("Z", "+00:00")) >= cutoff
    )

    # De-duplicate by run id; dict insertion order keeps the first sighting.
    by_id = {}
    for run in collected:
        by_id.setdefault(run["id"], run)
    unique = list(by_id.values())

    # Single-vendor request: discard runs that clearly belong elsewhere.
    if len(vendors) == 1:
        only_vendor = vendors[0]
        unique = [run for run in unique if run_could_match_vendor(run, only_vendor)]

    return unique
| 132 | + |
| 133 | + |
def prefetch_jobs(runs, jobs_cache):
    """Populate *jobs_cache* with the jobs of every run not yet cached.

    Fetches are issued through a pool of up to 8 worker threads and stored
    under the run id as each one finishes. An exception raised by a fetch
    propagates out of this function.
    """
    pending_ids = [run["id"] for run in runs if run["id"] not in jobs_cache]
    if not pending_ids:
        return

    with ThreadPoolExecutor(max_workers=8) as pool:
        id_by_future = {
            pool.submit(get_jobs, pending_id): pending_id
            for pending_id in pending_ids
        }
        for finished in as_completed(id_by_future):
            jobs_cache[id_by_future[finished]] = finished.result()
| 148 | + |
| 149 | + |
def get_jobs(run_id):
    """Return the "jobs" list the API reports for workflow run *run_id*.

    Only one page of up to 100 jobs is requested.
    """
    endpoint = f"/repos/{OWNER}/{REPO}/actions/runs/{run_id}/jobs?per_page=100"
    payload = api_get(endpoint)
    return payload["jobs"]
| 153 | + |
| 154 | + |
def tz_abbrev(dt):
    """Return a short timezone name for *dt*.

    Names of five characters or fewer (as produced by ``%Z``) are returned
    unchanged; longer ones such as 'Pacific Daylight Time' are reduced to
    the initials of their whitespace-separated words ('PDT').
    """
    zone = dt.strftime("%Z")
    if len(zone) > 5:
        zone = "".join(word[0] for word in zone.split())
    return zone
| 161 | + |
| 162 | + |
def format_time(iso_str):
    """Render an ISO timestamp as 'local time TZ / same instant UTC'.

    Example shape: '12:40 PM PDT / 7:40 PM UTC'. A trailing 'Z' in
    *iso_str* is read as a +00:00 offset. The part labeled UTC is taken
    straight from the parsed timestamp's own clock fields.
    """

    def clock(when):
        # Unpadded 12-hour clock: hour 0 and hour 12 both display as 12.
        hour12 = when.hour % 12 or 12
        meridiem = "PM" if when.hour >= 12 else "AM"
        return f"{hour12}:{when.minute:02d} {meridiem}"

    parsed = datetime.fromisoformat(iso_str.replace("Z", "+00:00"))
    here = parsed.astimezone()
    return f"{clock(here)} {tz_abbrev(here)} / {clock(parsed)} UTC"
| 174 | + |
| 175 | + |
def _short_job_name(job_name):
    """Reduce a job name to its last comma-separated piece, minus decoration.

    A trailing '/ build' and a dangling ')' left over from the split are
    removed as whole suffixes. (str.rstrip with a multi-character argument
    strips a *set of characters*, which also ate legitimate trailing
    letters such as the 'd' of 'amd'.)
    """
    short = job_name.split(",")[-1].strip()
    if short.endswith("/ build"):
        short = short[: -len("/ build")].rstrip()
    # Drop the closing paren only when its opening partner was split away.
    if short.endswith(")") and "(" not in short:
        short = short[:-1].rstrip()
    return short


def _clock_12h(dt):
    """Format *dt* as an unpadded 12-hour time such as '7:40 PM'.

    Built by hand because the strftime '%#I' flag is Windows-specific.
    """
    hour = dt.hour % 12 or 12
    meridiem = "AM" if dt.hour < 12 else "PM"
    return f"{hour}:{dt.minute:02d} {meridiem}"


def print_vendor_table(vendor, runs, jobs_cache, runners_cache):
    """Print the status table for a single vendor.

    Args:
        vendor: Vendor key, e.g. "amd".
        runs: Workflow-run dicts to inspect, in display order.
        jobs_cache: Dict mapping run id -> list of job dicts; missing
            entries are fetched with get_jobs and stored here.
        runners_cache: Dict mapping runner label -> get_runners result;
            filled in on first use of a label.

    Returns:
        True if at least one run row was printed, False otherwise.
    """
    label = runner_label(vendor)

    # Fetch and cache runners for this label
    if label not in runners_cache:
        runners_cache[label] = get_runners(label)
    runners = runners_cache[label]
    if runners is not None:
        online = sum(1 for r in runners if r.get("status") == "online")
        runner_info = f", {online}/{len(runners)} online"
    else:
        # Runner lookup failed; omit the online count from the header.
        runner_info = ""

    rows = []
    active_details = []

    for run in runs:
        run_id = run["id"]
        if run_id not in jobs_cache:
            jobs_cache[run_id] = get_jobs(run_id)
        jobs = jobs_cache[run_id]

        # Match by label OR by runner name containing the vendor (covers
        # workflow_dispatch runs that didn't pin a vendor SKU label).
        vendor_jobs = [
            j
            for j in jobs
            if label in j.get("labels", [])
            or vendor.lower() in (j.get("runner_name") or "").lower()
        ]
        if not vendor_jobs:
            continue

        title = run["display_title"]
        workflow = run["name"]
        created = format_time(run["created_at"])

        done = sum(1 for j in vendor_jobs if j["status"] == "completed")
        active = [j for j in vendor_jobs if j["status"] == "in_progress"]
        queued = sum(1 for j in vendor_jobs if j["status"] == "queued")

        # Skip runs where all jobs are done (nothing active or queued).
        # NOTE(review): this presumably also hides every recently completed
        # run that get_runs collects -- confirm that is intended.
        if not active and queued == 0:
            continue

        if run["event"] == "schedule":
            prefix = "[Scheduled]"
        elif run["event"] == "pull_request":
            prefix = "[PR]"
        else:
            prefix = f"[{run['event']}]"
        run_label = f"{prefix} {title} ({created})"
        rows.append((run_label, workflow, done, len(active), queued))

        for j in active:
            # runner_name may be present but null, so fall back with `or`.
            runner_name = j.get("runner_name") or "?"
            active_details.append((title, _short_job_name(j["name"]), runner_name))

    header_text = f"=== {vendor.upper()} (runner: {label}{runner_info}) ==="
    print(colorize(vendor, header_text))
    print()

    if not rows:
        print(f"No runs with {vendor} jobs found.\n")
        return False

    now_local = datetime.now().astimezone()
    now_utc = now_local.astimezone(timezone.utc)
    local_str = f"{_clock_12h(now_local)} {tz_abbrev(now_local)}"
    utc_str = f"{_clock_12h(now_utc)} UTC"
    timestamp = f"as of {local_str} / {utc_str}"

    # Column widths: wide enough for both the data and the column headers.
    run_col_header = f"Run ({timestamp})"
    col1_w = max(max(len(r[0]) for r in rows), len(run_col_header))
    col2_w = max(max(len(r[1]) for r in rows), len("Workflow"))
    header = (
        f"{run_col_header:<{col1_w}} {'Workflow':<{col2_w}}"
        f" {'Done':>6} {'Active':>6} {'Queued':>6}"
    )
    sep = "=" * len(header)

    print(colorize(vendor, sep))
    print(header)
    print(colorize(vendor, sep))
    for run_label, workflow, done, active, queued in rows:
        # Non-zero active counts are starred so they stand out.
        active_str = str(active) if active == 0 else f"*{active}*"
        print(
            f"{run_label:<{col1_w}} {workflow:<{col2_w}}"
            f" {done:>6} {active_str:>6} {queued:>6}"
        )
    print(colorize(vendor, sep))

    total_done = sum(r[2] for r in rows)
    total_active = sum(r[3] for r in rows)
    total_queued = sum(r[4] for r in rows)
    print(
        f"{'TOTAL':<{col1_w}} {'':<{col2_w}}"
        f" {total_done:>6} {total_active:>6} {total_queued:>6}"
    )
    print()

    if active_details:
        print(colorize(vendor, f"Currently running on {vendor}:"))
        for title, job, runner_name in active_details:
            print(f" -> {job} (runner: {runner_name}, run: {title})")
    else:
        print(f"No {vendor} jobs currently running.")
    print()
    return True
| 286 | + |
| 287 | + |
def main():
    """Command-line entry point: print one status table per vendor.

    An optional first argument selects a single vendor from VALID_VENDORS;
    without it every vendor is shown. Exits with status 1 on an unknown
    vendor, when the `gh` executable cannot be found, or when a `gh api`
    call fails.
    """
    if len(sys.argv) >= 2:
        vendor = sys.argv[1].lower()
        if vendor not in VALID_VENDORS:
            print(f"Unknown vendor '{vendor}'. Choose from: {', '.join(VALID_VENDORS)}")
            print(f"Usage: python {os.path.basename(__file__)} [vendor]")
            sys.exit(1)
        vendors = [vendor]
    else:
        vendors = list(VALID_VENDORS)

    try:
        runs = get_runs(vendors)
        if not runs:
            print("No queued, in-progress, or recently completed runs found.")
            return

        runs.sort(key=lambda r: r["created_at"])

        # Fetch all jobs in parallel upfront
        jobs_cache = {}
        prefetch_jobs(runs, jobs_cache)

        runners_cache = {}

        for v in vendors:
            print_vendor_table(v, runs, jobs_cache, runners_cache)
    except FileNotFoundError:
        # subprocess could not find the `gh` executable.
        print(
            "GitHub CLI (`gh`) was not found. Install it and run `gh auth login`.",
            file=sys.stderr,
        )
        sys.exit(1)
    except subprocess.CalledProcessError as err:
        # Surface gh's own diagnostic instead of a Python traceback.
        detail = (err.stderr or "").strip()
        print(f"`gh api` failed (exit code {err.returncode}): {detail}", file=sys.stderr)
        sys.exit(1)


if __name__ == "__main__":
    main()
0 commit comments