Skip to content

Commit 1ed233a

Browse files
committed
handle failures better
1 parent e0eae4f commit 1ed233a

1 file changed

Lines changed: 69 additions & 17 deletions

File tree

.github/scripts/pull-request-dashboard.py

Lines changed: 69 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,8 @@
3030
DEFAULT_OUTPUT = "pull-request-dashboard.md"
3131
DEFAULT_JOBS = 4
3232
DEFAULT_MODEL = "gpt-5.4-mini"
33+
GH_RETRY_ATTEMPTS = 4
34+
GH_RETRY_DELAY_SECONDS = 1.5
3335
PER_THREAD_TIMEOUT = 180
3436
PR_COMMENT_WINDOW = 20
3537
MAX_BODY_CHARS = 1200
@@ -89,20 +91,49 @@
8991
# ---------------------------------------------------------------- gh helpers
9092

9193

94+
class TransientGhError(RuntimeError):
    """A `gh` invocation failed with an error that looked retryable.

    Subclasses RuntimeError, so handlers that catch RuntimeError also
    catch this exception.
    """
96+
97+
98+
def is_retryable_gh_error(stderr: str) -> bool:
    """Return True when *stderr* looks like a transient failure.

    The check is a case-insensitive substring match against a fixed set
    of markers.

    Args:
        stderr: Captured standard error text of a failed command.

    Returns:
        True if any transient-failure marker occurs in the text,
        otherwise False (including for an empty string).
    """
    text = stderr.lower()
    markers = (
        "http 5",
        # Also covers "gateway timeout", "i/o timeout", and similar.
        "timeout",
        # OS-level socket errors are worded "connection timed out" /
        # "operation timed out" and do not contain "timeout".
        "timed out",
        "temporarily unavailable",
        "connection reset",
        "connection refused",
    )
    return any(marker in text for marker in markers)
108+
109+
110+
def gh_retry_delay(attempt: int) -> None:
    """Sleep before the next retry, waiting longer on later attempts.

    The pause is GH_RETRY_DELAY_SECONDS multiplied by ``attempt + 1``,
    so attempt 0 waits one base delay, attempt 1 waits two, and so on.

    Args:
        attempt: Index of the attempt that just failed.
    """
    multiplier = attempt + 1
    time.sleep(GH_RETRY_DELAY_SECONDS * multiplier)
112+
113+
92114
def run_gh_json(cmd: list[str], token: str | None = None) -> Any:
    """Run *cmd* and return its stdout parsed as JSON, retrying on transient errors.

    The command is attempted up to GH_RETRY_ATTEMPTS times. A failed
    attempt is retried, after gh_retry_delay(), only while its stderr
    satisfies is_retryable_gh_error() and attempts remain.

    Args:
        cmd: Argument vector passed to subprocess.run.
        token: When truthy, exported to the child process as GH_TOKEN on
            top of the current environment; otherwise the child inherits
            the environment unchanged.

    Returns:
        The decoded JSON value; empty stdout decodes as ``None``.

    Raises:
        TransientGhError: The last failure's stderr looked retryable.
        RuntimeError: The last failure's stderr did not look retryable.
    """
    child_env = None
    if token:
        child_env = {**os.environ, "GH_TOKEN": token}

    final_attempt = GH_RETRY_ATTEMPTS - 1
    stderr_text = ""
    for attempt in range(GH_RETRY_ATTEMPTS):
        result = subprocess.run(
            cmd,
            capture_output=True,
            text=True,
            check=False,
            encoding="utf-8",
            errors="replace",
            env=child_env,
        )
        if result.returncode == 0:
            return json.loads(result.stdout or "null")

        stderr_text = result.stderr.strip()
        out_of_retries = attempt == final_attempt
        if out_of_retries or not is_retryable_gh_error(stderr_text):
            break
        gh_retry_delay(attempt)

    failure = f"{' '.join(cmd)} failed: {stderr_text}"
    # Callers can tell transient failures apart by exception type.
    error_type = TransientGhError if is_retryable_gh_error(stderr_text) else RuntimeError
    raise error_type(failure)
106137

107138

108139
def gh_api(path: str, paginate: bool = False, token: str | None = None) -> Any:
@@ -140,7 +171,8 @@ def gh_pr_view(repo: str, number: int) -> dict[str, Any]:
140171
"headRefOid", "body",
141172
])
142173
last: dict[str, Any] = {}
143-
for attempt in range(4):
174+
last_stderr = ""
175+
for attempt in range(GH_RETRY_ATTEMPTS):
144176
proc = subprocess.run(
145177
["gh", "pr", "view", str(number), "--repo", repo, "--json", fields],
146178
capture_output=True,
@@ -150,12 +182,19 @@ def gh_pr_view(repo: str, number: int) -> dict[str, Any]:
150182
errors="replace",
151183
)
152184
if proc.returncode != 0:
153-
raise RuntimeError(f"gh pr view {number} failed: {proc.stderr.strip()}")
185+
last_stderr = proc.stderr.strip()
186+
if attempt == GH_RETRY_ATTEMPTS - 1 or not is_retryable_gh_error(last_stderr):
187+
message = f"gh pr view {number} failed: {last_stderr}"
188+
if is_retryable_gh_error(last_stderr):
189+
raise TransientGhError(message)
190+
raise RuntimeError(message)
191+
gh_retry_delay(attempt)
192+
continue
154193
last = json.loads(proc.stdout or "{}")
155194
if last.get("mergeable") not in (None, "", "UNKNOWN"):
156195
return last
157-
if attempt < 3:
158-
time.sleep(1.5)
196+
if attempt < GH_RETRY_ATTEMPTS - 1:
197+
gh_retry_delay(attempt)
159198
return last
160199

161200

@@ -786,9 +825,10 @@ def classify_threads(
786825
"approver": "Waiting on approvers",
787826
"author": "Waiting on authors",
788827
"external": "Waiting on external",
828+
"transient-failure": "Transient GitHub failure retrieving PR data",
789829
"unknown": "Unknown",
790830
}
791-
SIDE_ORDER = ["maintainer", "approver", "author", "external", "unknown"]
831+
SIDE_ORDER = ["maintainer", "approver", "author", "external", "transient-failure", "unknown"]
792832

793833

794834
def action_counts(classifications: list[dict[str, Any]]) -> dict[str, int]:
@@ -862,6 +902,8 @@ def render_workflow_failure_section(issues: list[dict[str, Any]]) -> list[str]:
862902

863903

864904
def ci_cell(facts: dict[str, Any]) -> str:
905+
if "ci_failing_count" not in facts and "ci_pending_count" not in facts:
906+
return "?"
865907
if facts.get("ci_failing_count", 0) > 0:
866908
return "❌"
867909
if facts.get("ci_pending_count", 0) > 0:
@@ -991,6 +1033,16 @@ def build_pr_result(
9911033
"classifications": classifications,
9921034
"side": side,
9931035
}
1036+
except TransientGhError as e:
1037+
return {
1038+
"pr": number,
1039+
"returncode": -1,
1040+
"facts": {},
1041+
"threads": [],
1042+
"classifications": [],
1043+
"side": "transient-failure",
1044+
"raw_stderr": repr(e),
1045+
}
9941046
except Exception as e:
9951047
return {
9961048
"pr": number,

0 commit comments

Comments
 (0)