Skip to content

Commit a634651

Browse files
committed
fix
1 parent 3910c5f commit a634651

1 file changed

Lines changed: 94 additions & 30 deletions

File tree

.github/scripts/flaky-test-remediation/1-select-flaky-test.py

Lines changed: 94 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,9 @@
3131
WINDOW_DAYS = 7
3232
MIN_FLAKY = 5
3333
RECENT_MODIFY_DAYS = 7
34+
FLAKY_OUTCOMES = ("flaky", "failed")
35+
MAX_HISTORY_SPLIT_DEPTH = 12
36+
MIN_HISTORY_WINDOW_MS = 60 * 1000
3437

3538

3639
def http_get_json(url, *, timeout=60):
@@ -99,6 +102,16 @@ def flaky_day_buckets(history):
99102
return buckets
100103

101104

105+
def flaky_result_count(history):
    """Return how many flaky/failed executions the outcome trend reports.

    Every entry of ``history["outcomeTrend"]["dataPoints"]`` contributes the
    ``outcomeDistribution`` counts of each outcome named in
    ``FLAKY_OUTCOMES``. Missing or null sections, and missing or null
    counts, are treated as zero.
    """
    trend = history.get("outcomeTrend") or {}
    # Lazily pull the per-data-point distribution, defaulting to empty.
    distributions = (
        point.get("outcomeDistribution") or {}
        for point in (trend.get("dataPoints") or [])
    )
    return sum(
        int(distribution.get(outcome) or 0)
        for distribution in distributions
        for outcome in FLAKY_OUTCOMES
    )
113+
114+
102115
def _all_source_files():
103116
"""Tracked .java/.groovy/.kt files, posix-relative to WORKSPACE_ROOT."""
104117
out = subprocess.check_output(
@@ -148,33 +161,82 @@ def best_failure_sample(history):
148161
return "", ""
149162

150163

151-
def collect_flaky_scans(history, *, base, limit):
164+
def result_scan(result, *, base):
    """Turn one test-result row into the scan record used in reports.

    Args:
        result: mapping for a single test execution; the keys read are
            ``buildId``, ``firstFailureMessage``, ``outcome``,
            ``startTimestamp``, ``tags`` and ``workUnitName``. Missing or
            falsy values fall back to an empty default.
        base: URL prefix; the scan link is ``{base}/s/{buildId}``.

    Returns:
        A dict with the keys ``build_id``, ``scan_url``, ``outcome``,
        ``timestamp_ms``, ``tags``, ``work_unit`` and ``failure_excerpt``.
    """
    build_id = result.get("buildId") or ""
    message = result.get("firstFailureMessage") or ""

    # Keep at most 600 characters of the failure message and flag any
    # truncation with a trailing ellipsis.
    if message:
        truncation_mark = " \u2026" if len(message) > 600 else ""
        excerpt = message[:600] + truncation_mark
    else:
        excerpt = ""

    scan = {"build_id": build_id}
    scan["scan_url"] = f"{base}/s/{build_id}"
    scan["outcome"] = result.get("outcome") or ""
    scan["timestamp_ms"] = result.get("startTimestamp") or 0
    scan["tags"] = result.get("tags") or []
    scan["work_unit"] = result.get("workUnitName") or ""
    scan["failure_excerpt"] = excerpt
    return scan
177+
178+
179+
def collect_flaky_scans(history, *, base, limit, seen=None):
    """Up to ``limit`` recent scans where the test failed or flaked.

    Args:
        history: test-history mapping; its ``testResults`` list is walked in
            the order given.
        base: URL prefix forwarded to ``result_scan`` for the scan link.
        limit: maximum number of scans to return. Zero or a negative value
            yields an empty list and leaves ``seen`` untouched.
        seen: optional set of build ids that were already reported. It is
            updated in place so callers can de-duplicate across several
            calls; a fresh set is used when omitted.

    Returns:
        A list of scan dicts (see ``result_scan``), one per distinct build id
        whose outcome is in ``FLAKY_OUTCOMES``.
    """
    if limit <= 0:
        # The loop only checks the cap after appending, so without this
        # guard a non-positive limit would still return (and mark as seen)
        # one scan.
        return []
    if seen is None:
        seen = set()
    out = []
    for r in (history.get("testResults") or []):
        if r.get("outcome") not in FLAKY_OUTCOMES:
            continue
        bid = r.get("buildId") or ""
        # Rows without a build id cannot be linked; repeated ids are skipped.
        if not bid or bid in seen:
            continue
        seen.add(bid)
        out.append(result_scan(r, base=base))
        if len(out) >= limit:
            break
    return out
176195

177196

197+
def collect_flaky_scans_by_time(fetch_history, *, base, since_ms, until_ms,
                                limit, seen, depth=0):
    """Find flaky/failed scans in busy history windows.

    The dashboard history endpoint caps ``testResults`` at 50 recent entries.
    For high-volume tests, a day with flaky results can still return only
    passed rows. Use the trend counts to split that window until the failed or
    flaky rows are visible.

    Args:
        fetch_history: callable ``(since_ms, until_ms)`` returning the history
            mapping for that window.
        base: URL prefix forwarded to ``collect_flaky_scans``.
        since_ms: start of the window, in milliseconds.
        until_ms: end of the window, in milliseconds.
        limit: maximum number of scans wanted. When zero or negative no scans
            are collected, but the window is still fetched once so a failure
            sample can be returned to a caller that lacks one.
        seen: set of build ids already reported; updated in place.
        depth: current split depth; external callers leave it at 0.

    Returns:
        ``(sample_build, sample_failure, scans)`` where the first two come
        from ``best_failure_sample`` and ``scans`` holds at most ``limit``
        scan dicts.
    """
    if limit <= 0:
        # The caller already holds enough scans but may still be missing a
        # failure sample; look at this window once without collecting scans
        # (and without touching ``seen``).
        history = fetch_history(since_ms, until_ms)
        sample_build, sample_failure = best_failure_sample(history)
        return sample_build, sample_failure, []

    history = fetch_history(since_ms, until_ms)
    sample_build, sample_failure = best_failure_sample(history)
    scans = collect_flaky_scans(history, base=base, limit=limit, seen=seen)
    # Done when rows were found, or when the trend says there is nothing
    # flaky in this window to look for.
    if scans or flaky_result_count(history) == 0:
        return sample_build, sample_failure, scans

    # Stop splitting once the depth cap or the minimum window size is hit.
    if (depth >= MAX_HISTORY_SPLIT_DEPTH
            or until_ms - since_ms <= MIN_HISTORY_WINDOW_MS):
        return sample_build, sample_failure, scans

    mid_ms = (since_ms + until_ms) // 2
    # Search the later half first, then the earlier half if still short.
    right_build, right_failure, right_scans = collect_flaky_scans_by_time(
        fetch_history, base=base, since_ms=mid_ms + 1, until_ms=until_ms,
        limit=limit, seen=seen, depth=depth + 1,
    )
    scans.extend(right_scans)
    if not sample_failure:
        sample_build, sample_failure = right_build, right_failure

    if len(scans) < limit:
        left_build, left_failure, left_scans = collect_flaky_scans_by_time(
            fetch_history, base=base, since_ms=since_ms, until_ms=mid_ms,
            limit=limit - len(scans), seen=seen, depth=depth + 1,
        )
        scans.extend(left_scans)
        if not sample_failure:
            sample_build, sample_failure = left_build, left_failure

    return sample_build, sample_failure, scans
238+
239+
178240
def per_day_flake_breakdown(history):
179241
points = ((history.get("outcomeTrend") or {}).get("dataPoints")) or []
180242
rows = []
@@ -268,28 +330,30 @@ def main():
268330
since_ms=since_ms, until_ms=until_ms,
269331
)
270332
sample_build, sample_failure = best_failure_sample(history)
271-
recent_scans = collect_flaky_scans(history, base=base, limit=5)
333+
seen_scans = set()
334+
recent_scans = collect_flaky_scans(
335+
history, base=base, limit=5, seen=seen_scans)
272336
# The window-wide /tests-data/test-history response truncates
273337
# results and tends to omit the flaky/failed entries. Re-query
274-
# narrowed to each day-bucket that had a flaky execution to fill
275-
# in the failure sample and recent-scan list.
338+
# narrowed to each day-bucket that had a flaky execution. Busy
339+
# tests can still return only passed rows for an entire day, so
340+
# recursively split those windows until the flaky rows are visible.
276341
if not sample_failure or not recent_scans:
277-
for s_ms, e_ms in flaky_day_buckets(history):
278-
day_history = fetch_test_history(
342+
def fetch_history_window(since_ms, until_ms):
343+
return fetch_test_history(
279344
base, container=cname, test_name=chosen["name"],
280-
since_ms=s_ms, until_ms=e_ms,
345+
since_ms=since_ms, until_ms=until_ms,
281346
)
282-
if not sample_failure:
283-
sample_build, sample_failure = best_failure_sample(
284-
day_history)
285-
day_scans = collect_flaky_scans(
286-
day_history, base=base, limit=5 - len(recent_scans),
347+
348+
for s_ms, e_ms in flaky_day_buckets(history):
349+
day_build, day_failure, day_scans = collect_flaky_scans_by_time(
350+
fetch_history_window, base=base, since_ms=s_ms,
351+
until_ms=e_ms, limit=5 - len(recent_scans),
352+
seen=seen_scans,
287353
)
288-
seen = {s["build_id"] for s in recent_scans}
289-
for s in day_scans:
290-
if s["build_id"] not in seen:
291-
recent_scans.append(s)
292-
seen.add(s["build_id"])
354+
if not sample_failure:
355+
sample_build, sample_failure = day_build, day_failure
356+
recent_scans.extend(day_scans)
293357
if sample_failure and len(recent_scans) >= 5:
294358
break
295359

0 commit comments

Comments
 (0)