|
WINDOW_DAYS = 7            # size of the main history window, in days — TODO confirm against caller
MIN_FLAKY = 5              # presumably the minimum flaky count to report a test; verify usage
RECENT_MODIFY_DAYS = 7     # presumably "recently modified" cutoff for source files; verify usage
# Test outcomes that count as flaky for this report (see collect_flaky_scans).
FLAKY_OUTCOMES = ("flaky", "failed")
# Hard cap on recursive window splits in collect_flaky_scans_by_time, so a
# window whose trend counts never line up with visible rows cannot recurse forever.
MAX_HISTORY_SPLIT_DEPTH = 12
# Stop splitting once a window is one minute or less (timestamps are in ms).
MIN_HISTORY_WINDOW_MS = 60 * 1000
34 | 37 |
|
35 | 38 |
|
36 | 39 | def http_get_json(url, *, timeout=60): |
@@ -99,6 +102,16 @@ def flaky_day_buckets(history): |
99 | 102 | return buckets |
100 | 103 |
|
101 | 104 |
|
def flaky_result_count(history):
    """Return the total failed/flaky executions in ``history``'s outcome trend.

    Sums the ``FLAKY_OUTCOMES`` entries of each trend data point's
    ``outcomeDistribution``; missing keys and ``None`` values count as zero.
    """
    trend = history.get("outcomeTrend") or {}
    count = 0
    for point in trend.get("dataPoints") or []:
        distribution = point.get("outcomeDistribution") or {}
        for outcome in FLAKY_OUTCOMES:
            count += int(distribution.get(outcome) or 0)
    return count
| 114 | + |
102 | 115 | def _all_source_files(): |
103 | 116 | """Tracked .java/.groovy/.kt files, posix-relative to WORKSPACE_ROOT.""" |
104 | 117 | out = subprocess.check_output( |
@@ -148,33 +161,82 @@ def best_failure_sample(history): |
148 | 161 | return "", "" |
149 | 162 |
|
150 | 163 |
|
151 | | -def collect_flaky_scans(history, *, base, limit): |
def result_scan(result, *, base):
    """Flatten one raw test-result row into a scan-summary dict.

    ``base`` is the dashboard base URL used to build the scan link. The
    failure message is truncated to 600 characters with an ellipsis suffix;
    an absent message yields an empty excerpt.
    """
    build_id = result.get("buildId") or ""
    message = result.get("firstFailureMessage") or ""
    excerpt = ""
    if message:
        excerpt = message[:600]
        if len(message) > 600:
            excerpt += " \u2026"
    return {
        "build_id": build_id,
        "scan_url": f"{base}/s/{build_id}",
        "outcome": result.get("outcome") or "",
        "timestamp_ms": result.get("startTimestamp") or 0,
        "tags": result.get("tags") or [],
        "work_unit": result.get("workUnitName") or "",
        "failure_excerpt": excerpt,
    }
| 178 | + |
def collect_flaky_scans(history, *, base, limit, seen=None):
    """Up to ``limit`` recent scans where the test failed or flaked.

    Walks ``history["testResults"]`` in order, skipping rows whose outcome
    is not in ``FLAKY_OUTCOMES`` and rows with a missing or already-seen
    build id. Matching rows are flattened via ``result_scan``.

    ``seen`` is a shared set of build ids, mutated in place so repeated
    calls (e.g. across split windows) never emit the same build twice.

    Bug fix: guard ``limit <= 0`` up front. Previously the length check ran
    only AFTER appending, so ``limit=0`` returned one scan instead of none
    (and still polluted ``seen``).
    """
    out = []
    if limit <= 0:
        return out
    if seen is None:
        seen = set()
    for r in (history.get("testResults") or []):
        if r.get("outcome") not in FLAKY_OUTCOMES:
            continue
        bid = r.get("buildId") or ""
        if not bid or bid in seen:
            continue
        seen.add(bid)
        out.append(result_scan(r, base=base))
        if len(out) >= limit:
            break
    return out
176 | 195 |
|
177 | 196 |
|
def collect_flaky_scans_by_time(fetch_history, *, base, since_ms, until_ms,
                                limit, seen, depth=0):
    """Find flaky/failed scans in busy history windows.

    The dashboard history endpoint caps ``testResults`` at 50 recent entries.
    For high-volume tests, a day with flaky results can still return only
    passed rows. Use the trend counts to split that window until the failed or
    flaky rows are visible.

    ``fetch_history`` is called as ``fetch_history(since_ms, until_ms)`` and
    must return a history dict. ``seen`` is a set of build ids shared across
    recursive calls (mutated via ``collect_flaky_scans``) so no build is
    reported twice. Returns ``(sample_build, sample_failure, scans)``.
    """
    # Nothing left to collect; callers rely on this to stop fan-out early.
    # NOTE(review): this also skips the fetch, so no failure sample is
    # produced for a zero-limit window — confirm callers don't need one.
    if limit <= 0:
        return "", "", []

    history = fetch_history(since_ms, until_ms)
    sample_build, sample_failure = best_failure_sample(history)
    scans = collect_flaky_scans(history, base=base, limit=limit, seen=seen)
    # Done if we found rows, or the trend says this window has no flaky
    # executions at all (splitting further could not reveal any).
    if scans or flaky_result_count(history) == 0:
        return sample_build, sample_failure, scans

    # Trend counts promise flaky rows we cannot see; split — but bound the
    # recursion by depth and by a minimum window size to guarantee termination.
    if (depth >= MAX_HISTORY_SPLIT_DEPTH
            or until_ms - since_ms <= MIN_HISTORY_WINDOW_MS):
        return sample_build, sample_failure, scans

    mid_ms = (since_ms + until_ms) // 2
    # Search the newer half first so the most recent flaky scans win the
    # limited slots; the shared ``seen`` set keeps the halves disjoint.
    right_build, right_failure, right_scans = collect_flaky_scans_by_time(
        fetch_history, base=base, since_ms=mid_ms + 1, until_ms=until_ms,
        limit=limit, seen=seen, depth=depth + 1,
    )
    scans.extend(right_scans)
    if not sample_failure:
        sample_build, sample_failure = right_build, right_failure

    # Only descend into the older half for whatever budget remains.
    if len(scans) < limit:
        left_build, left_failure, left_scans = collect_flaky_scans_by_time(
            fetch_history, base=base, since_ms=since_ms, until_ms=mid_ms,
            limit=limit - len(scans), seen=seen, depth=depth + 1,
        )
        scans.extend(left_scans)
        if not sample_failure:
            sample_build, sample_failure = left_build, left_failure

    return sample_build, sample_failure, scans
| 238 | + |
| 239 | + |
178 | 240 | def per_day_flake_breakdown(history): |
179 | 241 | points = ((history.get("outcomeTrend") or {}).get("dataPoints")) or [] |
180 | 242 | rows = [] |
@@ -268,28 +330,30 @@ def main(): |
268 | 330 | since_ms=since_ms, until_ms=until_ms, |
269 | 331 | ) |
270 | 332 | sample_build, sample_failure = best_failure_sample(history) |
271 | | - recent_scans = collect_flaky_scans(history, base=base, limit=5) |
| 333 | + seen_scans = set() |
| 334 | + recent_scans = collect_flaky_scans( |
| 335 | + history, base=base, limit=5, seen=seen_scans) |
272 | 336 | # The window-wide /tests-data/test-history response truncates |
273 | 337 | # results and tends to omit the flaky/failed entries. Re-query |
274 | | - # narrowed to each day-bucket that had a flaky execution to fill |
275 | | - # in the failure sample and recent-scan list. |
| 338 | + # narrowed to each day-bucket that had a flaky execution. Busy |
| 339 | + # tests can still return only passed rows for an entire day, so |
| 340 | + # recursively split those windows until the flaky rows are visible. |
276 | 341 | if not sample_failure or not recent_scans: |
277 | | - for s_ms, e_ms in flaky_day_buckets(history): |
278 | | - day_history = fetch_test_history( |
| 342 | + def fetch_history_window(since_ms, until_ms): |
| 343 | + return fetch_test_history( |
279 | 344 | base, container=cname, test_name=chosen["name"], |
280 | | - since_ms=s_ms, until_ms=e_ms, |
| 345 | + since_ms=since_ms, until_ms=until_ms, |
281 | 346 | ) |
282 | | - if not sample_failure: |
283 | | - sample_build, sample_failure = best_failure_sample( |
284 | | - day_history) |
285 | | - day_scans = collect_flaky_scans( |
286 | | - day_history, base=base, limit=5 - len(recent_scans), |
| 347 | + |
| 348 | + for s_ms, e_ms in flaky_day_buckets(history): |
| 349 | + day_build, day_failure, day_scans = collect_flaky_scans_by_time( |
| 350 | + fetch_history_window, base=base, since_ms=s_ms, |
| 351 | + until_ms=e_ms, limit=5 - len(recent_scans), |
| 352 | + seen=seen_scans, |
287 | 353 | ) |
288 | | - seen = {s["build_id"] for s in recent_scans} |
289 | | - for s in day_scans: |
290 | | - if s["build_id"] not in seen: |
291 | | - recent_scans.append(s) |
292 | | - seen.add(s["build_id"]) |
| 354 | + if not sample_failure: |
| 355 | + sample_build, sample_failure = day_build, day_failure |
| 356 | + recent_scans.extend(day_scans) |
293 | 357 | if sample_failure and len(recent_scans) >= 5: |
294 | 358 | break |
295 | 359 |
|
|
0 commit comments