|
| 1 | +"""Fetch and process GitHub issues + PR data for aws/agentcore-cli.""" |
| 2 | +import json |
| 3 | +import subprocess |
| 4 | +import sys |
| 5 | +from collections import Counter, defaultdict |
| 6 | +from datetime import datetime, timedelta |
| 7 | + |
# GitHub repository in "owner/name" form; interpolated into the
# /repos/{REPO}/... REST endpoints built by the helpers in this file.
REPO = "aws/agentcore-cli"
| 9 | + |
| 10 | + |
def _gh(endpoint):
    """Call a repository-scoped REST endpoint via the `gh` CLI and decode it.

    `endpoint` is appended to ``/repos/{REPO}/`` and requested with
    ``--paginate``. The captured stdout is parsed as a single JSON document.
    A non-zero exit from `gh` raises ``subprocess.CalledProcessError``.
    """
    command = ["gh", "api", "--paginate", f"/repos/{REPO}/{endpoint}"]
    completed = subprocess.run(command, capture_output=True, text=True, check=True)
    return json.loads(completed.stdout)
| 15 | + |
| 16 | + |
def _gh_single(endpoint):
    """Call a repository-scoped REST endpoint once (no pagination) and decode it.

    `endpoint` is appended to ``/repos/{REPO}/``. The captured stdout is parsed
    as JSON. A non-zero exit from `gh` raises ``subprocess.CalledProcessError``.
    """
    command = ["gh", "api", f"/repos/{REPO}/{endpoint}"]
    completed = subprocess.run(command, capture_output=True, text=True, check=True)
    return json.loads(completed.stdout)
| 21 | + |
| 22 | + |
| 23 | +def _dt(s): |
| 24 | + return datetime.fromisoformat(s.replace("Z", "+00:00")) if s else None |
| 25 | + |
| 26 | + |
| 27 | +def _fmt_h(h): |
| 28 | + if h < 1: return f"{h*60:.0f}m" |
| 29 | + if h < 24: return f"{h:.1f}h" |
| 30 | + return f"{h/24:.1f}d" |
| 31 | + |
| 32 | + |
| 33 | +def _percentiles(vals): |
| 34 | + if not vals: return {"med": 0, "avg": 0, "p90": 0} |
| 35 | + s = sorted(vals) |
| 36 | + return {"med": round(s[len(s)//2], 1), "avg": round(sum(s)/len(s), 1), "p90": round(s[int(len(s)*0.9)], 1)} |
| 37 | + |
| 38 | + |
| 39 | +# ── Issues ────────────────────────────────────────────────────────── |
| 40 | + |
def fetch_issues():
    """Download every issue (open and closed) for the repository.

    The REST issues listing also returns pull requests; those entries carry a
    "pull_request" key and are dropped here. Progress is printed to stdout.
    """
    print("Fetching issues...")
    issues = []
    for item in _gh("issues?state=all&per_page=100"):
        if "pull_request" in item:
            continue
        issues.append(item)
    print(f" {len(issues)} issues")
    return issues
| 47 | + |
| 48 | + |
def compute_issues(raw):
    """Aggregate raw GitHub issue objects into report statistics.

    Args:
        raw: Issue dicts as returned by the REST issues listing (pull requests
            already removed). The keys read are: number, title, state,
            created_at, closed_at, labels[].name, assignees[].login, comments,
            reactions.total_count, state_reason (optional), user.login and
            author_association.

    Returns:
        A dict with the keys ``gen`` (generation timestamp), ``ov`` (overview
        counters and formatted resolution times), ``timeline`` (weekly
        opened/closed/cumulative-open rows), ``labels``, ``res_by_label``,
        ``age`` (open-issue age buckets), ``authors``, ``engagement``,
        ``zero_eng``, ``stale``, ``common_terms`` and ``unused_labels``.
        An empty `raw` yields the same structure with zero counts, empty
        lists and empty ``start``/``end`` strings instead of raising.
    """
    # Function-scope import: the file itself only imports datetime/timedelta.
    from datetime import timezone

    issues = [{
        "number": i["number"], "title": i["title"], "state": i["state"],
        "created": _dt(i["created_at"]), "closed": _dt(i["closed_at"]),
        "labels": [l["name"] for l in i["labels"]],
        "assignees": [a["login"] for a in i["assignees"]],
        "comments": i["comments"], "reactions": i["reactions"]["total_count"],
        "state_reason": i.get("state_reason"),
        "author": i["user"]["login"], "author_type": i["author_association"],
    } for i in raw]
    issues.sort(key=lambda i: i["created"])

    # Reuse the tzinfo of the parsed timestamps so "now - created" is valid;
    # fall back to UTC when there is nothing to copy it from.
    now = datetime.now(issues[0]["created"].tzinfo if issues else timezone.utc)

    total = len(issues)
    opened = sum(1 for i in issues if i["state"] == "open")
    labeled = sum(1 for i in issues if i["labels"])
    assigned = sum(1 for i in issues if i["assignees"])

    dates = [i["created"] for i in issues]  # ascending, because issues is sorted
    close_dates = [i["closed"] for i in issues if i["closed"]]
    first_created = dates[0] if dates else None
    last_created = dates[-1] if dates else None
    span = (last_created - first_created).days if dates else 0
    p = _percentiles([(i["closed"] - i["created"]).total_seconds() / 3600 for i in issues if i["closed"]])

    # Weekly timeline, one row per Monday-aligned week. It runs through the
    # last *close* as well as the last creation; stopping at the final
    # creation week would drop later closes and overstate the ending backlog.
    weeks, cum = [], 0
    if dates:
        timeline_end = max([last_created, *close_dates])
        cur = first_created.replace(hour=0, minute=0, second=0, microsecond=0) - timedelta(days=first_created.weekday())
        while cur <= timeline_end:
            nxt = cur + timedelta(days=7)
            wo = sum(1 for d in dates if cur <= d < nxt)
            wc = sum(1 for d in close_dates if cur <= d < nxt)
            cum += wo - wc
            weeks.append({"week": cur.strftime("%b %d"), "opened": wo, "closed": wc, "cum": cum})
            cur = nxt

    # Per-label counts; "(unlabeled)" is added last so ties keep real labels first.
    lc = Counter(l for i in issues for l in i["labels"])
    lc["(unlabeled)"] = total - labeled
    label_stats = []
    for label, count in lc.most_common():
        if label == "(unlabeled)":
            sub = [i for i in issues if not i["labels"]]
        else:
            sub = [i for i in issues if label in i["labels"]]
        n_open = sum(1 for i in sub if i["state"] == "open")
        n_closed = len(sub) - n_open
        label_stats.append({"label": label, "count": count, "open": n_open, "closed": n_closed,
                            "pct": round(n_closed * 100 / len(sub)) if sub else 0})

    # Resolution time (hours) grouped by label, most-populated label first.
    lt = defaultdict(list)
    for i in issues:
        if not i["closed"]:
            continue
        h = (i["closed"] - i["created"]).total_seconds() / 3600
        for l in (i["labels"] or ["(unlabeled)"]):
            lt[l].append(h)
    res_by_label = [{"label": l, "n": len(t), **_percentiles(t)} for l, t in sorted(lt.items(), key=lambda x: -len(x[1]))]

    # Age buckets for open issues; limits are exclusive upper bounds in hours.
    age_limits = [(24, "< 1 day"), (72, "1-3 days"), (168, "3-7 days"),
                  (336, "1-2 weeks"), (672, "2-4 weeks"), (1440, "1-2 months")]
    ab = {name: 0 for _, name in age_limits}
    ab["> 2 months"] = 0
    for i in issues:
        if i["state"] != "open":
            continue
        h = (now - i["created"]).total_seconds() / 3600
        ab[next((name for limit, name in age_limits if h < limit), "> 2 months")] += 1

    # Top authors; "type" is the association recorded on the author's earliest issue.
    ac = Counter(i["author"] for i in issues)
    author_type = {}
    for i in issues:
        author_type.setdefault(i["author"], i["author_type"])
    authors = [{"author": a, "count": c, "type": author_type.get(a, "")} for a, c in ac.most_common(15)]

    # Ten most-commented issues.
    top_eng = [{"number": i["number"], "title": i["title"][:60], "comments": i["comments"], "reactions": i["reactions"], "state": i["state"]}
               for i in sorted(issues, key=lambda i: i["comments"], reverse=True)[:10]]

    # Stale: open for more than 14 days with no comments, oldest first.
    stale = sorted(
        [{"number": i["number"], "title": i["title"][:55], "age": (now - i["created"]).days, "labels": i["labels"]}
         for i in issues if i["state"] == "open" and (now - i["created"]).days > 14 and i["comments"] == 0],
        key=lambda x: -x["age"])

    # Common terms in unlabeled issue titles (each term counted once per title).
    stop = {"the","a","an","in","on","of","to","for","is","and","or","not","with","from","by","at","it","as","be","was","are","this","that","but","if","no","do","does","can","has","have","had","i","my","we","our","you","your","its","all","any","new","after","when","should","would","could","into","than","then","also","just","about","up","out","so","how","what","why","which","where","who","been","being","will","more","some","only","other","each","both","few","most","very","too","here","there","these","those","such","same","own","between","through","during","before","while","since","until","against","above","below","over","under","again","further","once","already","still","now","get","set","use","add","run","try","see","let","make","take","give","go","come","find","keep","put","show","tell","say","ask","work","seem","feel","leave","call","need","may","must","shall","might","done","got","went","came","made","took","gave","said","told","used","found","left","called","started","tried","ran","saw","let","known","given","taken","shown","become","gone","kept","put","brought","thought","told","sent","received","held","read","written","spoken","broken","chosen","driven","eaten","fallen","forgotten","gotten","hidden","ridden","risen","shaken","stolen","sworn","thrown","worn","woken","wound","wrung","built","burnt","dealt","dreamt","felt","hung","knelt","leant","leapt","learnt","meant","met","paid","sold","shot","slid","slung","slit","smelt","spelt","spent","spilt","spun","split","spoilt","spread","sprung","stood","stuck","stung","stunk","struck","strung","swept","swum","swung","taught","torn","trod","understood","wept","won","wound","woven","wrung"}
    strip_chars = "[]():#'\",.!?"
    word_counts = Counter()
    for i in issues:
        if i["labels"]:
            continue
        terms = {w.lower().strip(strip_chars) for w in i["title"].split()}
        # Apply the length filter AFTER stripping punctuation so tokens such
        # as "..." or "(x)" do not survive as empty or one-letter terms.
        word_counts.update(t for t in terms - stop if len(t) > 2)
    common_terms = {term: count for term, count in word_counts.most_common(20) if count >= 3}

    # Labels from a fixed list that never appear on any issue.
    # NOTE(review): the list is hard-coded here, not fetched from the repository.
    all_labels_on_issues = set()
    for i in issues:
        all_labels_on_issues.update(i["labels"])
    repo_labels = {"bug", "enhancement", "question", "documentation", "good first issue", "help wanted", "invalid", "duplicate", "wontfix"}
    unused_labels = sorted(repo_labels - all_labels_on_issues)

    return {
        "gen": now.strftime("%Y-%m-%d %H:%M UTC"),
        "ov": {"total": total, "open": opened, "closed": total - opened, "labeled": labeled, "unlabeled": total - labeled, "assigned": assigned, "unassigned": total - assigned,
               "completed": sum(1 for i in issues if i["state_reason"] == "completed"), "not_planned": sum(1 for i in issues if i["state_reason"] == "not_planned"), "dupes": sum(1 for i in issues if i["state_reason"] == "duplicate"),
               "start": first_created.strftime("%b %d, %Y") if dates else "", "end": last_created.strftime("%b %d, %Y") if dates else "", "span": span, "rate": round(total / max(span, 1) * 7, 1),
               "med": _fmt_h(p["med"]), "avg": _fmt_h(p["avg"]), "p90": _fmt_h(p["p90"]),
               "member": sum(1 for i in issues if i["author_type"] == "MEMBER"), "community": sum(1 for i in issues if i["author_type"] != "MEMBER")},
        "timeline": weeks, "labels": label_stats, "res_by_label": res_by_label,
        "age": ab, "authors": authors, "engagement": top_eng,
        "zero_eng": sum(1 for i in issues if i["comments"] == 0 and i["reactions"] == 0 and i["state"] == "open"),
        "stale": stale, "common_terms": common_terms, "unused_labels": unused_labels,
    }
| 154 | + |
| 155 | + |
| 156 | +# ── Pull Requests (GraphQL for speed) ────────────────────────────── |
| 157 | + |
def fetch_prs():
    """Download all pull requests, with labels and up to 20 reviews each.

    Uses one GraphQL query per page of 100 PRs (newest first) through
    ``gh api graphql`` and follows ``pageInfo.endCursor`` until
    ``hasNextPage`` is false. Progress is printed to stdout.

    The repository owner/name come from ``REPO`` and are passed, together
    with the page cursor, as GraphQL variables (``-f key=value``) instead of
    being spliced into the query text. This keeps the query in step with the
    REST helpers that also use ``REPO``.

    Returns:
        The list of raw pull-request nodes from the GraphQL response.

    Raises:
        subprocess.CalledProcessError: if `gh` exits with a non-zero status.
    """
    print("Fetching PRs with reviews (GraphQL)...")
    owner, name = REPO.split("/", 1)
    query = (
        "query($owner: String!, $name: String!, $cursor: String) {"
        "repository(owner: $owner, name: $name) {"
        "pullRequests(first: 100, states: [MERGED, CLOSED, OPEN], "
        "orderBy: {field: CREATED_AT, direction: DESC}, after: $cursor) {"
        "pageInfo { hasNextPage endCursor } "
        "nodes { number title state createdAt mergedAt closedAt isDraft "
        "author { login } "
        "labels(first: 10) { nodes { name } } "
        "reviews(first: 20) { nodes { author { login } state submittedAt } } "
        "}}}}"
    )
    prs = []
    cursor = None
    while True:
        cmd = ["gh", "api", "graphql",
               "-f", f"query={query}",
               "-f", f"owner={owner}",
               "-f", f"name={name}"]
        if cursor:
            # Omitted on the first page, which leaves $cursor null.
            cmd += ["-f", f"cursor={cursor}"]
        r = subprocess.run(cmd, capture_output=True, text=True, check=True)
        data = json.loads(r.stdout)["data"]["repository"]["pullRequests"]
        prs.extend(data["nodes"])
        print(f" ...{len(prs)} PRs")
        if not data["pageInfo"]["hasNextPage"]:
            break
        cursor = data["pageInfo"]["endCursor"]
    print(f" Done: {len(prs)} PRs with inline reviews")
    return prs
| 175 | + |
| 176 | + |
def compute_prs(gql_prs):
    """Aggregate GraphQL pull-request nodes into report statistics.

    Args:
        gql_prs: PR nodes with the fields number, title, state, createdAt,
            mergedAt, isDraft, author{login}, labels{nodes{name}} and
            reviews{nodes{submittedAt}}. PRs authored by
            "github-actions[bot]" are ignored; a missing author is reported
            as "ghost".

    Returns:
        A dict with the keys ``gen``, ``ov`` (overview counters plus formatted
        time-to-first-review / time-to-merge percentiles), ``timeline``,
        ``ttfr_raw``, ``ttm_raw``, ``size_stats``, ``top_authors`` and
        ``stale``. When no PR remains after filtering, the same structure is
        returned with zero counts, empty lists and empty ``start``/``end``
        strings instead of raising.
    """
    # Function-scope import: the file itself only imports datetime/timedelta.
    from datetime import timezone

    prs = []
    for p in gql_prs:
        author = p["author"]["login"] if p["author"] else "ghost"
        if author == "github-actions[bot]":
            continue
        created = _dt(p["createdAt"])
        merged = _dt(p["mergedAt"])
        review_times = (_dt(rv.get("submittedAt")) for rv in p["reviews"]["nodes"])
        first_review = min((t for t in review_times if t), default=None)
        prs.append({
            "number": p["number"], "title": p["title"],
            "state": "open" if p["state"] == "OPEN" else "closed",
            "created": created, "merged": merged,
            "draft": p["isDraft"], "author": author,
            "labels": [l["name"] for l in p["labels"]["nodes"]],
            "ttfr_h": round((first_review - created).total_seconds() / 3600, 1) if first_review else None,
            "ttm_h": round((merged - created).total_seconds() / 3600, 1) if merged else None,
        })

    prs.sort(key=lambda p: p["created"])
    # Reuse the tzinfo of the parsed timestamps so "now - created" is valid;
    # fall back to UTC when there is nothing to copy it from.
    now = datetime.now(prs[0]["created"].tzinfo if prs else timezone.utc)
    total = len(prs)
    n_merged = sum(1 for p in prs if p["merged"])
    closed_no_merge = sum(1 for p in prs if p["state"] == "closed" and not p["merged"])
    open_prs = sum(1 for p in prs if p["state"] == "open")
    drafts = sum(1 for p in prs if p["draft"] and p["state"] == "open")

    ttfr = [p["ttfr_h"] for p in prs if p["ttfr_h"] is not None]
    ttm = [p["ttm_h"] for p in prs if p["ttm_h"] is not None]
    no_review = sum(1 for p in prs if p["ttfr_h"] is None and p["merged"])

    dates = [p["created"] for p in prs]  # ascending, because prs is sorted
    merge_dates = [p["merged"] for p in prs if p["merged"]]
    first_created = dates[0] if dates else None
    last_created = dates[-1] if dates else None

    # Weekly timeline, one row per Monday-aligned week, running through the
    # last merge as well as the last creation so late merges are not dropped.
    # NOTE(review): "cum" is opened minus merged; PRs closed without merging
    # are never subtracted. Confirm that is the intended meaning.
    weeks, cum = [], 0
    if dates:
        timeline_end = max([last_created, *merge_dates])
        cur = first_created.replace(hour=0, minute=0, second=0, microsecond=0) - timedelta(days=first_created.weekday())
        while cur <= timeline_end:
            nxt = cur + timedelta(days=7)
            wo = sum(1 for d in dates if cur <= d < nxt)
            wm = sum(1 for d in merge_dates if cur <= d < nxt)
            cum += wo - wm
            weeks.append({"week": cur.strftime("%b %d"), "opened": wo, "merged": wm, "cum": cum})
            cur = nxt

    # Size distribution keyed by the first "size/..." label on each PR.
    size_counts = Counter()
    size_ttm = defaultdict(list)
    for p in prs:
        sz = next((l for l in p["labels"] if l.startswith("size/")), "(no size label)")
        size_counts[sz] += 1
        if p["ttm_h"] is not None:
            size_ttm[sz].append(p["ttm_h"])
    size_stats = [{"size": s, "count": c, **_percentiles(size_ttm.get(s, []))} for s, c in size_counts.most_common()]

    # Top authors by PR count.
    ac = Counter(p["author"] for p in prs)
    top_authors = [{"author": a, "count": c} for a, c in ac.most_common(15)]

    # Stale: open for more than 7 days, oldest first.
    stale = sorted(
        [{"number": p["number"], "title": p["title"][:55], "age": (now - p["created"]).days, "author": p["author"], "draft": p["draft"]}
         for p in prs if p["state"] == "open" and (now - p["created"]).days > 7],
        key=lambda x: -x["age"])

    tp = _percentiles(ttfr)
    mp = _percentiles(ttm)
    return {
        "gen": now.strftime("%Y-%m-%d %H:%M UTC"),
        "ov": {"total": total, "merged": n_merged, "closed_no_merge": closed_no_merge, "open": open_prs, "drafts": drafts,
               "merge_rate": round(n_merged * 100 / (n_merged + closed_no_merge)) if (n_merged + closed_no_merge) else 0,
               "ttfr_med": _fmt_h(tp["med"]), "ttfr_avg": _fmt_h(tp["avg"]), "ttfr_p90": _fmt_h(tp["p90"]),
               "ttm_med": _fmt_h(mp["med"]), "ttm_avg": _fmt_h(mp["avg"]), "ttm_p90": _fmt_h(mp["p90"]),
               "no_review_merged": no_review,
               "start": first_created.strftime("%b %d, %Y") if dates else "", "end": last_created.strftime("%b %d, %Y") if dates else "",
               "span": (last_created - first_created).days if dates else 0},
        "timeline": weeks, "ttfr_raw": ttfr, "ttm_raw": ttm,
        "size_stats": size_stats, "top_authors": top_authors, "stale": stale,
    }
0 commit comments