Skip to content

Commit 47b984b

Browse files
committed
rename: compute.ts → transform.ts
1 parent d099d24 commit 47b984b

11 files changed

Lines changed: 3552 additions & 1 deletion

File tree

.github/dashboard/data.py

Lines changed: 256 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,256 @@
1+
"""Fetch and process GitHub issues + PR data for aws/agentcore-cli."""
2+
import json
3+
import subprocess
4+
import sys
5+
from collections import Counter, defaultdict
6+
from datetime import datetime, timedelta
7+
8+
# "owner/name" of the GitHub repository; the REST helpers below interpolate it
# into every "/repos/{REPO}/..." endpoint they request.
REPO = "aws/agentcore-cli"
9+
10+
11+
def _gh(endpoint):
    """Request a paginated REST endpoint of REPO via the ``gh`` CLI.

    ``endpoint`` is the path (and query string) below ``/repos/{REPO}/``.
    The command's stdout is decoded as JSON and returned. Because the
    process is run with ``check=True``, a non-zero exit from ``gh`` raises
    ``subprocess.CalledProcessError``.
    """
    command = ["gh", "api", "--paginate", f"/repos/{REPO}/{endpoint}"]
    completed = subprocess.run(command, capture_output=True, text=True, check=True)
    payload = completed.stdout
    return json.loads(payload)
15+
16+
17+
def _gh_single(endpoint):
    """Request one (non-paginated) REST endpoint of REPO via the ``gh`` CLI.

    Returns the JSON-decoded stdout of ``gh api /repos/{REPO}/{endpoint}``.
    Raises ``subprocess.CalledProcessError`` if ``gh`` exits non-zero.
    """
    url = f"/repos/{REPO}/{endpoint}"
    proc = subprocess.run(["gh", "api", url], capture_output=True, text=True, check=True)
    return json.loads(proc.stdout)
21+
22+
23+
def _dt(s):
    """Parse an ISO-8601 timestamp string into a ``datetime``.

    A literal ``Z`` is rewritten to ``+00:00`` before parsing so the value
    is accepted by ``datetime.fromisoformat``. Falsy input (``None`` or an
    empty string) yields ``None``.
    """
    if not s:
        return None
    normalized = s.replace("Z", "+00:00")
    return datetime.fromisoformat(normalized)
25+
26+
27+
def _fmt_h(h):
    """Render a duration given in hours as a short human-readable string.

    Below one hour the value is shown in whole minutes ("30m"), below 24
    hours in hours with one decimal ("5.0h"), otherwise in days with one
    decimal ("1.5d").
    """
    if h < 1:
        minutes = h * 60
        return f"{minutes:.0f}m"
    if h < 24:
        return f"{h:.1f}h"
    days = h / 24
    return f"{days:.1f}d"
31+
32+
33+
def _percentiles(vals):
    """Summarise a list of numbers as median, mean and 90th percentile.

    Args:
        vals: iterable of numbers (need not be sorted).

    Returns:
        ``{"med": ..., "avg": ..., "p90": ...}``, each rounded to one
        decimal. All three are 0 when ``vals`` is empty.

    The median of an even-length input is the mean of the two middle
    values (previously the upper-middle element was returned). The 90th
    percentile uses the nearest-rank definition, rank = ceil(0.9 * n);
    the previous index ``int(n * 0.9)`` was one position too high and
    returned the maximum for every n <= 10.
    """
    if not vals:
        return {"med": 0, "avg": 0, "p90": 0}
    s = sorted(vals)
    n = len(s)
    mid = n // 2
    median = s[mid] if n % 2 else (s[mid - 1] + s[mid]) / 2
    # ceil(0.9 * n) in exact integer arithmetic; always within 1..n for n >= 1.
    p90_rank = (9 * n + 9) // 10
    return {
        "med": round(median, 1),
        "avg": round(sum(s) / n, 1),
        "p90": round(s[p90_rank - 1], 1),
    }
37+
38+
39+
# ── Issues ──────────────────────────────────────────────────────────
40+
41+
def fetch_issues():
    """Download every issue (open and closed) of REPO, excluding pull requests.

    The REST "issues" listing also contains pull requests; entries that
    carry a "pull_request" key are dropped. Progress is printed to stdout.
    """
    print("Fetching issues...")
    issues = []
    for item in _gh("issues?state=all&per_page=100"):
        if "pull_request" in item:
            continue
        issues.append(item)
    print(f" {len(issues)} issues")
    return issues
47+
48+
49+
def compute_issues(raw):
    """Aggregate raw REST issue objects into the issues-dashboard payload.

    Args:
        raw: iterable of issue dicts. Each must provide "number", "title",
            "state", "created_at", "closed_at", "labels", "assignees",
            "comments", "reactions", "user" and "author_association";
            "state_reason" is optional.

    Returns:
        dict with the keys "gen", "ov", "timeline", "labels",
        "res_by_label", "age", "authors", "engagement", "zero_eng",
        "stale", "common_terms" and "unused_labels".

    An empty ``raw`` produces an all-zero payload (with "n/a" for the
    start/end dates) instead of raising.
    """
    # Local import: the module header only imports datetime and timedelta.
    from datetime import timezone

    issues = [{
        "number": i["number"], "title": i["title"], "state": i["state"],
        "created": _dt(i["created_at"]), "closed": _dt(i["closed_at"]),
        "labels": [l["name"] for l in i["labels"]],
        "assignees": [a["login"] for a in i["assignees"]],
        "comments": i["comments"], "reactions": i["reactions"]["total_count"],
        "state_reason": i.get("state_reason"),
        # A missing/null "user" falls back to "ghost", matching compute_prs.
        "author": (i.get("user") or {}).get("login", "ghost"),
        "author_type": i["author_association"],
    } for i in raw]
    issues.sort(key=lambda i: i["created"])
    # Timestamps are parsed as UTC-aware values and "gen" is labelled UTC, so
    # take "now" in UTC directly rather than borrowing tzinfo from issues[0]
    # (which raised IndexError on an empty list).
    now = datetime.now(timezone.utc)
    total = len(issues)
    opened = sum(1 for i in issues if i["state"] == "open")
    labeled = sum(1 for i in issues if i["labels"])
    assigned = sum(1 for i in issues if i["assignees"])
    dates = [i["created"] for i in issues]
    # issues is sorted by creation time, so the ends of dates are min and max.
    first, last = (dates[0], dates[-1]) if dates else (None, None)
    span = (last - first).days if dates else 0
    res = [(i["closed"] - i["created"]).total_seconds() / 3600 for i in issues if i["closed"]]
    p = _percentiles(res)

    # Weekly timeline
    close_dates = [i["closed"] for i in issues if i["closed"]]
    weeks, cum = [], 0
    if dates:
        # Align the first bucket to the Monday 00:00 of the first creation week.
        start = first.replace(hour=0, minute=0, second=0, microsecond=0) - timedelta(days=first.weekday())
        # Run through the latest event of either kind so closures that happen
        # after the last issue was opened still reach "closed" and "cum".
        end = max(dates + close_dates)
        cur = start
        while cur <= end:
            nxt = cur + timedelta(days=7)
            wo = sum(1 for d in dates if cur <= d < nxt)
            wc = sum(1 for d in close_dates if cur <= d < nxt)
            cum += wo - wc
            weeks.append({"week": cur.strftime("%b %d"), "opened": wo, "closed": wc, "cum": cum})
            cur = nxt

    # Labels
    lc = Counter()
    for i in issues:
        lc.update(i["labels"])
    lc["(unlabeled)"] = total - labeled
    label_stats = []
    for label, count in lc.most_common():
        if label != "(unlabeled)":
            sub = [i for i in issues if label in i["labels"]]
        else:
            sub = [i for i in issues if not i["labels"]]
        o = sum(1 for i in sub if i["state"] == "open")
        label_stats.append({"label": label, "count": count, "open": o, "closed": len(sub)-o, "pct": round((len(sub)-o)*100/len(sub)) if sub else 0})

    # Resolution by label
    lt = defaultdict(list)
    for i in issues:
        if not i["closed"]:
            continue
        h = (i["closed"] - i["created"]).total_seconds() / 3600
        for l in (i["labels"] or ["(unlabeled)"]):
            lt[l].append(h)
    res_by_label = [{"label": l, "n": len(t), **_percentiles(t)} for l, t in sorted(lt.items(), key=lambda x: -len(x[1]))]

    # Age buckets (open issues only; thresholds are in hours)
    ab = {"< 1 day": 0, "1-3 days": 0, "3-7 days": 0, "1-2 weeks": 0, "2-4 weeks": 0, "1-2 months": 0, "> 2 months": 0}
    for i in issues:
        if i["state"] != "open":
            continue
        h = (now - i["created"]).total_seconds() / 3600
        if h < 24: ab["< 1 day"] += 1
        elif h < 72: ab["1-3 days"] += 1
        elif h < 168: ab["3-7 days"] += 1
        elif h < 336: ab["1-2 weeks"] += 1
        elif h < 672: ab["2-4 weeks"] += 1
        elif h < 1440: ab["1-2 months"] += 1
        else: ab["> 2 months"] += 1

    # Authors
    ac = Counter(i["author"] for i in issues)
    authors = [{"author": a, "count": c, "type": next((i["author_type"] for i in issues if i["author"] == a), "")} for a, c in ac.most_common(15)]

    # Engagement
    top_eng = [{"number": i["number"], "title": i["title"][:60], "comments": i["comments"], "reactions": i["reactions"], "state": i["state"]} for i in sorted(issues, key=lambda i: i["comments"], reverse=True)[:10]]

    # Stale: open for more than 14 days with no comments, oldest first
    stale = sorted([{"number": i["number"], "title": i["title"][:55], "age": (now - i["created"]).days, "labels": i["labels"]} for i in issues if i["state"] == "open" and (now - i["created"]).days > 14 and i["comments"] == 0], key=lambda x: -x["age"])

    # Auto-extract common terms from unlabeled issue titles
    ul = [i for i in issues if not i["labels"]]
    stop = {"the","a","an","in","on","of","to","for","is","and","or","not","with","from","by","at","it","as","be","was","are","this","that","but","if","no","do","does","can","has","have","had","i","my","we","our","you","your","its","all","any","new","after","when","should","would","could","into","than","then","also","just","about","up","out","so","how","what","why","which","where","who","been","being","will","more","some","only","other","each","both","few","most","very","too","here","there","these","those","such","same","own","between","through","during","before","while","since","until","against","above","below","over","under","again","further","once","already","still","now","get","set","use","add","run","try","see","let","make","take","give","go","come","find","keep","put","show","tell","say","ask","work","seem","feel","leave","call","need","may","must","shall","might","done","got","went","came","made","took","gave","said","told","used","found","left","called","started","tried","ran","saw","let","known","given","taken","shown","become","gone","kept","put","brought","thought","told","sent","received","held","read","written","spoken","broken","chosen","driven","eaten","fallen","forgotten","gotten","hidden","ridden","risen","shaken","stolen","sworn","thrown","worn","woken","wound","wrung","built","burnt","dealt","dreamt","felt","hung","knelt","leant","leapt","learnt","meant","met","paid","sold","shot","slid","slung","slit","smelt","spelt","spent","spilt","spun","split","spoilt","spread","sprung","stood","stuck","stung","stunk","struck","strung","swept","swum","swung","taught","torn","trod","understood","wept","won","wound","woven","wrung"}
    word_counts = Counter()
    for i in ul:
        # Each term counts once per title (set), after trimming punctuation.
        words = {w.lower().strip("[]():#'\",.!?") for w in i["title"].split()}
        # Apply the length filter to the *stripped* token; filtering the raw
        # token let "" (from "...") and 1-2 character leftovers through.
        word_counts.update(w for w in words - stop if len(w) > 2)
    # Keep at most the 20 most frequent terms, and only those seen 3+ times
    common_terms = {term: count for term, count in word_counts.most_common(20) if count >= 3}

    # Existing labels that are defined but unused on issues
    all_labels_on_issues = set()
    for i in issues:
        all_labels_on_issues.update(i["labels"])
    repo_labels = {"bug", "enhancement", "question", "documentation", "good first issue", "help wanted", "invalid", "duplicate", "wontfix"}
    unused_labels = sorted(repo_labels - all_labels_on_issues)

    return {
        "gen": now.strftime("%Y-%m-%d %H:%M UTC"),
        "ov": {"total": total, "open": opened, "closed": total-opened, "labeled": labeled, "unlabeled": total-labeled, "assigned": assigned, "unassigned": total-assigned,
               "completed": sum(1 for i in issues if i["state_reason"] == "completed"), "not_planned": sum(1 for i in issues if i["state_reason"] == "not_planned"), "dupes": sum(1 for i in issues if i["state_reason"] == "duplicate"),
               "start": first.strftime("%b %d, %Y") if first else "n/a", "end": last.strftime("%b %d, %Y") if last else "n/a", "span": span, "rate": round(total/max(span,1)*7, 1),
               "med": _fmt_h(p["med"]), "avg": _fmt_h(p["avg"]), "p90": _fmt_h(p["p90"]),
               "member": sum(1 for i in issues if i["author_type"] == "MEMBER"), "community": sum(1 for i in issues if i["author_type"] != "MEMBER")},
        "timeline": weeks, "labels": label_stats, "res_by_label": res_by_label,
        "age": ab, "authors": authors, "engagement": top_eng,
        "zero_eng": sum(1 for i in issues if i["comments"] == 0 and i["reactions"] == 0 and i["state"] == "open"),
        "stale": stale, "common_terms": common_terms, "unused_labels": unused_labels,
    }
154+
155+
156+
# ── Pull Requests (GraphQL for speed) ──────────────────────────────
157+
158+
def fetch_prs():
    """Download every pull request of REPO, with labels and reviews, via GraphQL.

    Pages of 100 pull requests are requested through ``gh api graphql``
    until ``pageInfo.hasNextPage`` is false. Each returned node carries
    number, title, state, createdAt, mergedAt, closedAt, isDraft, author
    login, up to 10 label names and up to 20 reviews. Progress is printed
    to stdout.

    The repository owner/name are taken from REPO and, together with the
    page cursor, passed as GraphQL variables (``-f key=value``) rather than
    being spliced into the query text.

    Raises:
        subprocess.CalledProcessError: if ``gh`` exits non-zero.
    """
    print("Fetching PRs with reviews (GraphQL)...")
    owner, name = REPO.split("/", 1)
    query = (
        "query($owner:String!,$name:String!,$cursor:String){"
        "repository(owner:$owner,name:$name){"
        "pullRequests(first:100,states:[MERGED,CLOSED,OPEN],"
        "orderBy:{field:CREATED_AT,direction:DESC},after:$cursor){"
        "pageInfo{hasNextPage endCursor}"
        "nodes{number title state createdAt mergedAt closedAt isDraft "
        "author{login}labels(first:10){nodes{name}}"
        "reviews(first:20){nodes{author{login}state submittedAt}}}}}}"
    )
    prs = []
    cursor = None
    while True:
        cmd = ["gh", "api", "graphql", "-f", f"query={query}",
               "-f", f"owner={owner}", "-f", f"name={name}"]
        if cursor:
            # Omitting the variable on the first request leaves $cursor null,
            # which starts from the first page.
            cmd += ["-f", f"cursor={cursor}"]
        r = subprocess.run(cmd, capture_output=True, text=True, check=True)
        data = json.loads(r.stdout)["data"]["repository"]["pullRequests"]
        prs.extend(data["nodes"])
        print(f" ...{len(prs)} PRs")
        if not data["pageInfo"]["hasNextPage"]:
            break
        cursor = data["pageInfo"]["endCursor"]
    print(f" Done: {len(prs)} PRs with inline reviews")
    return prs
175+
176+
177+
def compute_prs(gql_prs):
    """Aggregate GraphQL pull-request nodes into the PR-dashboard payload.

    Args:
        gql_prs: iterable of pull-request nodes providing "number",
            "title", "state", "createdAt", "mergedAt", "isDraft", "author",
            "labels" and "reviews" (the latter two as ``{"nodes": [...]}``).

    Returns:
        dict with the keys "gen", "ov", "timeline", "ttfr_raw", "ttm_raw",
        "size_stats", "top_authors" and "stale". "ttfr" is hours from
        creation to the earliest review, "ttm" hours from creation to merge.

    Pull requests authored by the GitHub Actions bot are skipped. An input
    with no remaining pull requests produces an all-zero payload (with
    "n/a" for the start/end dates) instead of raising.
    """
    # Local import: the module header only imports datetime and timedelta.
    from datetime import timezone

    prs = []
    for p in gql_prs:
        author = p["author"]["login"] if p["author"] else "ghost"
        # NOTE(review): GraphQL appears to report bot logins without the
        # REST-style "[bot]" suffix, so accept both spellings - confirm.
        if author in ("github-actions[bot]", "github-actions"):
            continue
        created = _dt(p["createdAt"])
        merged = _dt(p["mergedAt"])
        review_times = [t for t in (_dt(rv.get("submittedAt")) for rv in p["reviews"]["nodes"]) if t]
        first_review = min(review_times) if review_times else None
        prs.append({
            "number": p["number"], "title": p["title"],
            "state": "open" if p["state"] == "OPEN" else "closed",
            "created": created, "merged": merged,
            "draft": p["isDraft"], "author": author,
            "labels": [l["name"] for l in p["labels"]["nodes"]],
            "ttfr_h": round((first_review - created).total_seconds() / 3600, 1) if first_review else None,
            "ttm_h": round((merged - created).total_seconds() / 3600, 1) if merged else None,
        })

    prs.sort(key=lambda p: p["created"])
    # Timestamps are parsed as UTC-aware values and "gen" is labelled UTC, so
    # take "now" in UTC directly rather than borrowing tzinfo from prs[0]
    # (which raised IndexError when no pull request was left).
    now = datetime.now(timezone.utc)
    total = len(prs)
    n_merged = sum(1 for p in prs if p["merged"])
    closed_no_merge = sum(1 for p in prs if p["state"] == "closed" and not p["merged"])
    open_prs = sum(1 for p in prs if p["state"] == "open")
    drafts = sum(1 for p in prs if p["draft"] and p["state"] == "open")

    ttfr = [p["ttfr_h"] for p in prs if p["ttfr_h"] is not None]
    ttm = [p["ttm_h"] for p in prs if p["ttm_h"] is not None]
    no_review = sum(1 for p in prs if p["ttfr_h"] is None and p["merged"])

    # Weekly timeline
    dates = [p["created"] for p in prs]
    # prs is sorted by creation time, so the ends of dates are min and max.
    first, last = (dates[0], dates[-1]) if dates else (None, None)
    merge_dates = [p["merged"] for p in prs if p["merged"]]
    weeks, cum = [], 0
    if dates:
        # Align the first bucket to the Monday 00:00 of the first creation week.
        start = first.replace(hour=0, minute=0, second=0, microsecond=0) - timedelta(days=first.weekday())
        # Run through the latest event of either kind so merges that happen
        # after the last PR was opened still reach "merged" and "cum".
        end = max(dates + merge_dates)
        cur = start
        while cur <= end:
            nxt = cur + timedelta(days=7)
            wo = sum(1 for d in dates if cur <= d < nxt)
            wm = sum(1 for d in merge_dates if cur <= d < nxt)
            cum += wo - wm
            weeks.append({"week": cur.strftime("%b %d"), "opened": wo, "merged": wm, "cum": cum})
            cur = nxt

    # Size distribution (first "size/..." label wins)
    size_counts = Counter()
    size_ttm = defaultdict(list)
    for p in prs:
        sz = next((l for l in p["labels"] if l.startswith("size/")), "(no size label)")
        size_counts[sz] += 1
        if p["ttm_h"] is not None:
            size_ttm[sz].append(p["ttm_h"])
    size_stats = [{"size": s, "count": c, **_percentiles(size_ttm.get(s, []))} for s, c in size_counts.most_common()]

    # Authors
    ac = Counter(p["author"] for p in prs)
    top_authors = [{"author": a, "count": c} for a, c in ac.most_common(15)]

    # Stale: open for more than 7 days, oldest first
    stale = sorted([{"number": p["number"], "title": p["title"][:55], "age": (now - p["created"]).days, "author": p["author"], "draft": p["draft"]} for p in prs if p["state"] == "open" and (now - p["created"]).days > 7], key=lambda x: -x["age"])

    tp = _percentiles(ttfr)
    mp = _percentiles(ttm)
    return {
        "gen": now.strftime("%Y-%m-%d %H:%M UTC"),
        "ov": {"total": total, "merged": n_merged, "closed_no_merge": closed_no_merge, "open": open_prs, "drafts": drafts,
               "merge_rate": round(n_merged*100/(n_merged+closed_no_merge)) if (n_merged+closed_no_merge) else 0,
               "ttfr_med": _fmt_h(tp["med"]), "ttfr_avg": _fmt_h(tp["avg"]), "ttfr_p90": _fmt_h(tp["p90"]),
               "ttm_med": _fmt_h(mp["med"]), "ttm_avg": _fmt_h(mp["avg"]), "ttm_p90": _fmt_h(mp["p90"]),
               "no_review_merged": no_review,
               "start": first.strftime("%b %d, %Y") if first else "n/a", "end": last.strftime("%b %d, %Y") if last else "n/a",
               "span": (last - first).days if dates else 0},
        "timeline": weeks, "ttfr_raw": ttfr, "ttm_raw": ttm,
        "size_stats": size_stats, "top_authors": top_authors, "stale": stale,
    }

.github/dashboard/generate.py

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
#!/usr/bin/env python3
2+
"""Generate static dashboard HTML files for GitHub Pages."""
3+
import sys
4+
from pathlib import Path
5+
6+
# Allow imports from this directory
7+
sys.path.insert(0, str(Path(__file__).parent))
8+
9+
from data import fetch_issues, compute_issues, fetch_prs, compute_prs
10+
from html_issues import build_issues_html
11+
from html_prs import build_prs_html
12+
13+
OUT = Path(__file__).parent / "site"
14+
15+
16+
def main():
    """Fetch issue and PR data and write the static dashboard pages into OUT.

    Writes ``issues.html``, ``prs.html`` and an ``index.html`` that
    redirects to ``issues.html``. Files are written as UTF-8 explicitly:
    without ``encoding=`` ``Path.write_text`` uses the locale's preferred
    encoding, which can fail or produce mis-encoded pages when the
    generated HTML contains non-ASCII characters.
    """
    OUT.mkdir(exist_ok=True)

    # Issues
    raw_issues = fetch_issues()
    issue_data = compute_issues(raw_issues)
    (OUT / "issues.html").write_text(build_issues_html(issue_data), encoding="utf-8")
    print(f" → {OUT / 'issues.html'}")

    # PRs
    raw_prs = fetch_prs()
    pr_data = compute_prs(raw_prs)
    (OUT / "prs.html").write_text(build_prs_html(pr_data), encoding="utf-8")
    print(f" → {OUT / 'prs.html'}")

    # Index redirect
    (OUT / "index.html").write_text(
        '<!DOCTYPE html><html><head><meta http-equiv="refresh" content="0;url=issues.html"></head></html>',
        encoding="utf-8",
    )
    print(f" → {OUT / 'index.html'}")
    print("Done!")


# Generate the site only when run as a script, not when imported.
if __name__ == "__main__":
    main()

0 commit comments

Comments
 (0)