-
Notifications
You must be signed in to change notification settings - Fork 76
Expand file tree
/
Copy pathfetch_stats.py
More file actions
124 lines (101 loc) · 4.46 KB
/
fetch_stats.py
File metadata and controls
124 lines (101 loc) · 4.46 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
import urllib.request
import json
import os
from datetime import datetime, timezone, timedelta
# --- CONFIGURATION ---
# Repository whose history is summarised; both values are interpolated into
# the API URLs built by this script.
OWNER = "ProjectX-VJTI"  # Update this!
REPO = "Xplore-workshop"  # Update this!

# Optional token taken from the environment; None when the variable is unset.
TOKEN = os.getenv("GITHUB_TOKEN")

# Headers attached to every API request.
headers = {
    "Accept": "application/vnd.github.v3+json",
    "X-GitHub-Api-Version": "2022-11-28",
}
if TOKEN:
    # Only send credentials when a token is actually configured. Without one
    # the request goes out with no Authorization header at all, instead of
    # an empty header value.
    headers["Authorization"] = f"Bearer {TOKEN}"
def fetch_paginated(endpoint, timeout=30):
    """Fetch all pages of a GitHub API list endpoint for the configured repo.

    Args:
        endpoint: Path below ``/repos/{OWNER}/{REPO}/``. It may already
            carry a query string (e.g. ``"pulls?state=all"``); the paging
            parameters are then appended with ``&`` instead of ``?``.
        timeout: Seconds to wait on each request before giving up, so a
            stalled connection cannot hang the script.

    Returns:
        A list holding the items of every page that was retrieved. Fetching
        is best-effort: on any error the problem is printed and the items
        collected so far are returned.
    """
    data = []
    page = 1
    # The joiner depends only on the endpoint, so work it out once.
    separator = "&" if "?" in endpoint else "?"
    while True:
        url = (
            f"https://api.github.com/repos/{OWNER}/{REPO}/{endpoint}"
            f"{separator}per_page=100&page={page}"
        )
        req = urllib.request.Request(url, headers=headers)
        try:
            with urllib.request.urlopen(req, timeout=timeout) as response:
                page_data = json.loads(response.read().decode())
        except Exception as e:
            # Deliberately broad: any failure ends paging but keeps what
            # has been collected so far.
            print(f"Error fetching {url}: {e}")
            break
        if not page_data:  # If the page is empty, we've hit the end
            break
        if not isinstance(page_data, list):
            # Extending with a dict would silently append its keys, so
            # treat a non-list payload as an error instead.
            print(
                f"Error fetching {url}: expected a JSON list, "
                f"got {type(page_data).__name__}"
            )
            break
        data.extend(page_data)
        page += 1
    return data
# Timestamp layout used by the date fields this script reads.
_GITHUB_TS_FORMAT = "%Y-%m-%dT%H:%M:%SZ"


def _parse_github_timestamp(value):
    """Parse a ``YYYY-MM-DDTHH:MM:SSZ`` string into a UTC-aware datetime."""
    return datetime.strptime(value, _GITHUB_TS_FORMAT).replace(tzinfo=timezone.utc)


def _new_user(login, avatar_url):
    """Build the zeroed record kept for one contributor."""
    return {
        "login": login,
        "avatar_url": avatar_url,
        "role": "contributor",
        "stats": {
            window: {"commits": 0, "prsOpened": 0, "prsMerged": 0}
            for window in ("all", "6m", "3m")
        },
    }


def _tally(user, metric, when, date_6m, date_3m):
    """Count one event under "all" and under each window that contains *when*."""
    stats = user["stats"]
    stats["all"][metric] += 1
    if when >= date_6m:
        stats["6m"][metric] += 1
    if when >= date_3m:
        stats["3m"][metric] += 1


def main():
    """Collect per-user commit and pull-request counts and write data.json.

    Commits and pull requests are fetched through ``fetch_paginated`` and
    tallied per GitHub login for three windows: all time, the last 180 days
    ("6m") and the last 90 days ("3m"). Commits without a linked GitHub
    account and pull requests without a user are skipped. A user's role is
    derived from the ``author_association`` of their pull requests and
    defaults to "contributor". The resulting list of user records is written
    to ``data.json`` in the current working directory.
    """
    print(f"Fetching complete commit and PR history for {OWNER}/{REPO}...")

    # Set up timeframes
    now = datetime.now(timezone.utc)
    date_3m = now - timedelta(days=90)
    date_6m = now - timedelta(days=180)

    users_data = {}

    def get_or_create_user(login, avatar_url):
        # The first sighting of a login fixes its avatar_url; later calls
        # return the existing record unchanged.
        if login not in users_data:
            users_data[login] = _new_user(login, avatar_url)
        return users_data[login]

    # 1. Fetch Commits manually to get exact dates
    print("Processing commits...")
    for c in fetch_paginated("commits"):
        author = c.get("author")
        if not author:
            continue  # Skip commits without linked GitHub accounts
        commit_date = _parse_github_timestamp(c["commit"]["author"]["date"])
        user = get_or_create_user(author.get("login"), author.get("avatar_url"))
        _tally(user, "commits", commit_date, date_6m, date_3m)

    # 2. Fetch all Pull Requests
    print("Processing pull requests...")
    for pr in fetch_paginated("pulls?state=all"):
        user_info = pr.get("user")
        if not user_info:
            continue
        login = user_info.get("login")
        user = get_or_create_user(login, user_info.get("avatar_url"))

        # Check roles natively via the PR association. "admin" is sticky:
        # once set it is never downgraded to "collaborator".
        association = pr.get("author_association", "")
        if association == "OWNER" or login == OWNER:
            user["role"] = "admin"
        elif association in ["MEMBER", "COLLABORATOR"] and user["role"] != "admin":
            user["role"] = "collaborator"

        # Tally PRs by date
        created_at = _parse_github_timestamp(pr["created_at"])
        _tally(user, "prsOpened", created_at, date_6m, date_3m)

        merged_at_str = pr.get("merged_at")
        if merged_at_str:  # If merged_at exists, the PR was merged
            merged_at = _parse_github_timestamp(merged_at_str)
            _tally(user, "prsMerged", merged_at, date_6m, date_3m)

    # Convert dictionary to list
    final_data = list(users_data.values())

    # Save to JSON
    with open("data.json", "w", encoding="utf-8") as f:
        json.dump(final_data, f, indent=4)

    print(f"Successfully generated data.json with {len(final_data)} contributors!")
# Script entry point: run the collection only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()