Skip to content

Commit fef3f25

Browse files
committed
Re-worked activitycommits script.
Minimizes "dead air" wait times and prioritizes collecting available data. Should reduce update runtimes.
1 parent e1d7bde commit fef3f25

1 file changed

Lines changed: 64 additions & 26 deletions

File tree

_visualize/scripts/get_repos_activitycommits.py

Lines changed: 64 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
from os import environ as env
33
import re
44
from datetime import datetime, timezone
5+
import time
56

67
ghDataDir = env.get("GITHUB_DATA", "../github-data")
78
datfilepath = "%s/intRepos_ActivityCommits.json" % ghDataDir
@@ -20,40 +21,77 @@
2021
dataCollector.data = {"data": {}}
2122

2223
# Initialize query manager
23-
queryMan = qm.GitHubQueryManager(maxRetry=10, retryDelay=2)
24+
queryMan = qm.GitHubQueryManager(maxRetry=3, retryDelay=2)
25+
26+
""" Unique handling for queries with especially slow response times.
27+
Prioritizes successful collection from as many repos as possible by moving
28+
on and coming back to repos that we're still waiting on,
29+
(rather than awaiting one at a time).
30+
Also allows for graceful termination of the script when exceeding a given
31+
time limit, preserving any successfully collected data."""
32+
# Set maximum loop count (like maxRetry, but for full list, not per-query)
33+
maxLoops = 5
34+
# Set execution time limit (can use `None` to remove limit)
35+
maxRuntime = 5.5 * 60 * 60 # 5.5 hrs as seconds (suited to GitHub job limits)
36+
# Counters
37+
endTime = None if maxRuntime is None else time.monotonic() + maxRuntime
38+
loopCount = 0
2439

2540
# Iterate through internal repos
2641
print("Gathering data across multiple queries...")
27-
for repo in repolist:
28-
print("\n'%s'" % (repo))
42+
while (endTime is None or time.monotonic() < endTime) and (loopCount < maxLoops):
43+
loopCount += 1
44+
print("\nPass %s (max %s)" % (loopCount, maxLoops))
45+
46+
for repo in repolist:
47+
48+
# Stop iteration if time limit exceeded
49+
if endTime is not None and time.monotonic() >= endTime:
50+
print("\nWarning: Script time limit reached.")
51+
print(
52+
"Runtime exceeded %s seconds during Pass %s of %s"
53+
% (maxRuntime, loopCount, maxLoops)
54+
)
55+
break
56+
57+
print("\n'%s'" % (repo))
2958

30-
r = repo.split("/")
59+
# Only check repos that weren't recorded in previous loops.
60+
if "data" in dataCollector.data.keys() and repo in dataCollector.data["data"]:
61+
print("Already recorded data for '%s'" % (repo))
62+
continue
3163

32-
gitquery = re.sub("OWNNAME", r[0], query_in)
33-
gitquery = re.sub("REPONAME", r[1], gitquery)
64+
r = repo.split("/")
3465

35-
try:
36-
outObj = queryMan.queryGitHub(gitquery, rest=True)
37-
except Exception as error:
38-
print("Warning: Could not complete '%s'" % (repo))
39-
print(error)
40-
continue
66+
gitquery = re.sub("OWNNAME", r[0], query_in)
67+
gitquery = re.sub("REPONAME", r[1], gitquery)
4168

42-
for item in outObj:
43-
# Remove per-day data, keep only weekly totals
4469
try:
45-
del item["days"]
46-
except KeyError:
47-
pass
48-
# Convert unix timestamps into standard dates (rounded to nearest week to improve aggregate data)
49-
weekinfo = datetime.fromtimestamp(item["week"], tz=timezone.utc).isocalendar()
50-
weekstring = str(weekinfo[0]) + "-W" + str(weekinfo[1]) + "-1"
51-
item["week"] = datetime.strptime(weekstring, "%Y-W%W-%w").strftime("%Y-%m-%d")
52-
53-
# Update collective data
54-
dataCollector.data["data"][repo] = outObj
55-
56-
print("'%s' Done!" % (repo))
70+
outObj = queryMan.queryGitHub(gitquery, rest=True)
71+
except Exception as error:
72+
print("Warning: Could not complete '%s'" % (repo))
73+
print(error)
74+
continue
75+
76+
for item in outObj:
77+
# Remove per-day data, keep only weekly totals
78+
try:
79+
del item["days"]
80+
except KeyError:
81+
pass
82+
# Convert unix timestamps into standard dates (rounded to nearest week to improve aggregate data)
83+
weekinfo = datetime.fromtimestamp(
84+
item["week"], tz=timezone.utc
85+
).isocalendar()
86+
weekstring = str(weekinfo[0]) + "-W" + str(weekinfo[1]) + "-1"
87+
item["week"] = datetime.strptime(weekstring, "%Y-W%W-%w").strftime(
88+
"%Y-%m-%d"
89+
)
90+
91+
# Update collective data
92+
dataCollector.data["data"][repo] = outObj
93+
94+
print("'%s' Done!" % (repo))
5795

5896
print("\nCollective data gathering complete!")
5997

0 commit comments

Comments
 (0)