Skip to content

Commit ab196ff

Browse files
committed
Add installation_audit.py
1 parent 22cbb9f commit ab196ff

File tree

1 file changed

+193
-0
lines changed

1 file changed

+193
-0
lines changed

installation_audit.py

Lines changed: 193 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,193 @@
1+
"""
2+
gitstream-cm[bot] Activity Auditor (PAT version)
3+
Fetches all audit log actions by gitstream-cm[bot] in a single GitHub org within a time window.
4+
5+
Requirements:
6+
pip install requests
7+
8+
Usage:
9+
python installation_audit.py linearbtest --token ghp_xxx
10+
"""
11+
12+
import requests
13+
import json
14+
import csv
15+
import logging
16+
from datetime import datetime, timezone
17+
from collections import Counter
18+
19+
# ─── CONFIG ──────────────────────────────────────────────────────────────────
20+
21+
# Personal access token. Stays None at import time; main() assigns it from --token.
PAT = None

# Audit window bounds (UTC). fetch_audit_log treats both ends as inclusive.
TIME_FROM = datetime(year=2026, month=3, day=30, tzinfo=timezone.utc)
TIME_TO = datetime(
    year=2026, month=4, day=4,
    hour=23, minute=59, second=59,
    tzinfo=timezone.utc,
)

# Name of the CSV file main() writes the flattened audit rows to.
OUTPUT_CSV = "bot_activity.csv"
27+
28+
# ─── SETUP ───────────────────────────────────────────────────────────────────
29+
30+
# Root logger: INFO and above goes to a stream handler and to a file named
# "audit.log" (relative path, so it lands in the current working directory).
_LOG_HANDLERS = [logging.StreamHandler(), logging.FileHandler("audit.log")]
logging.basicConfig(
    handlers=_LOG_HANDLERS,
    format="%(asctime)s [%(levelname)s] %(message)s",
    level=logging.INFO,
)
log = logging.getLogger(__name__)

# Request headers for the GitHub REST API. PAT is still None when this
# module-level value is built, so it is only a placeholder: main() rebuilds
# HEADERS once the real token has been read from the command line.
HEADERS = {
    "Authorization": f"Bearer {PAT}",
    "Accept": "application/vnd.github+json",
    "X-GitHub-Api-Version": "2022-11-28",
}
42+
43+
# ─── HELPERS ─────────────────────────────────────────────────────────────────
44+
45+
def parse_ts(ts) -> datetime:
    """Convert an audit-log timestamp into a timezone-aware ``datetime``.

    Args:
        ts: Either a numeric epoch value in milliseconds (int or float), or
            an ISO-8601 string. A trailing ``Z`` is accepted as UTC.

    Returns:
        An aware ``datetime``. Numeric input is returned in UTC; string input
        keeps the offset it was written with.

    Raises:
        ValueError: If a string cannot be parsed as ISO-8601.
    """
    if isinstance(ts, (int, float)):
        # Epoch value is in milliseconds; fromtimestamp expects seconds.
        return datetime.fromtimestamp(ts / 1000, tz=timezone.utc)

    # fromisoformat() only understands a literal "Z" suffix on newer Python
    # versions, so normalise it to an explicit offset first.
    parsed = datetime.fromisoformat(ts.replace("Z", "+00:00"))
    if parsed.tzinfo is None:
        # NOTE(review): an offset-less string is assumed to be UTC so the
        # result can be compared against the aware window bounds.
        parsed = parsed.replace(tzinfo=timezone.utc)
    return parsed
49+
50+
# ─── CORE ────────────────────────────────────────────────────────────────────
51+
52+
def fetch_audit_log(org: str, time_from: datetime, time_to: datetime) -> tuple[list[dict], str]:
    """Collect gitstream-cm[bot] audit-log entries for *org* inside a time window.

    Pages through ``/orgs/{org}/audit-log`` (100 entries per page) and keeps
    every entry whose timestamp lies in ``[time_from, time_to]``, both ends
    inclusive. Paging stops at the first entry older than ``time_from``.
    NOTE(review): that early stop assumes the API returns entries
    newest-first — confirm before adding an ``order`` parameter to the query.

    Args:
        org: GitHub organization login.
        time_from: Aware lower bound of the window.
        time_to: Aware upper bound of the window.

    Returns:
        ``(entries, status)`` where ``status`` is one of:

        * ``"ok"`` – request(s) succeeded; ``entries`` may be empty.
        * ``"no_access"`` – the API answered 403.
        * ``"not_found"`` – the API answered 404.
        * ``"error_<code>"`` – any other non-200 HTTP status.
        * ``"error"`` – the request itself failed (timeout, connection
          problem) or the body was not valid JSON.

        For every status other than ``"ok"`` the entry list is empty, even if
        earlier pages had already produced matches.

        Each entry is a dict with keys ``org``, ``timestamp`` (ISO string),
        ``action``, ``actor``, ``repo`` and ``details`` (a JSON string holding
        all remaining fields of the raw audit record).
    """
    url = (
        f"https://api.github.com/orgs/{org}/audit-log"
        f"?phrase=actor:gitstream-cm%5Bbot%5D&include=all&per_page=100"
    )
    # Fields promoted to their own columns; everything else goes into "details".
    promoted = ("action", "actor", "repo", "@timestamp", "created_at")

    matches = []

    while url:
        try:
            resp = requests.get(url, headers=HEADERS, timeout=15)
        except requests.RequestException as exc:
            log.error("Audit-log request for %s failed: %s", org, exc)
            return [], "error"

        if resp.status_code == 403:
            return [], "no_access"
        if resp.status_code == 404:
            return [], "not_found"
        if resp.status_code != 200:
            return [], f"error_{resp.status_code}"

        try:
            entries = resp.json()
        except ValueError:
            log.error("Audit-log response for %s was not valid JSON", org)
            return [], "error"
        if not entries:
            break

        for entry in entries:
            ts = entry.get("@timestamp") or entry.get("created_at")
            if not ts:
                # Without a timestamp the entry cannot be placed in the window.
                # Skip it rather than treating it as epoch 0, which would look
                # "older than time_from" and end pagination prematurely.
                log.warning("Skipping audit entry without timestamp in %s", org)
                continue
            dt = parse_ts(ts)

            if dt < time_from:
                # Everything after this point is older than the window.
                return matches, "ok"

            if time_from <= dt <= time_to:
                matches.append({
                    "org": org,
                    "timestamp": dt.isoformat(),
                    "action": entry.get("action"),
                    "actor": entry.get("actor"),
                    "repo": entry.get("repo"),
                    "details": json.dumps({
                        k: v for k, v in entry.items() if k not in promoted
                    }),
                })

        # Follow the rel="next" URL from the Link header; None ends the loop.
        url = resp.links.get("next", {}).get("url")

    return matches, "ok"
107+
108+
# ─── MAIN ────────────────────────────────────────────────────────────────────
109+
110+
def main():
    """Command-line entry point: audit one org and report what the bot did.

    Parses ``org`` and ``--token`` from the command line, rebinds the
    module-level ``PAT`` and ``HEADERS`` with the real token, fetches the
    matching audit entries for the ``TIME_FROM``..``TIME_TO`` window, and then:

    * writes one row per entry to ``OUTPUT_CSV`` (only when entries exist);
    * logs totals, per-action counts, per-repo counts and a chronological
      timeline.
    """
    import argparse

    parser = argparse.ArgumentParser(description="Audit gitstream-cm[bot] actions in a GitHub org")
    parser.add_argument("org", help="GitHub org name (e.g. my-company)")
    parser.add_argument("--token", required=True, help="GitHub PAT with read:audit_log + read:org scopes")
    args = parser.parse_args()

    # HEADERS was built at import time with PAT=None; rebuild it with the
    # real token so fetch_audit_log() sends valid credentials.
    global PAT, HEADERS
    PAT = args.token
    HEADERS = {
        "Authorization": f"Bearer {PAT}",
        "Accept": "application/vnd.github+json",
        "X-GitHub-Api-Version": "2022-11-28",
    }

    log.info(f"Org : {args.org}")
    log.info(f"Window : {TIME_FROM.isoformat()} → {TIME_TO.isoformat()}")

    entries, status = fetch_audit_log(args.org, TIME_FROM, TIME_TO)
    all_entries = list(entries)

    if entries:
        log.info(f"⚠️ {args.org}: {len(entries)} actions found")
    elif status == "ok":
        log.info(f"{args.org}: {status}")
    else:
        # Access/HTTP/network failures should stand out from "nothing found".
        log.warning(f"{args.org}: {status}")

    # All timestamps are UTC ISO strings produced by the same code path, so
    # lexicographic order is chronological order.
    timeline = sorted(all_entries, key=lambda x: x["timestamp"])

    # ── Write CSV
    if timeline:
        csv_fields = ["timestamp", "action", "repo", "token_type", "conclusion",
                      "workflow_run_id", "user_agent"]
        csv_rows = []
        for e in timeline:
            details = json.loads(e["details"])
            csv_rows.append({
                "timestamp": e["timestamp"],
                "action": e["action"],
                "repo": details.get("repository", e.get("repo", "")),
                "token_type": details.get("programmatic_access_type", ""),
                "conclusion": details.get("conclusion", ""),
                "workflow_run_id": details.get("workflow_run_id", ""),
                "user_agent": details.get("user_agent", ""),
            })
        with open(OUTPUT_CSV, "w", newline="", encoding="utf-8") as f:
            writer = csv.DictWriter(f, fieldnames=csv_fields)
            writer.writeheader()
            writer.writerows(csv_rows)
        log.info(f"CSV written → {OUTPUT_CSV}")

    # ── Summary
    log.info("\n" + "=" * 50)
    log.info(f"Total actions found : {len(all_entries)}")

    if not all_entries:
        # Nothing was written, so do not point the user at a results file.
        return

    # Group by action type
    action_counts = Counter(e["action"] for e in all_entries)
    log.info("\nActions performed:")
    for action, count in action_counts.most_common():
        log.info(f" {action}: {count}")

    # Repos targeted
    repo_counts = Counter(e["repo"] for e in all_entries)
    log.info("\nRepos targeted:")
    for repo, count in repo_counts.most_common():
        log.info(f" {repo}: {count}")

    # Detailed timeline — every action
    log.info("\nTimeline:")
    for e in timeline:
        details = json.loads(e["details"])
        # "action" comes from entry.get(), so it may be None.
        action = e["action"] or ""
        extra = ""
        if action.startswith("git."):
            extra = f"| repo: {details.get('repository', e['repo'])}"
        elif action == "workflows.created_workflow_run":
            extra = f"| repo: {e['repo']} | token_type: {details.get('programmatic_access_type', '?')}"
        elif action == "workflows.completed_workflow_run":
            extra = f"| repo: {e['repo']} | conclusion: {details.get('conclusion', '?')}"
        log.info(f" {e['timestamp']} | {e['action']} {extra}")

    log.info(f"\nResults → {OUTPUT_CSV}")


if __name__ == "__main__":
    main()

0 commit comments

Comments
 (0)