Skip to content

Commit 98bf1ee

Browse files
committed
Allow calling via python -m
1 parent bfaeffb commit 98bf1ee

3 files changed

Lines changed: 220 additions & 220 deletions

File tree

devstats/__init__.py

Lines changed: 0 additions & 219 deletions
Original file line number | Diff line number | Diff line change
@@ -1,219 +0,0 @@
1-
import os
2-
import requests
3-
import sys
4-
import json
5-
import click
6-
7-
# Read the GitHub API token at import time; every request sent by this module
# authenticates with it.
try:
    token = os.environ["GRAPH_API_KEY"]
except KeyError:
    # Report on stderr and exit with a non-zero status so that shells and CI
    # see the failure (a bare ``sys.exit()`` reports success).
    print("You need to set GRAPH_API_KEY", file=sys.stderr)
    print("But you shouldn't use this yet.", file=sys.stderr)
    sys.exit(1)

# GitHub GraphQL endpoint and the authorization header sent with each query.
endpoint = r"https://api.github.com/graphql"
headers = {"Authorization": f"bearer {token}"}
16-
17-
18-
def load_query_from_file(fname, repo_owner="numpy", repo_name="numpy"):
    """
    Load an 'issue' query from file and set the target repository, where
    the target repository has the format:

    https://github.com/<repo_owner>/<repo_name>

    Parameters
    ----------
    fname : str
        Path to a text file containing a valid issue query according to the
        GitHub GraphQL schema. The file is read as UTF-8.
    repo_owner : str
        Owner of target repository on GitHub. Default is 'numpy'.
    repo_name : str
        Name of target repository on GitHub. Default is 'numpy'.

    Returns
    -------
    query : str
        Query loaded from file in text form suitable for ``send_query``.

    Notes
    -----
    This function expects the query to have a specific form and will not work
    for general GitHub GraphQL queries: the template must contain the
    ``_REPO_OWNER_`` and ``_REPO_NAME_`` placeholders, which are substituted
    textually. See ``examples/`` for some valid templated issue queries.
    """
    # Pin the encoding so the result does not depend on the platform locale.
    with open(fname, encoding="utf-8") as fh:
        query = fh.read()
    # Set target repo from template
    query = query.replace("_REPO_OWNER_", repo_owner)
    query = query.replace("_REPO_NAME_", repo_name)
    return query
52-
53-
54-
def send_query(query, query_type, cursor=None):
    """
    Send a GraphQL query via requests.post

    No validation is done on the query before sending. GitHub GraphQL
    pagination is supported through the `cursor` argument.

    Parameters
    ----------
    query : str
        The GraphQL query to be sent
    query_type : {"issues", "pullRequests"}
        The object being queried according to the GitHub GraphQL schema.
        Currently only issues and pullRequests are supported
    cursor : str, optional
        If given, then the cursor is injected into the query to support
        GitHub's GraphQL pagination.

    Returns
    -------
    dict
        The result of the query (json) parsed by `json.loads`

    Raises
    ------
    ValueError
        If `query_type` is not supported, or if `cursor` is given but the
        query does not contain ``<query_type>(`` to inject the cursor into.

    Notes
    -----
    This is intended mostly for internal use within `get_all_responses`.
    """
    # TODO: Expand this, either by parsing the query type from the query
    # directly or manually adding more query_types to the set
    if query_type not in {"issues", "pullRequests"}:
        raise ValueError(
            "Only 'issues' and 'pullRequests' queries are currently supported"
        )
    # TODO: Generalize this
    # WARNING: The cursor injection depends on the specific structure of the
    # query, this is the main reason why query types are limited to issues/PRs
    if cursor is not None:
        cursor_insertion_key = query_type + "("
        key_start = query.find(cursor_insertion_key)
        # ``str.find`` returns -1 when the key is absent; without this check
        # the cursor would be spliced into the query at a meaningless offset.
        if key_start == -1:
            raise ValueError(
                f"Cannot inject pagination cursor: '{cursor_insertion_key}' "
                "not found in query"
            )
        cursor_ind = key_start + len(cursor_insertion_key)
        query = query[:cursor_ind] + f'after:"{cursor}", ' + query[cursor_ind:]
    # Build request payload (newlines are stripped from the query text)
    payload = {"query": "".join(query.split("\n"))}
    response = requests.post(endpoint, json=payload, headers=headers)
    return json.loads(response.content)
98-
99-
100-
def get_all_responses(query, query_type):
    """
    Collect every result of `query` by paging through the GitHub GraphQL API.

    The first request is sent without a cursor; follow-up requests pass the
    last cursor seen so far, until as many entries have been gathered as the
    total count reported by the first response.

    Parameters
    ----------
    query : str
        The GraphQL query, as accepted by `send_query`.
    query_type : {"issues", "pullRequests"}
        The object being queried.

    Returns
    -------
    list
        The accumulated entries from all pages.
    """
    # First page: also yields the overall number of entries to expect
    first_page = send_query(query, query_type)
    collected, cursor, expected = parse_single_query(first_page, query_type)
    print(f"Retrieving {len(collected)} out of {expected} values...")
    # Keep paging until everything announced by the first page has arrived
    while True:
        if len(collected) >= expected:
            break
        page = parse_single_query(
            send_query(query, query_type, cursor=cursor), query_type
        )
        cursor = page[1]
        collected.extend(page[0])
        print(f"Retrieving {len(collected)} out of {expected} values...")
    print("Done.")
    return collected
116-
117-
118-
def parse_single_query(data, query_type):
    """
    Parse the data returned by `send_query`

    .. warning::

        Like `send_query`, the logic here depends on the specific structure
        of the query (e.g. it must be an issue or PR query, and must have a
        total count).

    Parameters
    ----------
    data : dict
        Parsed JSON response, expected to contain
        ``data["data"]["repository"][query_type]`` with ``totalCount`` and
        ``edges`` entries.
    query_type : {"issues", "pullRequests"}
        The object that was queried.

    Returns
    -------
    edges : list
        The ``edges`` entries of the response (empty if there are none).
    last_cursor : str or None
        Cursor of the last edge, or ``None`` when the response has no edges.
    total_count : int
        The ``totalCount`` value reported by the response.

    Raises
    ------
    KeyError, TypeError
        If the response does not have the expected structure. The raw
        response is printed before the exception propagates.
    """
    try:
        connection = data["data"]["repository"][query_type]
        total_count = connection["totalCount"]
        # Treat a missing (null) edge list like an empty one
        edges = connection["edges"] or []
        # A page without edges has no cursor; indexing [-1] would raise
        # IndexError, so report "no cursor" instead.
        last_cursor = edges[-1]["cursor"] if edges else None
    except (KeyError, TypeError):
        # Show the full raw response to help diagnose the malformed payload
        print(data)
        raise
    return edges, last_cursor, total_count
136-
137-
138-
class GithubGrabber:
    """
    Fetch data through the GitHub API v4 using a GraphQL query template.
    """

    def __init__(self, query_fname, query_type, repo_owner="numpy", repo_name="numpy"):
        """
        Set up a grabber for the repository located at
        https://github.com/<repo_owner>/<repo_name> and load its query.

        Parameters
        ----------
        query_fname : str
            Path to a file holding a GraphQL query that conforms to the
            GitHub GraphQL schema
        query_type : {"issues", "pullRequests"}
            Kind of object the query targets. Only "issues" and
            "pullRequests" are currently supported.
        repo_owner : str
            Repository owner. Default is "numpy"
        repo_name : str
            Repository name. Default is "numpy"
        """
        self.query_fname = query_fname
        self.query_type = query_type  # TODO: Parse this directly from query
        self.repo_owner, self.repo_name = repo_owner, repo_name
        # Nothing has been downloaded yet
        self.raw_data = None
        self.load_query()

    def load_query(self):
        """
        Read the query template from `query_fname` and store the query for
        the configured repository in `self.query`.
        """
        self.query = load_query_from_file(
            self.query_fname,
            repo_owner=self.repo_owner,
            repo_name=self.repo_name,
        )

    def get(self):
        """
        Run the query and keep the collected results in `self.raw_data`.
        """
        self.raw_data = get_all_responses(self.query, self.query_type)

    def dump(self, outfile):
        """
        Write the collected data to `outfile` as JSON.

        Raises
        ------
        ValueError
            If there is no data to write (`raw_data` is falsy).
        """
        if self.raw_data:
            with open(outfile, "w") as sink:
                json.dump(self.raw_data, sink)
            return
        raise ValueError("raw_data is currently empty, nothing to dump")
191-
192-
193-
@click.command()
@click.argument("repo_owner")
@click.argument("repo_name")
def main(repo_owner, repo_name):
    """Download and save issue and pr data for `repo_owner`/`repo_name`."""
    # (query template, query type, output file) for each download, in order:
    # issue data first, then PR data.
    downloads = (
        (
            "query_examples/issue_activity_since_date.gql",
            "issues",
            f"{repo_name}_issues.json",
        ),
        (
            "query_examples/pr_data_query.gql",
            "pullRequests",
            f"{repo_name}_prs.json",
        ),
    )
    for query_file, query_type, outfile in downloads:
        grabber = GithubGrabber(
            query_file,
            query_type,
            repo_owner=repo_owner,
            repo_name=repo_name,
        )
        grabber.get()
        grabber.dump(outfile)


# Run the command-line interface when executed as a script
if __name__ == "__main__":
    main()

0 commit comments

Comments
 (0)