-
Notifications
You must be signed in to change notification settings - Fork 16
Expand file tree
/
Copy pathgraphql.py
More file actions
177 lines (159 loc) · 5.31 KB
/
Copy pathgraphql.py
File metadata and controls
177 lines (159 loc) · 5.31 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
import os
import requests
from datetime import timedelta
import numpy as np
import pandas as pd
from IPython.display import display
from ipywidgets import widgets
# GraphQL selection set for the comments of an issue or pull request: the last
# 100 comments, each with its author association, timestamps, URL and author
# login. It is substituted into the `{comments}` placeholders of `gql_template`.
# The backslash right after the opening quotes suppresses the leading newline.
comments_query = """\
comments(last: 100) {
edges {
node {
authorAssociation
createdAt
updatedAt
url
author {
login
}
}
}
}
"""
# GraphQL fields requested for both pull requests and issues (state, ids,
# title, URL, timestamps, first 10 labels, number and author information).
# It is substituted into the `{base_elements}` placeholders of `gql_template`,
# once inside the PullRequest fragment and once inside the Issue fragment.
base_elements = """\
state
id
title
url
createdAt
updatedAt
closedAt
labels(first: 10) {
edges {
node {
name
}
}
}
number
authorAssociation
author {
login
}
"""
# `str.format` template for the full GraphQL search document.
#   {query}         -> the arguments of `search(...)` (page size, search
#                      string, type and, after the first page, a cursor)
#   {base_elements} -> fields shared by pull requests and issues
#   {comments}      -> the comments selection set
# Doubled braces (`{{` / `}}`) are literal GraphQL braces escaped for
# `str.format`. Pull requests additionally request merge information and
# diff statistics (deletions, additions, changedFiles).
gql_template = """\
{{
search({query}) {{
issueCount
pageInfo {{
endCursor
hasNextPage
}}
nodes {{
... on PullRequest {{
{base_elements}
mergedBy {{
login
}}
mergeCommit {{
oid
}}
deletions
additions
changedFiles
{comments}
}}
... on Issue {{
{base_elements}
{comments}
}}
}}
}}
}}
"""
# Define our query object that we'll re-use for github search
class GitHubGraphQlQuery():
    """Run a GitHub search through the GraphQL API and collect the results.

    The instance stores the search string and the HTTP headers. Calling
    `request` pages through the search results and stores them as a pandas
    DataFrame in `self.data`.
    """

    def __init__(self, query, display_progress=True, auth=None):
        """Run a GitHub GraphQL query and return the issue/PR data from it.

        Parameters
        ----------
        query : string
            The GitHub search query to run. This is similar to whatever you'd use
            to search on GitHub.com.
        display_progress : bool
            Whether to display a progress bar as data is fetched.
        auth : string | None
            An authentication token for GitHub. If None, then the environment
            variable `GITHUB_ACCESS_TOKEN` will be tried.
        """
        self.query = query

        # Authentication: an explicitly passed token wins over the environment.
        headers = {}
        auth = os.environ.get('GITHUB_ACCESS_TOKEN') if auth is None else auth
        # A missing *or empty* token means "no Authorization header"; sending
        # a bare "Bearer " header would only produce a confusing failure.
        if auth:
            headers["Authorization"] = "Bearer %s" % auth

        self.headers = headers
        self.gql_template = gql_template
        self.display_progress = display_progress

    @staticmethod
    def _escape_quotes(text):
        """Backslash-escape every double quote in `text` that is not already escaped.

        The search string is embedded in a double-quoted GraphQL string, so a
        raw `"` (e.g. `label:"help wanted"`) would end the literal early.
        Quotes the caller already escaped are left untouched.
        """
        pieces = []
        after_backslash = False
        for char in str(text):
            if char == '"' and not after_backslash:
                pieces.append('\\"')
            else:
                pieces.append(char)
            # A backslash escapes the next character unless it is itself escaped.
            after_backslash = (char == '\\') and not after_backslash
        return ''.join(pieces)

    def _search_arguments(self, n_per_page, cursor=None):
        """Return the argument string placed inside `search(...)` for one page.

        Parameters
        ----------
        n_per_page : int
            Number of results requested for this page.
        cursor : string | None
            `endCursor` of the previous page, or None for the first page.
        """
        # NOTE: This main search query has a type, but the query string also has a type.
        # ref ("search"): https://developer.github.com/v4/query/#connections
        arguments = [
            'first: %s' % n_per_page,
            'query: "%s"' % self._escape_quotes(self.query),
            'type: ISSUE',
        ]
        if cursor is not None:
            arguments.append('after: "%s"' % cursor)
        return ', '.join(arguments)

    def request(self, n_pages=100, n_per_page=50):
        """Make a request to the GitHub GraphQL API.

        This generates an attribute `self.data` with a pandas
        DataFrame of the issue / PR activity corresponding to
        the query you ran. When nothing is collected, `self.data` is an
        empty DataFrame.

        Parameters
        ----------
        n_pages : int
            Maximum number of result pages to fetch.
        n_per_page : int
            Number of results requested per page.

        Raises
        ------
        Exception
            If the API answers with a non-200 status code or the response
            body contains an "errors" entry.
        """
        # Collect paginated issues
        self.issues_and_or_prs = []
        page_info = None
        progress = None

        for page_index in range(n_pages):
            cursor = None if page_info is None else page_info['endCursor']
            gql_query = self.gql_template.format(
                query=self._search_arguments(n_per_page, cursor),
                comments=comments_query,
                base_elements=base_elements,
            )

            response = requests.post(
                'https://api.github.com/graphql',
                json={'query': gql_query},
                headers=self.headers,
            )
            if response.status_code != 200:
                raise Exception("Query failed to run by returning code of {}. {}".format(response.status_code, gql_query))

            # Decode the body once and reuse it for error checks and parsing.
            payload = response.json()
            if "errors" in payload:
                raise Exception("Query failed to run with error {}. {}".format(payload['errors'], gql_query))
            self.last_request = response

            # Parse the response for this pagination
            search = payload['data']['search']
            if page_index == 0:
                if search['issueCount'] == 0:
                    print("Found no entries for query.")
                    self.data = pd.DataFrame()
                    return

                total_pages = int(np.ceil(search['issueCount'] / n_per_page))
                print("Found {} items, which will take {} pages".format(search['issueCount'], total_pages))

                # Never more pages than the caller allowed, so the bar can
                # actually reach its maximum.
                pages_to_fetch = min(total_pages, n_pages)
                progress = widgets.IntProgress(
                    value=0,
                    min=0,
                    max=pages_to_fetch,
                    description='Downloading:',
                    bar_style='',
                )
                if pages_to_fetch > 1 and self.display_progress:
                    display(progress)

            # Add the JSON to the raw data list
            self.issues_and_or_prs.extend(search['nodes'])
            page_info = search['pageInfo']
            self.last_query = gql_query

            # Update progress and should we stop?
            progress.value += 1
            if page_info['hasNextPage'] is False:
                progress.bar_style = 'success'
                break

        # Nothing collected (e.g. n_pages <= 0): return an empty frame instead
        # of failing on the missing columns below.
        if not self.issues_and_or_prs:
            self.data = pd.DataFrame()
            return

        # Create a dataframe of the issues and/or PRs
        self.data = pd.DataFrame(self.issues_and_or_prs)

        # Add some extra fields
        # `author` can be None in the response; keep None in that case.
        self.data['author'] = self.data['author'].map(lambda a: a['login'] if a is not None else a)
        # URLs look like https://github.com/<org>/<repo>/..., so the org and
        # repo are path segments 3 and 4 after splitting on '/'.
        self.data['org'] = self.data['url'].map(lambda a: a.split('/')[3])
        self.data['repo'] = self.data['url'].map(lambda a: a.split('/')[4])