-
Notifications
You must be signed in to change notification settings - Fork 13
Expand file tree
/
Copy pathcommits.py
More file actions
289 lines (238 loc) · 10.4 KB
/
commits.py
File metadata and controls
289 lines (238 loc) · 10.4 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
"""Management of git commits during updates..."""
from git import git, empty_tree_rev, diff_tree
from io_utils import safe_decode
from updates.mailinglists import expanded_mailing_list
from utils import debug
import re
_REVERT_COMMIT_RE = re.compile(
    r"^This reverts commit (?P<hash>[0-9a-f]+)\.$", flags=re.MULTILINE
)
"""Pattern for the "This reverts commit <hash>." line of a revert commit.

It matches the same revert lines as ``revert_regex`` does in
``contrib/gcc-changelog/git_commit.py``.

Unlike that regex, this one is meant to be applied to the whole body
of a commit at once rather than to each of its lines in turn, which is
why it is compiled in multi-line mode: ``^`` and ``$`` then anchor at
every line boundary inside the body.  The reverted commit's
(hexadecimal) revision is available as the ``hash`` group.
"""
class CommitInfo(object):
    """A git commit.

    ATTRIBUTES
        rev: The commit's revision (SHA1).
        author_name: The author of the commit.
        author_email: The email address of the author of the commit.
        subject: The subject of the commit.
        parent_revs: A list of revisions (SHA1s) of the parents
            of this commit. The empty list if the commit has
            no parent. None if this attribute has not been
            computed.
        pre_existing_p: True if this commit already existed in another
            branch prior to the update, False otherwise. May be None,
            meaning that the value of that attribute has not been
            computed yet.
        send_email_p: True if a commit email should be sent for
            this commit, False otherwise. May be None, meaning that
            the value of that attribute has not been computed yet.
    """

    def __init__(self, rev, author_name, author_email, subject, parent_revs):
        """Initialize the commit from already-extracted information.

        No git command is run here; everything which requires one
        is computed lazily by the relevant method or property.

        PARAMETERS
            rev: Same as the attribute.
            author_name: Same as the attribute.
            author_email: Same as the attribute.
            subject: Same as the attribute.
            parent_revs: Same as the attribute.
        """
        self.rev = rev
        self.author_name = author_name
        self.author_email = author_email
        self.subject = subject
        self.parent_revs = parent_revs
        self.pre_existing_p = None
        self.send_email_p = None

        # A cache for the raw_revlog and the raw_revlog_lines methods.
        self.__raw_revlog = None
        self.__raw_revlog_lines = None

        # A cache for the "email_to" method (one entry per ref_name).
        self.__email_to = {}

        # A cache for the "all_files" method.
        self.__all_files = None

        # A cache for the "files_changed" method.
        self.__files_changed = None

        # A cache for the "added_files" method.
        self.__added_files = None

    def oneline_str(self):
        """A one-line string description of the commit.

        The description is made of the first 7 characters of the
        commit's revision and the first 59 characters of its subject.
        """
        return "%s... %s" % (self.rev[:7], self.subject[:59])

    @property
    def full_author_email(self):
        """Return the author's full email address (name and actual address)."""
        return "{self.author_name} <{self.author_email}>".format(self=self)

    @property
    def raw_revlog(self):
        """Return the commit's raw revlog.

        This is what Git calls the commit's "raw body (unwrapped subject
        and lines)".

        Note that the revlog is computed lazily and then cached.
        """
        if self.__raw_revlog is None:
            self.__raw_revlog = git.log(
                self.rev, max_count="1", pretty="format:%B", _decode=True
            )
        return self.__raw_revlog

    @property
    def raw_revlog_lines(self):
        """Return the commit's raw revlog split into lines.

        This is what Git calls the commit's "raw body (unwrapped subject
        and lines)".

        Note that the revlog and its split into lines is computed
        lazily and then cached.
        """
        if self.__raw_revlog_lines is None:
            self.__raw_revlog_lines = self.raw_revlog.splitlines()
        return self.__raw_revlog_lines

    def email_to(self, ref_name):
        """Return this commit's list of email recipients.

        Returns a list of email addresses, in RFC 822 format.

        The list is computed the first time it is requested for
        a given ref_name, and then cached (one entry per ref_name),
        so that the computation is only performed when required.

        PARAMETERS
            ref_name: The name of the reference being updated.
        """
        if ref_name not in self.__email_to:
            # The files_changed method itself (not its result) is what
            # gets passed here.
            # NOTE(review): presumably expanded_mailing_list calls it
            # only if it needs the list of files -- confirm against
            # updates.mailinglists.
            self.__email_to[ref_name] = expanded_mailing_list(
                ref_name, self.files_changed
            )
        return self.__email_to[ref_name]

    def all_files(self):
        """Return the list of all files in the repository for this commit.

        The list is computed lazily and then cached.
        """
        if self.__all_files is None:
            self.__all_files = self.__all_files_from_commit_rev(self.rev)
        return self.__all_files

    def files_changed(self):
        """Return the list of files changed by this commit (incl. new files).

        Cache the result in self.__files_changed so that subsequent
        calls to this method do not require calling git again.
        """
        if self.__files_changed is None:
            # Accumulate into a local list, and only store it in the cache
            # once complete. That way, should diff_tree raise, we do not
            # leave behind an empty list that later calls would mistake
            # for a valid (cached) answer.
            filenames = []
            all_changes = diff_tree("-r", self.base_rev_for_git(), self.rev)
            for item in all_changes:
                (old_mode, new_mode, old_sha1, new_sha1, status, filename) = item
                debug(
                    "diff-tree entry: %s %s %s %s %s %s"
                    % (old_mode, new_mode, old_sha1, new_sha1, status, filename),
                    level=5,
                )
                filenames.append(filename)
            self.__files_changed = filenames
        return self.__files_changed

    def added_files(self):
        """Return the list of files added by this commit.

        The list is sorted, and computed only once (the result is cached
        in self.__added_files). Each call returns a new list object, so
        callers are free to modify what they get.

        This method assumes that self.parent_revs is not None, and raises
        an assertion failure if the assumption is not met. Users can call
        function `commit_parents' to set it if needed.
        """
        assert self.parent_revs is not None

        if self.__added_files is None:
            # One method we could use to compute the list of new files is
            # to ask git, via the status code returned by the "git diff-tree"
            # command. However, while the format of that code seems to be well
            # documented, this method seems more difficult to test because
            # of the copy-edit/rename-edit status codes. How Git decides
            # whether a file comes from another one is a bit of a black box,
            # and bound to change from version to version.
            #
            # So, to avoid this complexity, we rely on a different approach
            # instead, which consists in simply getting the list of files
            # in the parent commit, and compare it to the list of files
            # in this commit. Any file not present in the parent is declared
            # new.
            base_rev = self.base_rev_for_display()
            if base_rev is None:
                prev_commit_all_files = set()
            else:
                prev_commit_all_files = set(
                    self.__all_files_from_commit_rev(base_rev)
                )

            # The list of files is returned in sorted alphabetical order,
            # mostly to ensure predictability and stability in the result.
            self.__added_files = sorted(
                set(self.all_files()) - prev_commit_all_files
            )

        # Hand out a copy, so that a caller modifying the returned list
        # cannot corrupt the cache.
        return list(self.__added_files)

    def base_rev_for_display(self):
        """The rev as reference to determine what changed in this commit.

        This method assumes that self.parent_revs is not None, and raises
        an assertion failure if the assumption is not met. Users can call
        function `commit_parents' to set it if needed.

        RETURN VALUE
            The reference commit's SHA1, or None if this commit does not
            have a parent (root commit).
        """
        assert self.parent_revs is not None

        # Make sure we use each commits's first parent as the base
        # commit.  This is important for merge commits, or commits
        # imported by merges.
        #
        # Consider for instance the following scenario...
        #
        #                    <-- origin/master
        #                   /
        #    C1 <-- C2 <-- C3 <-- M4 <-- master
        #      \                  /
        #        <-- B1 <-- B2 <-+
        #
        # ... where the user merged his changes B1 & B2 into
        # his master branch (as commit M4), and then tries
        # to push this merge.
        #
        # There are 3 new commits in this case to be checked,
        # which are B1, B2, and M4, with C3 being the update's
        # base rev.
        #
        # If not careful, we would be checking B1 against C3,
        # rather than C1, which would cause these scripts
        # to think that all the files modified by C2 and C3
        # have been modified by B1, and thus must be checked.
        #
        # Similarly, we would be checking M4 against B2,
        # whereas it makes more sense in that case to be
        # checking it against C3.
        if self.parent_revs:
            return self.parent_revs[0]
        else:
            return None

    def base_rev_for_git(self):
        """The rev as reference to determine what changed in this commit.

        Use this function when this rev should be passed to git commands,
        as it never returns None.

        RETURN VALUE
            The reference commit's SHA1, or the empty tree's SHA1 if
            this commit does not have a parent (root commit).
        """
        base_rev = self.base_rev_for_display()
        if base_rev is None:
            base_rev = empty_tree_rev()
        return base_rev

    def is_revert(self):
        """Return True if this commit appears to be a revert commit.

        We detect such commits by searching for specific patterns that
        the "git revert" command automatically includes in the default
        revision log of such commits, hoping that a user is not deleting
        them afterwards.
        """
        return bool(_REVERT_COMMIT_RE.search(self.raw_revlog))

    @classmethod
    def __all_files_from_commit_rev(cls, rev):
        """Return the list of all files for the given commit revision.

        Note that unlike in the all_files method, the result of
        this method is not cached.

        PARAMETERS
            rev: The revision of the commit whose files should be listed.
        """
        return git.ls_tree(
            "--full-tree", "--name-only", "-r", rev, _split_lines=True, _decode=True
        )
def commit_info_list(*args):
    """Return a list of CommitInfo objects in chronological order.

    PARAMETERS
        Same as in the "git rev-list" command.

    RETURN VALUE
        A list with one CommitInfo object per commit listed by
        "git rev-list" (called with reverse=True), each created with
        its rev, author name, author email, subject and parent revs.
    """
    # The number of lines each commit is expected to generate: One
    # "commit <rev>" line, followed by one line for each of the 4
    # fields requested via the "pretty" format below (parents,
    # author name, author email, and subject).
    lines_per_commit = 5

    rev_info_in_bytes = git.rev_list(
        *args, pretty="format:%P%n%an%n%ae%n%s", _split_lines=True, reverse=True
    )
    assert len(rev_info_in_bytes) % lines_per_commit == 0

    rev_info = [safe_decode(b) for b in rev_info_in_bytes]

    result = []
    # Walk the output one commit (lines_per_commit lines) at a time,
    # rather than repeatedly popping the first element of the list,
    # which is quadratic in the number of commits.
    for start in range(0, len(rev_info), lines_per_commit):
        (header, parents_line, author_name, author_email, subject) = rev_info[
            start : start + lines_per_commit
        ]
        commit_keyword, rev = header.split(None, 1)
        assert commit_keyword == "commit"

        # The parents are listed on a single line, separated by
        # whitespace; for a commit without parent, the line is empty
        # and the split therefore yields the empty list.
        parents = parents_line.split()
        result.append(CommitInfo(rev, author_name, author_email, subject, parents))

    return result