-
-
Notifications
You must be signed in to change notification settings - Fork 720
Expand file tree
/
Copy pathwin_pe.py
More file actions
375 lines (301 loc) · 12.4 KB
/
win_pe.py
File metadata and controls
375 lines (301 loc) · 12.4 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
#
# Copyright (c) nexB Inc. and others. All rights reserved.
# ScanCode is a trademark of nexB Inc.
# SPDX-License-Identifier: Apache-2.0
# See http://www.apache.org/licenses/LICENSE-2.0 for the license text.
# See https://github.com/nexB/scancode-toolkit for support or download.
# See https://aboutcode.org for more information about nexB OSS projects.
#
from contextlib import closing
import pefile
from ftfy import fix_text
from commoncode import text
from packagedcode import models
from packagedcode.models import Party
from packagedcode.models import party_org
from cluecode.copyrights import detect_copyrights_from_lines
from cluecode.copyrights import prepare_text_line
from typecode import contenttype
# Flip to True to enable debug tracing for this module.
TRACE = False


def logger_debug(*args):
    """
    Do nothing: this is the no-op tracer used while TRACE is disabled.
    """


if TRACE:
    import logging
    import sys

    logger = logging.getLogger(__name__)
    # logging.basicConfig(level=logging.DEBUG, stream=sys.stdout)
    logging.basicConfig(stream=sys.stdout)
    logger.setLevel(logging.DEBUG)

    def logger_debug(*args):
        """
        Log all `args` joined with a space at the DEBUG level.
        """
        parts = []
        for arg in args:
            # Non-empty strings are logged as-is; anything else (including
            # an empty string) is logged using its repr().
            if isinstance(arg, str) and arg:
                parts.append(arg)
            else:
                parts.append(repr(arg))
        return logger.debug(' '.join(parts))
"""
Extract data from Windows PE DLLs and executables.
Note that the extraction may not be correct for all PEs, in particular
older legacy PEs. See tests and:
http://msdn.microsoft.com/en-us/library/aa381058%28v=VS.85%29.aspx
PE stores data in a "VarInfo" structure for "variable information".
VarInfo are by definition variable key/value pairs:
http://msdn.microsoft.com/en-us/library/ms646995%28v=vs.85%29.aspx
Therefore we use a list of the most common and useful key names with
an eye on origin and license related information and return a value
when there is one present.
"""
"""
https://docs.microsoft.com/en-us/windows/win32/menurc/versioninfo-resource
Name Description
Comments Additional information that should be displayed for
diagnostic purposes.
CompanyName Company that produced the file - for example, "Microsoft
Corporation" or "Standard Microsystems Corporation, Inc."
This string is required.
FileDescription File description to be presented to users. This string may
be displayed in a list box when the user is choosing files
to install - for example, "Keyboard Driver for AT-Style
Keyboards". This string is required.
FileVersion Version number of the file - for example, "3.10" or
"5.00.RC2". This string is required.
InternalName Internal name of the file, if one exists - for example, a
module name if the file is a dynamic-link library. If the
file has no internal name, this string should be the
original filename, without extension. This string is
required.
LegalCopyright Copyright notices that apply to the file. This should
include the full text of all notices, legal symbols,
copyright dates, and so on. This string is optional.
LegalTrademarks Trademarks and registered trademarks that apply to the file.
This should include the full text of all notices, legal
symbols, trademark numbers, and so on. This string is
optional.
OriginalFilename Original name of the file, not including a path. This
information enables an application to determine whether a
file has been renamed by a user. The format of the name
depends on the file system for which the file was created.
This string is required.
ProductName Name of the product with which the file is distributed. This
string is required.
ProductVersion Version of the product with which the file is
distributed - for example, "3.10" or "5.00.RC2". This string
is required.
"""
# Common info keys found in the string table of a PE file.
# The order matters: it is the order of the keys in the mapping returned for a
# PE file.
PE_INFO_KEYS = (
    # Versions
    # rare and used only by Java exe
    'Full Version',
    # the actual version
    'ProductVersion',
    # another common version
    'FileVersion',
    # a version common in MSFT, redundant when present with ProductVersion
    'Assembly Version',
    # a version common in MSFT, redundant when present with ProductVersion
    'AssemblyVersion',

    # Dates
    # rare but useful when there: 2013/02/04-18:07:46 or 2018-11-10 14:38
    'BuildDate',

    # Names
    # often present and often localized: that's a component name
    'ProductName',
    # name of the original DLL
    'OriginalFilename',
    # often present: sometimes a package name or a .dll or .exe
    'InternalName',

    # Licensing and legal notices
    # rare, seen only in CURL
    'License',
    # copyright notice, sometimes a license tag or URL.
    # Use it for license detection
    'LegalCopyright',
    # may sometimes contain a license or copyright. Ignore a single ".".
    # Treat as part of the declared license
    'LegalTrademarks',
    # mostly MSFT
    'LegalTrademarks1',
    # mostly MSFT
    'LegalTrademarks2',
    # mostly MSFT
    'LegalTrademarks3',

    # Descriptions
    # description, often localized
    'FileDescription',
    # random data. Append to a description
    'Comments',

    # Parties
    # the company e.g. a party, sometimes localized
    'CompanyName',
    # rare, use as a fallback if present and CompanyName is missing
    'Company',

    # URLs
    # rarely there but good if there
    'URL',
    # rarely there but good if there
    'WWW',
)

# Same keys as a set for fast membership tests.
PE_INFO_KEYSET = {*PE_INFO_KEYS}
def pe_info(location):
    """
    Return a mapping of common data available for a Windows dll or exe PE
    (portable executable) at `location`.

    Return an empty mapping if `location` is empty or None.

    Otherwise the returned mapping has one entry for each of the PE_INFO_KEYS
    (with a None value when no such entry was collected) and an 'extra_data'
    entry that is a mapping of any other key/value found in the PE string
    table. When no string table can be found, all the values are None and
    'extra_data' is empty.

    Only the first FileInfo list, its first StringFileInfo structure and the
    first StringTable of that structure are used.

    NOTE(review): a file that pefile cannot parse is not handled here: any
    exception raised by pefile.PE() presumably propagates to the caller.
    Confirm that callers check the file type first.
    """
    if not location:
        return {}

    result = dict.fromkeys(PE_INFO_KEYS)
    extra_data = result['extra_data'] = {}

    with closing(pefile.PE(location)) as pe:
        # >>> pe.FileInfo: this is a list of list of Structure objects:
        # [[<Structure: [VarFileInfo] >, <Structure: [StringFileInfo]>]]
        file_info = getattr(pe, 'FileInfo', None)
        if not file_info or not isinstance(file_info, list):
            # No fileinfo section: we return just empties
            if TRACE:
                logger.debug('pe_info: not file_info')
            return result

        # here we have a non-empty list
        file_info = file_info[0]
        if TRACE:
            # the value is passed as a lazy %-argument: an extra positional
            # argument without a placeholder is a logging formatting error
            logger.debug('pe_info: file_info: %r', file_info)

        string_file_info = [
            struct for struct in file_info
            if isinstance(struct, pefile.Structure)
            and getattr(struct, 'name', None) == 'StringFileInfo'
        ]
        if not string_file_info:
            # No stringfileinfo section: we return just empties
            if TRACE:
                logger.debug('pe_info: not string_file_info')
            return result

        string_file_info = string_file_info[0]

        string_table = getattr(string_file_info, 'StringTable', None)
        if not string_table or not isinstance(string_table, list):
            # No fileinfo.StringTable section: we return just empties
            if TRACE:
                logger.debug('pe_info: not StringTable')
            return result

        string_table = string_table[0]
        entries = string_table.entries

        if TRACE:
            logger.debug('pe_info: Entries keys: %s', set(entries))
            logger.debug('pe_info: Entry values:')
            for k, v in entries.items():
                logger.debug(' %s: %r%r', k, type(v), v)

        for k, v in entries.items():
            # convert keys and values to stripped unicode text
            # (presumably entries may be bytes - see text.as_unicode)
            key = text.as_unicode(k).strip()
            value = text.as_unicode(v).strip()
            # repair text encoding glitches in the value with ftfy
            value = fix_text(value)
            if key in PE_INFO_KEYSET:
                result[key] = value
            else:
                extra_data[key] = value

    return result
def get_first(mapping, *keys):
    """
    Return the first non-empty value found in the `mapping` when looking up
    each of the `keys` in order. Return None if there is no such value.
    """
    candidates = (mapping.get(name) for name in keys)
    return next((found for found in candidates if found), None)
def concat(mapping, *keys):
    """
    Return a string of all the unique, non-empty values of the `keys` found
    in the `mapping`, joined with a newline and kept in `keys` order.
    Return an empty string if there is no such value.
    """
    unique = []
    for name in keys:
        item = mapping.get(name)
        # skip missing, empty and already collected values
        if not item or item in unique:
            continue
        unique.append(item)
    return '\n'.join(unique)
def has_license_with_copyright(text):
    """
    Return True if the LegalCopyright `text` could have some license
    declarations and should be a part of the extracted_license_statement.

    Return False only when the first detection from running a copyright
    detection on `text` has a "copyright" entry. Return True in all other
    cases, including when nothing is detected.
    """
    found = detect_copyrights_from_lines(
        numbered_lines=[(1, text)],
        include_copyrights=True,
        include_authors=False,
        include_holders=False,
        include_copyright_years=True,
        include_copyright_allrights=True,
    )
    as_dicts = [item.to_dict() for item in found]
    first_is_copyright = bool(as_dicts) and "copyright" in as_dicts[0]
    return not first_is_copyright
class WindowsExecutableHandler(models.NonAssemblableDatafileHandler):
    # Datafile handler that collects package data from the version
    # information strings of a Windows PE (portable executable) file.
    datasource_id = 'windows_executable'
    default_package_type = 'winexe'
    filetypes = ('pe32', 'for ms windows',)
    # Common PE file extensions, and the same extensions followed by an
    # underscore and any suffix.
    path_patterns = (
        '*.exe',
        '*.dll',
        '*.mui',
        '*.mun',
        '*.com',
        '*.winmd',
        '*.sys',
        '*.tlb',
        '*.exe_*',
        '*.dll_*',
        '*.mui_*',
        '*.mun_*',
        '*.com_*',
        '*.winmd_*',
        '*.sys_*',
        '*.tlb_*',
        '*.ocx',
    )
    description = 'Windows Portable Executable metadata'
    documentation_url = 'https://en.wikipedia.org/wiki/Portable_Executable'

    @classmethod
    def is_datafile(cls, location, filetypes=tuple()):
        """
        Return True if the file at location is highly likely to be a Windows
        PE (portable executable). Return False otherwise.
        """
        # NOTE(review): the base class check presumably uses `path_patterns`
        # and `filetypes` - confirm against the base handler.
        if super().is_datafile(location, filetypes=filetypes):
            return True

        # Otherwise fall back to the detected content type of the file.
        # Always return a boolean rather than an implicit None.
        file_type = contenttype.get_type(location)
        return bool(file_type.is_winexe)

    @classmethod
    def parse(cls, location, package_only=False):
        """
        Yield one PackageData built from the PE information collected from
        the file at `location`.
        """
        infos = pe_info(location)
        yield get_package_data_from_pe_info(infos, package_only)
def get_package_data_from_pe_info(infos, package_only=False):
    """
    Return a PackageData built from an `infos` mapping of PE information
    keys to values, such as the mapping returned by pe_info().
    `package_only` is passed as-is to PackageData.from_data().
    """
    version = get_first(
        infos,
        'Full Version',
        'ProductVersion',
        'FileVersion',
        'Assembly Version',
    )

    # Keep at most the first 10 characters of the build date and use dashes
    # rather than slashes as separators.
    build_date = get_first(infos, 'BuildDate')
    release_date = build_date
    if build_date:
        release_date = build_date[:10].replace('/', '-')

    name = get_first(
        infos,
        'ProductName',
        'OriginalFilename',
        'InternalName',
    )

    copyright_notice = get_first(infos, 'LegalCopyright')
    trademarks = concat(
        infos,
        'LegalTrademarks',
        'LegalTrademarks1',
        'LegalTrademarks2',
        'LegalTrademarks3',
    )
    declared_license = get_first(infos, 'License')

    # Collect the license-related statements. The copyright is included only
    # when it may contain some license declaration.
    license_parts = {}
    if copyright_notice and has_license_with_copyright(copyright_notice):
        license_parts['LegalCopyright'] = copyright_notice
    if trademarks:
        license_parts['LegalTrademarks'] = trademarks
    if declared_license:
        license_parts['License'] = declared_license
    # use None rather than an empty mapping when there is no statement
    extracted_license_statement = license_parts or None

    description = concat(infos, 'FileDescription', 'Comments')

    company = get_first(infos, 'CompanyName', 'Company')
    if company:
        parties = [Party(type=party_org, role='author', name=company)]
    else:
        parties = []

    homepage_url = get_first(infos, 'URL', 'WWW')

    package_data = {
        'datasource_id': WindowsExecutableHandler.datasource_id,
        'type': WindowsExecutableHandler.default_package_type,
        'name': name,
        'version': version,
        'release_date': release_date,
        'copyright': copyright_notice,
        'extracted_license_statement': extracted_license_statement,
        'description': description,
        'parties': parties,
        'homepage_url': homepage_url,
    }
    return models.PackageData.from_data(package_data, package_only)