-
-
Notifications
You must be signed in to change notification settings - Fork 191
Expand file tree
/
Copy pathdeploy_to_develop.py
More file actions
345 lines (295 loc) · 13.2 KB
/
deploy_to_develop.py
File metadata and controls
345 lines (295 loc) · 13.2 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
# SPDX-License-Identifier: Apache-2.0
#
# http://nexb.com and https://github.com/aboutcode-org/scancode.io
# The ScanCode.io software is licensed under the Apache License version 2.0.
# Data generated with ScanCode.io is provided as-is without warranties.
# ScanCode is a trademark of nexB Inc.
#
# You may not use this software except in compliance with the License.
# You may obtain a copy of the License at: http://apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software distributed
# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
# CONDITIONS OF ANY KIND, either express or implied. See the License for the
# specific language governing permissions and limitations under the License.
#
# Data Generated with ScanCode.io is provided on an "AS IS" BASIS, WITHOUT WARRANTIES
# OR CONDITIONS OF ANY KIND, either express or implied. No content created from
# ScanCode.io should be considered or used as legal advice. Consult an Attorney
# for any legal advice.
#
# ScanCode.io is a free software code scanning tool from nexB Inc. and others.
# Visit https://github.com/aboutcode-org/scancode.io for support and download.
from aboutcode.pipeline import optional_step
from scanpipe import pipes
from scanpipe.pipelines import Pipeline
from scanpipe.pipes import d2d
from scanpipe.pipes import flag
from scanpipe.pipes import input
from scanpipe.pipes import matchcode
from scanpipe.pipes import purldb
from scanpipe.pipes import scancode
class DeployToDevelop(Pipeline):
    """
    Establish relationships between two code trees: deployment and development.

    This pipeline requires a minimum of two archive files, each properly tagged with:

    - **from** for archives containing the development source code.
    - **to** for archives containing the deployment compiled code.

    When using download URLs as inputs, the "from" and "to" tags can be
    provided by adding a "#from" or "#to" fragment at the end of the download URLs.

    When uploading local files:

    - **User Interface:** Use the "Edit flag" link in the "Inputs" panel of the Project
      details view.
    - **REST API:** Utilize the "upload_file_tag" field in addition to the
      "upload_file".
    - **Command Line Interface:** Tag uploaded files using the "filename:tag" syntax,
      for example, ``--input-file path/filename:tag``.
    """

    # Extensions passed to the PurlDB package matching step.
    purldb_package_extensions = [".jar", ".war", ".zip"]

    # Extensions passed to the PurlDB resource matching step and to the
    # unmapped resources matching done during house keeping.
    purldb_resource_extensions = [
        ".map",
        ".js",
        ".mjs",
        ".ts",
        ".d.ts",
        ".jsx",
        ".tsx",
        ".css",
        ".scss",
        ".less",
        ".sass",
        ".soy",
        ".class",
    ]

    # Extensions passed as ``doc_extensions`` to the missing license flagging step.
    doc_extensions = [
        ".pdf",
        ".doc",
        ".docx",
        ".ppt",
        ".pptx",
        ".tex",
        ".odt",
        ".odp",
    ]

    @classmethod
    def steps(cls):
        """Return the ordered tuple of step methods that make up this pipeline."""
        ordered_steps = (
            cls.get_inputs,
            cls.extract_inputs_to_codebase_directory,
            cls.extract_archives,
            cls.collect_and_create_codebase_resources,
            cls.fingerprint_codebase_directories,
            cls.flag_empty_files,
            cls.flag_whitespace_files,
            cls.flag_ignored_resources,
            cls.map_about_files,
            cls.map_checksum,
            cls.match_archives_to_purldb,
            cls.find_java_packages,
            cls.map_java_to_class,
            cls.map_jar_to_source,
            cls.map_javascript,
            cls.map_javascript_symbols,
            cls.map_javascript_strings,
            cls.map_elf,
            cls.map_macho,
            cls.map_winpe,
            cls.map_go,
            cls.map_rust,
            cls.match_directories_to_purldb,
            cls.match_resources_to_purldb,
            cls.map_javascript_post_purldb_match,
            cls.map_javascript_path,
            cls.map_javascript_colocation,
            cls.map_thirdparty_npm_packages,
            cls.map_path,
            cls.flag_mapped_resources_archives_and_ignored_directories,
            cls.perform_house_keeping_tasks,
            cls.match_purldb_resources_post_process,
            cls.remove_packages_without_resources,
            cls.scan_unmapped_to_files,
            cls.scan_mapped_from_for_files,
            cls.flag_deployed_from_resources_with_missing_license,
            cls.create_local_files_packages,
        )
        return ordered_steps

    def get_inputs(self):
        """Locate the ``from`` and ``to`` input files."""
        located_inputs = d2d.get_inputs(self.project)
        self.from_files, self.to_files = located_inputs

    def extract_inputs_to_codebase_directory(self):
        """Extract input files to the project's codebase/ directory."""
        files_and_destinations = (
            (self.from_files, self.project.codebase_path / d2d.FROM),
            (self.to_files, self.project.codebase_path / d2d.TO),
        )

        for files, destination in files_and_destinations:
            for file_path in files:
                # Archives go through the pipeline extraction; any other input
                # is handed to the plain copy helper.
                place_input = (
                    self.extract_archive
                    if input.is_archive(file_path)
                    else input.copy_input
                )
                place_input(file_path, destination)

        # Reload the project env post-extraction as the scancode-config.yml file
        # may be located in one of the extracted archives.
        self.env = self.project.get_env()

    def collect_and_create_codebase_resources(self):
        """Collect and create codebase resources."""
        pipes.collect_and_create_codebase_resources(self.project)

    def fingerprint_codebase_directories(self):
        """Compute directory fingerprints for matching."""
        matchcode.fingerprint_codebase_directories(self.project, to_codebase_only=True)

    def flag_whitespace_files(self):
        """
        Flag whitespace files with size less than or equal to 100 bytes
        as ignored.
        """
        d2d.flag_whitespace_files(project=self.project)

    def map_about_files(self):
        """Map ``from/`` .ABOUT files to their related ``to/`` resources."""
        d2d.map_about_files(project=self.project, logger=self.log)

    def map_checksum(self):
        """Map using SHA1 checksum."""
        d2d.map_checksum(project=self.project, checksum_field="sha1", logger=self.log)

    def match_archives_to_purldb(self):
        """Match selected package archives by extension to PurlDB."""
        if purldb.is_available():
            d2d.match_purldb_resources(
                project=self.project,
                extensions=self.purldb_package_extensions,
                matcher_func=d2d.match_purldb_package,
                logger=self.log,
            )
        else:
            self.log("PurlDB is not available. Skipping.")

    @optional_step("Java")
    def find_java_packages(self):
        """Find the java package of the .java source files."""
        d2d.find_java_packages(self.project, logger=self.log)

    @optional_step("Java")
    def map_java_to_class(self):
        """Map a .class compiled file to its .java source."""
        d2d.map_java_to_class(project=self.project, logger=self.log)

    @optional_step("Java")
    def map_jar_to_source(self):
        """Map .jar files to their related source directory."""
        d2d.map_jar_to_source(project=self.project, logger=self.log)

    @optional_step("JavaScript")
    def map_javascript(self):
        """
        Map a packed or minified JavaScript, TypeScript, CSS and SCSS
        to its source.
        """
        d2d.map_javascript(project=self.project, logger=self.log)

    @optional_step("JavaScript")
    def map_javascript_symbols(self):
        """Map deployed JavaScript, TypeScript to its sources using symbols."""
        d2d.map_javascript_symbols(project=self.project, logger=self.log)

    @optional_step("JavaScript")
    def map_javascript_strings(self):
        """Map deployed JavaScript, TypeScript to its sources using string literals."""
        d2d.map_javascript_strings(project=self.project, logger=self.log)

    @optional_step("Elf")
    def map_elf(self):
        """Map ELF binaries to their sources using dwarf paths and symbols."""
        # Two passes, in this order: dwarf paths first, then symbols.
        d2d.map_elfs_with_dwarf_paths(project=self.project, logger=self.log)
        d2d.map_elfs_binaries_with_symbols(project=self.project, logger=self.log)

    @optional_step("MacOS")
    def map_macho(self):
        """Map Mach-O binaries to their sources using symbols."""
        d2d.map_macho_binaries_with_symbols(project=self.project, logger=self.log)

    @optional_step("Windows")
    def map_winpe(self):
        """Map winpe binaries to their sources using symbols."""
        d2d.map_winpe_binaries_with_symbols(project=self.project, logger=self.log)

    @optional_step("Go")
    def map_go(self):
        """Map Go binaries to their sources using paths."""
        d2d.map_go_paths(project=self.project, logger=self.log)

    @optional_step("Rust")
    def map_rust(self):
        """Map Rust binaries to their sources using symbols."""
        d2d.map_rust_binaries_with_symbols(project=self.project, logger=self.log)

    def match_directories_to_purldb(self):
        """Match selected directories in PurlDB."""
        if purldb.is_available():
            d2d.match_purldb_directories(
                project=self.project,
                logger=self.log,
            )
        else:
            self.log("PurlDB is not available. Skipping.")

    def match_resources_to_purldb(self):
        """Match selected files by extension in PurlDB."""
        if purldb.is_available():
            d2d.match_purldb_resources(
                project=self.project,
                extensions=self.purldb_resource_extensions,
                matcher_func=d2d.match_purldb_resource,
                logger=self.log,
            )
        else:
            self.log("PurlDB is not available. Skipping.")

    @optional_step("JavaScript")
    def map_javascript_post_purldb_match(self):
        """Map minified javascript file based on existing PurlDB match."""
        d2d.map_javascript_post_purldb_match(project=self.project, logger=self.log)

    @optional_step("JavaScript")
    def map_javascript_path(self):
        """Map javascript file based on path."""
        d2d.map_javascript_path(project=self.project, logger=self.log)

    @optional_step("JavaScript")
    def map_javascript_colocation(self):
        """Map JavaScript files based on neighborhood file mapping."""
        d2d.map_javascript_colocation(project=self.project, logger=self.log)

    @optional_step("JavaScript")
    def map_thirdparty_npm_packages(self):
        """Map thirdparty package using package.json metadata."""
        d2d.map_thirdparty_npm_packages(project=self.project, logger=self.log)

    def map_path(self):
        """Map using path similarities."""
        d2d.map_path(project=self.project, logger=self.log)

    def flag_mapped_resources_archives_and_ignored_directories(self):
        """Flag all codebase resources that were mapped during the pipeline."""
        project = self.project
        flag.flag_mapped_resources(project)
        flag.flag_ignored_directories(project)
        d2d.flag_processed_archives(project)

    def perform_house_keeping_tasks(self):
        """
        On deployed side
            - PurlDB match files with ``no-java-source`` and empty status,
              if no match is found update status to ``requires-review``.
            - Update status for uninteresting files.
            - Flag the dangling legal files for review.

        On devel side
            - Update status for not deployed files.
        """
        d2d.match_resources_with_no_java_source(project=self.project, logger=self.log)
        d2d.handle_dangling_deployed_legal_files(project=self.project, logger=self.log)
        d2d.match_unmapped_resources(
            project=self.project,
            matched_extensions=self.purldb_resource_extensions,
            logger=self.log,
        )
        d2d.flag_undeployed_resources(project=self.project)

    def match_purldb_resources_post_process(self):
        """Choose the best package for PurlDB matched resources."""
        d2d.match_purldb_resources_post_process(self.project, logger=self.log)

    def remove_packages_without_resources(self):
        """Remove packages without any resources."""
        self.project.discoveredpackages.filter(
            codebase_resources__isnull=True
        ).delete()

    def scan_unmapped_to_files(self):
        """
        Scan unmapped/matched ``to/`` files for copyrights, licenses,
        emails, and urls and update the status to `requires-review`.
        """
        d2d.scan_unmapped_to_files(project=self.project, logger=self.log)

    def scan_mapped_from_for_files(self):
        """Scan mapped ``from/`` files for copyrights, licenses, emails, and urls."""
        resources_to_scan = d2d.get_from_files_for_scanning(
            self.project.codebaseresources
        )
        scancode.scan_for_files(
            self.project, resources_to_scan, progress_logger=self.log
        )

    def flag_deployed_from_resources_with_missing_license(self):
        """Update the status for deployed from files with missing license."""
        d2d.flag_deployed_from_resources_with_missing_license(
            self.project,
            doc_extensions=self.doc_extensions,
        )

    def create_local_files_packages(self):
        """Create local-files packages for codebase resources not part of a package."""
        d2d.create_local_files_packages(self.project)