Skip to content

Commit 065c87a

Browse files
authored
Mine PURL from alpine #665 (#724)
* Add files for alpine pipeline #665 Signed-off-by: Jono Yang <jyang@nexb.com> * Update AlpineCollector #665 * Modify functions to return an Alpine PackageData from a parsed pkginfo string Signed-off-by: Jono Yang <jyang@nexb.com> * Iteratively download indices and yield packages #665 Signed-off-by: Jono Yang <jyang@nexb.com> * Update destructor code #665 Signed-off-by: Jono Yang <jyang@nexb.com> * Create code to collect and write purl files #665 Signed-off-by: Jono Yang <jyang@nexb.com> * Add append argument to write_packageurls_to_file #665 Signed-off-by: Jono Yang <jyang@nexb.com> * Update code formatting #665 * move single function to alpine pipe Signed-off-by: Jono Yang <jyang@nexb.com> * Add test for alpine pipe #665 Signed-off-by: Jono Yang <jyang@nexb.com> * Update logic in MavenCollector constructor #660 Signed-off-by: Jono Yang <jyang@nexb.com> * Update test expectation Signed-off-by: Jono Yang <jyang@nexb.com> --------- Signed-off-by: Jono Yang <jyang@nexb.com>
1 parent c468a76 commit 065c87a

12 files changed

Lines changed: 626 additions & 20 deletions

File tree

minecode/tests/collectors/test_github.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,11 @@ def test_github_get_all_versions(self):
3535
"matchcode-toolkit-v3.0.0",
3636
"matchcode-toolkit-v1.1.1",
3737
"minecode-pipelines/v0.0.1b3",
38+
"minecode-pipelines/v0.0.1b4",
39+
"minecode-pipelines/v0.0.1b5",
40+
"minecode-pipelines/v0.0.1b6",
41+
"minecode-pipelines/v0.0.1b7",
42+
"minecode-pipelines/v0.0.1b8",
3843
]
3944
for item in versions:
4045
self.assertIn(item, expected)
Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
# SPDX-License-Identifier: Apache-2.0
2+
#
3+
# http://nexb.com and https://github.com/aboutcode-org/scancode.io
4+
# The ScanCode.io software is licensed under the Apache License version 2.0.
5+
# Data generated with ScanCode.io is provided as-is without warranties.
6+
# ScanCode is a trademark of nexB Inc.
7+
#
8+
# You may not use this software except in compliance with the License.
9+
# You may obtain a copy of the License at: http://apache.org/licenses/LICENSE-2.0
10+
# Unless required by applicable law or agreed to in writing, software distributed
11+
# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
12+
# CONDITIONS OF ANY KIND, either express or implied. See the License for the
13+
# specific language governing permissions and limitations under the License.
14+
#
15+
# Data Generated with ScanCode.io is provided on an "AS IS" BASIS, WITHOUT WARRANTIES
16+
# OR CONDITIONS OF ANY KIND, either express or implied. No content created from
17+
# ScanCode.io should be considered or used as legal advice. Consult an Attorney
18+
# for any legal advice.
19+
#
20+
# ScanCode.io is a free software code scanning tool from nexB Inc. and others.
21+
# Visit https://github.com/aboutcode-org/scancode.io for support and download.
22+
23+
from scanpipe.pipelines import Pipeline
24+
from scanpipe.pipes import federatedcode
25+
26+
from minecode_pipelines import pipes
27+
from minecode_pipelines.pipes import alpine
28+
29+
30+
class MineAlpine(Pipeline):
    """
    Pipeline that mines packageURLs from an alpine index and publishes
    them to a FederatedCode repo.
    """

    @classmethod
    def steps(cls):
        """Return the pipeline steps as a tuple, in execution order."""
        ordered_steps = (
            cls.check_federatedcode_eligibility,
            cls.collect_packages_from_alpine,
            cls.delete_cloned_repos,
        )
        return ordered_steps

    def check_federatedcode_eligibility(self):
        """
        Verify that FederatedCode is configured and available before
        any mining work is started.
        """
        pipeline_logger = self.log
        federatedcode.check_federatedcode_configured_and_available(
            logger=pipeline_logger,
        )

    def collect_packages_from_alpine(self):
        """
        Run the alpine collection pipe and keep its return value on
        ``self.repos`` so that the cleanup step can use it.
        """
        collected_repos = alpine.collect_packages_from_alpine(logger=self.log)
        self.repos = collected_repos

    def delete_cloned_repos(self):
        """Delete the repos stored on ``self.repos`` by the collection step."""
        repos_to_delete = self.repos
        pipes.delete_cloned_repos(repos=repos_to_delete, logger=self.log)

minecode_pipelines/pipelines/mine_pypi.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,8 +23,8 @@
2323
from scanpipe.pipelines import Pipeline
2424
from scanpipe.pipes import federatedcode
2525

26-
from minecode_pipelines.pipes import pypi
2726
from minecode_pipelines import pipes
27+
from minecode_pipelines.pipes import pypi
2828

2929

3030
class MinePypi(Pipeline):

minecode_pipelines/pipes/__init__.py

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -81,13 +81,27 @@ def update_mined_packages_in_checkpoint(packages, config_repo, cloned_repo, chec
8181
)
8282

8383

84-
def write_packageurls_to_file(repo, base_dir, packageurls):
84+
def write_packageurls_to_file(repo, base_dir, packageurls, append=False):
    """
    Write ``packageurls`` to the ``PURLS_FILENAME`` file located under
    ``base_dir`` in the working directory of ``repo`` and return the path of
    that file relative to the repo working directory.

    If ``append`` is True and the file already exists, the entries already
    stored in the file are kept and ``packageurls`` is added after them.
    Otherwise the file content is replaced by ``packageurls``.

    Raise a TypeError if ``packageurls`` is not a list.
    """
    if not isinstance(packageurls, list):
        # TypeError is a subclass of Exception, so callers that catch the
        # previously raised bare Exception keep working.
        raise TypeError("`packageurls` needs to be a list")

    purl_file_rel_path = os.path.join(base_dir, PURLS_FILENAME)
    purl_file_full_path = Path(repo.working_dir) / purl_file_rel_path

    if append and purl_file_full_path.exists():
        # NOTE(review): an empty YAML file presumably loads as None; treat
        # that as "no existing entries" -- confirm against saneyaml.load.
        existing_purls = load_data_from_yaml_file(purl_file_full_path) or []
        # list.extend() mutates in place and returns None, so assigning its
        # result would write None to the file. Build a new combined list.
        packageurls = [*existing_purls, *packageurls]

    write_data_to_yaml_file(path=purl_file_full_path, data=packageurls)
    return purl_file_rel_path
8995

9096

97+
def load_data_from_yaml_file(path):
    """
    Return the data loaded from the YAML file at ``path``.

    ``path`` may be a string or a path object; a string is converted to a
    ``Path`` first. The file is read as UTF-8 text and parsed with
    ``saneyaml.load``.
    """
    yaml_path = Path(path) if isinstance(path, str) else path

    with open(yaml_path, encoding="utf-8") as yaml_file:
        yaml_text = yaml_file.read()
        return saneyaml.load(yaml_text)
103+
104+
91105
def write_data_to_yaml_file(path, data):
92106
if isinstance(path, str):
93107
path = Path(path)

0 commit comments

Comments
 (0)