Skip to content

Commit 822a562

Browse files
Fix cpan issues and release cpan pipeline
Signed-off-by: Ayan Sinha Mahapatra <asmahapatra@aboutcode.org>
1 parent 78bb653 commit 822a562

5 files changed

Lines changed: 20 additions & 15 deletions

File tree

minecode_pipelines/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -8,4 +8,4 @@
88
#
99

1010

11-
VERSION = "0.0.1b42"
11+
VERSION = "0.0.1b59"

minecode_pipelines/miners/cpan.py

Lines changed: 14 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -13,8 +13,7 @@
1313
from bs4 import BeautifulSoup
1414
from packageurl import PackageURL
1515

16-
from scanpipe.pipes.fetch import fetch_http
17-
16+
from minecode_pipelines.utils import get_temp_file
1817

1918
"""
2019
Visitors for cpan and cpan-like perl package repositories.
@@ -33,9 +32,19 @@ def get_cpan_packages(cpan_repo=CPAN_REPO, logger=None):
3332
path_prefixes with author page path from this list.
3433
"""
3534
cpan_packages_url = cpan_repo + "modules/02packages.details.txt.gz"
36-
cpan_packages_gz_download = fetch_http(cpan_packages_url)
37-
with gzip.open(cpan_packages_gz_download, "rb") as file_content:
38-
packages_content = file_content.read()
35+
packages_archive = get_temp_file(file_name="cpan_packages", extension=".gz")
36+
packages_content = get_temp_file(file_name="cpan_packages", extension=".txt")
37+
response = requests.get(cpan_packages_url, stream=True)
38+
with open(packages_archive, 'wb') as f:
39+
for chunk in response.iter_content(chunk_size=8192):
40+
f.write(chunk)
41+
42+
with gzip.open(packages_archive, "rb") as f_in:
43+
with open(packages_content, "wb") as f_out:
44+
f_out.writelines(f_in)
45+
46+
with open(packages_content, 'r', encoding='utf-8') as file:
47+
packages_content = file.read()
3948

4049
package_path_by_name = {}
4150

minecode_pipelines/pipelines/mine_cpan.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -40,7 +40,6 @@ def steps(cls):
4040
cls.create_federatedcode_working_dir,
4141
cls.mine_cpan_packages,
4242
cls.fetch_federation_config,
43-
cls.mine_and_publish_cpan_packageurls,
4443
cls.mine_and_publish_packageurls,
4544
cls.delete_working_dir,
4645
)
@@ -54,7 +53,8 @@ def packages_count(self):
5453

5554
def mine_packageurls(self):
5655
"""Get cpan packageURLs for all mined cpan package names."""
57-
cpan.mine_and_publish_cpan_packageurls(
56+
yield from cpan.mine_and_publish_cpan_packageurls(
5857
package_path_by_name=self.cpan_packages_path_by_name,
5958
logger=self.log,
6059
)
60+

minecode_pipelines/pipes/cpan.py

Lines changed: 1 addition & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -27,7 +27,6 @@
2727
from minecode_pipelines.miners.cpan import CPAN_TYPE
2828
from minecode_pipelines.utils import grouper
2929

30-
from aboutcode.hashid import get_package_base_dir
3130
from packageurl import PackageURL
3231

3332
# If True, show full details on fetching packageURL for
@@ -54,7 +53,6 @@ def mine_and_publish_cpan_packageurls(package_path_by_name, logger=None):
5453
if not package_path_by_name:
5554
return
5655

57-
packageurls_by_base_purl = {}
5856
for package_batch in grouper(n=PACKAGE_BATCH_SIZE, iterable=package_path_by_name.keys()):
5957
packages_mined = []
6058

@@ -94,6 +92,4 @@ def mine_and_publish_cpan_packageurls(package_path_by_name, logger=None):
9492
logger(f"packageURLs: {purls_string}")
9593

9694
packages_mined.append(package_name)
97-
packageurls_by_base_purl[base_purl] = packageurls
98-
99-
return packageurls_by_base_purl
95+
yield base_purl, packageurls

pyproject-minecode_pipelines.toml

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -4,7 +4,7 @@ build-backend = "flot.buildapi"
44

55
[project]
66
name = "minecode_pipelines"
7-
version = "0.0.1b42"
7+
version = "0.0.1b59"
88
description = "A library for mining packageURLs and package metadata from ecosystem repositories."
99
readme = "minecode_pipelines/README.rst"
1010
license = { text = "Apache-2.0" }
@@ -63,7 +63,7 @@ mine_swift = "minecode_pipelines.pipelines.mine_swift:MineSwift"
6363
mine_composer = "minecode_pipelines.pipelines.mine_composer:MineComposer"
6464

6565
[tool.bumpversion]
66-
current_version = "0.0.1b42"
66+
current_version = "0.0.1b59"
6767
allow_dirty = true
6868

6969
files = [

0 commit comments

Comments
 (0)