|
20 | 20 | # ScanCode.io is a free software code scanning tool from nexB Inc. and others. |
21 | 21 | # Visit https://github.com/aboutcode-org/scancode.io for support and download. |
22 | 22 |
|
23 | | -import os |
24 | | -from scanpipe.pipelines import Pipeline |
25 | | -from scanpipe.pipes import federatedcode |
26 | | - |
27 | | -from minecode_pipelines import pipes |
28 | | -from minecode_pipelines.miners.cran import fetch_cran_db |
| 23 | +import json |
| 24 | +from minecode_pipelines.pipelines import MineCodeBasePipeline |
29 | 25 | from minecode_pipelines.pipes import cran |
| 26 | +from minecode_pipelines.pipes.cran import fetch_cran_db |
30 | 27 |
|
31 | 28 |
|
32 | | -MINECODE_DATA_CRAN_REPO = os.environ.get( |
33 | | - "MINECODE_DATA_CRAN_REPO", "https://github.com/aboutcode-data/minecode-data-cran-test" |
34 | | -) |
35 | | - |
36 | | - |
class MineCran(MineCodeBasePipeline):
    """Pipeline to mine CRAN R packages and publish them to FederatedCode."""

    @classmethod
    def steps(cls):
        """Return the ordered steps executed by this pipeline."""
        return (
            cls.check_federatedcode_eligibility,
            cls.create_federatedcode_working_dir,
            cls.fetch_federation_config,
            # The database must be downloaded before mining: packages_count()
            # and mine_packageurls() both read self.db_path, which is only
            # set by fetch_cran_db().
            cls.fetch_cran_db,
            cls.mine_and_publish_packageurls,
            cls.delete_working_dir,
        )

    def fetch_cran_db(self):
        """Download the full CRAN package database."""
        # Inside a method body the bare name resolves to the module-level
        # ``fetch_cran_db`` import, not to this method of the same name.
        self.db_path = fetch_cran_db(logger=self.log)

    def packages_count(self):
        """
        Return the number of top-level entries in the downloaded CRAN JSON
        database, or None when the database has not been downloaded.
        """
        db_path = getattr(self, "db_path", None)
        # NOTE(review): ``.exists()`` assumes db_path is a pathlib.Path-like
        # object -- confirm against the return value of fetch_cran_db().
        if not db_path or not db_path.exists():
            return None

        with open(db_path, encoding="utf-8") as db_file:
            return len(json.load(db_file))

    def mine_packageurls(self):
        """
        Mine CRAN PackageURLs from the CRAN package database.

        Return whatever ``cran.mine_cran_packageurls`` produces for the
        database at ``self.db_path``.
        """
        # NOTE(review): the result was previously discarded. The helper takes
        # no repository or logger, so its return value is presumably what the
        # base pipeline publishes -- confirm against MineCodeBasePipeline.
        return cran.mine_cran_packageurls(db_path=self.db_path)
0 commit comments