|
20 | 20 | # ScanCode.io is a free software code scanning tool from nexB Inc. and others. |
21 | 21 | # Visit https://github.com/aboutcode-org/scancode.io for support and download. |
22 | 22 |
|
23 | | -import os |
24 | | -from scanpipe.pipelines import Pipeline |
| 23 | +from datetime import datetime |
25 | 24 | from scanpipe.pipes import federatedcode |
26 | 25 |
|
27 | 26 | from minecode_pipelines import pipes |
28 | | -from minecode_pipelines.pipes import MINECODE_PIPELINES_CONFIG_REPO |
29 | | -from minecode_pipelines.pipes.composer import mine_composer_packages |
30 | | -from minecode_pipelines.pipes.composer import mine_and_publish_composer_purls |
| 27 | +from minecode_pipelines.pipelines import MineCodeBasePipeline |
| 28 | +from minecode_pipelines.pipes import composer |
| 29 | +from minecode_pipelines.pipes.composer import mine_composer_packages, PACKAGE_BATCH_SIZE |
| 30 | +from minecode_pipelines.pipelines import _mine_and_publish_packageurls |
31 | 31 |
|
32 | | -MINECODE_COMPOSER_GIT_URL = os.environ.get( |
33 | | - "MINECODE_COMPOSER_GIT_URL", "https://github.com/aboutcode-data/minecode-data-composer-test" |
34 | | -) |
35 | 32 |
|
36 | | - |
class MineComposer(MineCodeBasePipeline):
    """
    Pipeline to mine Composer PHP packages and publish them to FederatedCode.

    Progress is tracked through a checkpoint file kept in the pipelines
    config repository, so that a later run can resume from the recorded
    ``start_index`` instead of starting over.

    NOTE(review): the steps ``check_federatedcode_eligibility``,
    ``create_federatedcode_working_dir``, ``fetch_federation_config`` and
    ``delete_working_dir``, as well as the attributes ``working_path``,
    ``data_cluster``, ``checked_out_repos``, ``append_purls`` and
    ``commit_message``, are not defined here; they are presumably provided
    by ``MineCodeBasePipeline`` -- confirm against the base class.
    """

    # Git repository holding the checkpoint file for this pipeline.
    pipeline_config_repo = "https://github.com/aboutcode-data/minecode-pipelines-config/"
    # Path of the checkpoint file inside the config repository.
    checkpoint_path = "composer/checkpoints.json"
    # Passed to the publishing helper as ``checkpoint_freq`` and used to
    # compute how far the checkpoint advances on each save.
    checkpoint_freq = 200

    @classmethod
    def steps(cls):
        """Return the ordered pipeline steps."""
        return (
            cls.check_federatedcode_eligibility,
            cls.create_federatedcode_working_dir,
            cls.fetch_checkpoint_and_start_index,
            cls.fetch_federation_config,
            cls.mine_and_publish_packageurls,
            cls.delete_working_dir,
        )

    def fetch_checkpoint_and_start_index(self):
        """
        Clone the pipelines config repository and load the resume position.

        Set ``self.checkpoint_config_repo`` to the cloned repository and
        ``self.start_index`` to the ``start_index`` stored in the checkpoint
        file, defaulting to 0 when the checkpoint has no such key.
        """
        self.checkpoint_config_repo = federatedcode.clone_repository(
            repo_url=self.pipeline_config_repo,
            clone_path=self.working_path / "minecode-pipelines-config",
            logger=self.log,
        )
        checkpoint = pipes.get_checkpoint_from_file(
            cloned_repo=self.checkpoint_config_repo,
            path=self.checkpoint_path,
        )

        self.start_index = checkpoint.get("start_index", 0)
        self.log(f"start_index: {self.start_index}")

    def packages_count(self):
        """
        Return the number of mined Composer packages, or None when unknown.

        None is returned both when the mined collection is empty and when
        ``mine_packageurls`` has not been called yet.
        """
        # getattr() keeps this safe to call before mine_packageurls() has
        # populated the attribute, instead of raising AttributeError.
        packages = getattr(self, "composer_packages", None)
        return len(packages) if packages else None

    def mine_packageurls(self):
        """
        Mine Composer packages and return the PackageURLs mined from them.

        The mined packages are kept on ``self.composer_packages`` so that
        ``packages_count`` can report their number. Mining starts from
        ``self.start_index``.
        """
        self.composer_packages = mine_composer_packages()
        return composer.mine_composer_packageurls(
            packages=self.composer_packages,
            start_index=self.start_index,
        )

    def mine_and_publish_packageurls(self):
        """Mine and publish PackageURLs."""
        # Mine first, as an explicit statement: packages_count() reads
        # self.composer_packages, which only mine_packageurls() populates.
        packageurls = self.mine_packageurls()
        _mine_and_publish_packageurls(
            packageurls=packageurls,
            total_package_count=self.packages_count(),
            data_cluster=self.data_cluster,
            checked_out_repos=self.checked_out_repos,
            working_path=self.working_path,
            append_purls=self.append_purls,
            commit_msg_func=self.commit_message,
            logger=self.log,
            checkpoint_func=self.save_check_point,
            checkpoint_freq=self.checkpoint_freq,
        )

    def save_check_point(self):
        """
        Persist the next resume position to the checkpoint file.

        Each call moves the position forward by
        ``checkpoint_freq * PACKAGE_BATCH_SIZE`` from the last saved one, so
        successive checkpoints within a single run keep advancing.

        NOTE(review): this assumes the publishing helper invokes this
        callback once per ``checkpoint_freq`` processed batches of
        ``PACKAGE_BATCH_SIZE`` packages -- confirm against
        ``_mine_and_publish_packageurls``.
        """
        next_start_index = self.start_index + self.checkpoint_freq * PACKAGE_BATCH_SIZE
        checkpoint = {
            "date": str(datetime.now()),
            "start_index": next_start_index,
        }

        self.log(f"Saving checkpoint: {checkpoint}")
        pipes.update_checkpoints_in_github(
            checkpoint=checkpoint,
            cloned_repo=self.checkpoint_config_repo,
            path=self.checkpoint_path,
            logger=self.log,
        )
        # Advance the in-memory cursor only after the checkpoint was stored,
        # so the next save builds on it rather than repeating the same index.
        self.start_index = next_start_index
0 commit comments