66# See https://github.com/aboutcode-org/purldb for support or download.
77# See https://aboutcode.org for more information about nexB OSS projects.
88#
9- from minecode_pipelines .pipes import get_last_commit
9+ from datetime import datetime
10+
11+ from minecode_pipelines .pipes import fetch_checkpoint_from_github
12+ from minecode_pipelines .pipes import update_checkpoints_in_github
13+ from minecode_pipelines .pipes import MINECODE_PIPELINES_CONFIG_REPO
1014from minecode_pipelines .pipes import get_changed_files
11- from minecode_pipelines .pipes import update_last_commit
1215from minecode_pipelines .pipes .cargo import store_cargo_packages
16+ from scanpipe .pipes .federatedcode import commit_changes
17+ from scanpipe .pipes .federatedcode import push_changes
18+ from minecode_pipelines import VERSION
19+
1320import json
1421from pathlib import Path
1522
1623from minecode_pipelines .utils import get_next_x_commit
1724
25+ PACKAGE_BATCH_SIZE = 500
26+ CARGO_CHECKPOINT_PATH = "cargo/checkpoints.json"
1827
19- def process_cargo_packages (cargo_repo , fed_repo , fed_conf_repo , logger ):
28+
29+ def process_cargo_packages (cargo_index_repo , cloned_data_repo , config_repo , logger ):
2030 """
2131 Process Cargo index files commit by commit.
2232 Push changes to fed_repo after:
23- - every `commit_batch` commits, OR
24- - when reaching HEAD.
33+ - every `commit_batch` commits, OR when reaching HEAD.
2534 """
2635
27- base_path = Path (cargo_repo .working_tree_dir )
36+ base_path = Path (cargo_index_repo .working_tree_dir )
2837
2938 while True :
30- setting_last_commit = get_last_commit (fed_conf_repo , "cargo" )
31- next_commit = get_next_x_commit (cargo_repo , setting_last_commit , x = 10 , branch = "master" )
39+ cargo_checkpoints = (
40+ fetch_checkpoint_from_github (MINECODE_PIPELINES_CONFIG_REPO , CARGO_CHECKPOINT_PATH )
41+ or {}
42+ )
43+ checkpoints_last_commit = cargo_checkpoints .get ("last_commit" )
3244
33- if next_commit == setting_last_commit :
45+ next_commit = get_next_x_commit (
46+ cargo_index_repo , checkpoints_last_commit , x = 10 , branch = "master"
47+ )
48+
49+ if next_commit == checkpoints_last_commit :
3450 logger ("No new commits to mine" )
3551 break
3652
3753 changed_files = get_changed_files (
38- cargo_repo , commit_x = setting_last_commit , commit_y = next_commit
54+ cargo_index_repo , commit_x = checkpoints_last_commit , commit_y = next_commit
3955 )
4056 logger (f"Found { len (changed_files )} changed files in Cargo index." )
4157
4258 file_counter = 0
59+ purl_files = []
60+ purls = []
4361 for idx , rel_path in enumerate (changed_files ):
4462 file_path = base_path / rel_path
4563 logger (f"Found { file_path } ." )
@@ -57,8 +75,45 @@ def process_cargo_packages(cargo_repo, fed_repo, fed_conf_repo, logger):
5775 packages .append (json .loads (line ))
5876
5977 file_counter += 1
60- push_commit = (file_counter % 1000 == 0 ) or (idx == len (changed_files ))
61- store_cargo_packages (packages , fed_repo , push_commit )
78+ commit_and_push = (file_counter % PACKAGE_BATCH_SIZE == 0 ) or (
79+ idx == len (changed_files )
80+ )
81+ purl_file , base_purl = store_cargo_packages (packages , cloned_data_repo )
82+ logger (f"writing packageURLs for package: { base_purl } at: { purl_file } " )
83+
84+ purl_files .append (purl_file )
85+ purls .append (str (base_purl ))
86+ if not commit_and_push :
87+ continue
88+
89+ commit_changes (
90+ repo = cloned_data_repo ,
91+ files_to_commit = purl_files ,
92+ purls = purls ,
93+ mine_type = "packageURL" ,
94+ tool_name = "pkg:cargo/minecode-pipelines" ,
95+ tool_version = VERSION ,
96+ )
97+
98+ # Push changes to remote repository
99+ push_changes (repo = cloned_data_repo )
100+ purl_files = []
101+ purls = []
102+
103+ if logger :
104+ logger (
105+ f"Updating checkpoint at: { CARGO_CHECKPOINT_PATH } with last commit: { checkpoints_last_commit } "
106+ )
107+
108+ settings_data = {
109+ "date" : str (datetime .now ()),
110+ "last_commit" : next_commit ,
111+ }
112+
113+ update_checkpoints_in_github (
114+ checkpoint = settings_data ,
115+ cloned_repo = config_repo ,
116+ path = CARGO_CHECKPOINT_PATH ,
117+ )
62118
63- update_last_commit (next_commit , fed_conf_repo , "cargo" )
64- logger (f"Pushed batch for commit range { setting_last_commit } :{ next_commit } ." )
119+ logger (f"Pushed batch for commit range { checkpoints_last_commit } :{ next_commit } ." )
0 commit comments