2626from minecode_pipelines .pipes import write_packageurls_to_file
2727from minecode_pipelines .pipes import fetch_checkpoint_from_github
2828from minecode_pipelines .pipes import update_checkpoints_in_github
29+ from minecode_pipelines .pipes import update_checkpoints_file_in_github
2930from minecode_pipelines .pipes import get_mined_packages_from_checkpoint
3031from minecode_pipelines .pipes import update_mined_packages_in_checkpoint
3132from minecode_pipelines .pipes import get_packages_file_from_checkpoint
3435from minecode_pipelines .pipes import INITIAL_SYNC_STATE
3536from minecode_pipelines .pipes import PERIODIC_SYNC_STATE
3637from minecode_pipelines .pipes import write_packages_json
38+ from minecode_pipelines .pipes import compress_packages_file
39+ from minecode_pipelines .pipes import decompress_packages_file
3740
3841
3942from minecode_pipelines .miners .npm import get_npm_packages
5760
5861
# File name of the JSON file holding the replicated npm package listing.
PACKAGE_FILE_NAME = "NPMPackages.json"
# Gzip-compressed counterpart of the listing. Derived from the base name so
# the two can never drift apart; the pipeline builds the compressed path the
# same way (by appending ".gz" to the packages file path).
COMPRESSED_PACKAGE_FILE_NAME = PACKAGE_FILE_NAME + ".gz"

# Paths under "npm/" that are passed to the checkpoint helpers as the
# location of each file in the checkpoint/config repository.
NPM_REPLICATE_CHECKPOINT_PATH = "npm/" + PACKAGE_FILE_NAME
COMPRESSED_NPM_REPLICATE_CHECKPOINT_PATH = "npm/" + COMPRESSED_PACKAGE_FILE_NAME
# Passed as ``settings_path`` when the sync state checkpoint is handled.
NPM_CHECKPOINT_PATH = "npm/checkpoints.json"
# NOTE(review): presumably tracks which packages were already mined -- its
# use is not visible in this part of the file; confirm against callers.
NPM_PACKAGES_CHECKPOINT_PATH = "npm/packages_checkpoint.json"
6368
@@ -103,10 +108,15 @@ def mine_npm_packages(logger=None):
103108 packages = packages ,
104109 name = PACKAGE_FILE_NAME ,
105110 )
106- update_checkpoints_in_github (
107- checkpoint = packages ,
111+ compressed_packages_file = packages_file + ".gz"
112+ compress_packages_file (
113+ packages_file = packages_file ,
114+ compressed_packages_file = compressed_packages_file ,
115+ )
116+ update_checkpoints_file_in_github (
117+ checkpoints_file = compressed_packages_file ,
108118 cloned_repo = cloned_repo ,
109- path = NPM_REPLICATE_CHECKPOINT_PATH ,
119+ path = COMPRESSED_NPM_REPLICATE_CHECKPOINT_PATH ,
110120 )
111121
112122 if logger :
@@ -129,10 +139,15 @@ def mine_npm_packages(logger=None):
129139 settings_path = NPM_CHECKPOINT_PATH ,
130140 )
131141
132- packages_file = get_packages_file_from_checkpoint (
142+ compressed_packages_file = get_packages_file_from_checkpoint (
133143 config_repo = MINECODE_PIPELINES_CONFIG_REPO ,
134- checkpoint_path = NPM_REPLICATE_CHECKPOINT_PATH ,
135- name = PACKAGE_FILE_NAME ,
144+ checkpoint_path = COMPRESSED_NPM_REPLICATE_CHECKPOINT_PATH ,
145+ name = COMPRESSED_PACKAGE_FILE_NAME ,
146+ )
147+ packages_file = compressed_packages_file .replace (".gz" , "" )
148+ decompress_packages_file (
149+ packages_file = packages_file ,
150+ compressed_packages_file = compressed_packages_file ,
136151 )
137152
138153 elif state == PERIODIC_SYNC_STATE :
0 commit comments