@@ -60,12 +60,14 @@ def bag_circle(api: CKANAPI,
6060 # and if yes, compute the SHA256 hash and compare it. If the comparison
6161 # fails, then the user has to choose a different `target_path`, because
6262 # we cannot guarantee data integrity.
63+ target_path .mkdir (parents = True , exist_ok = True )
6364 circle_jsonlines_path = target_path / "circle.jsonlines"
6465 if circle_jsonlines_path .exists ():
6566 lines = circle_jsonlines_path .read_text ().split ("\n " )
6667 hasher2 = hashlib .sha256 ()
6768 for line in lines :
68- hasher2 .update (json .loads (line )["id" ].encode (encoding = "utf-8" ))
69+ if line .strip ():
70+ hasher2 .update (json .loads (line )["id" ].encode (encoding = "utf-8" ))
6971 sha256_hash2 = hasher2 .hexdigest ()
7072 if sha256_hash != sha256_hash2 :
7173 raise ValueError (
@@ -87,8 +89,8 @@ def bag_circle(api: CKANAPI,
8789 if callback :
8890 callback (ii / num_datasets )
8991
90- dataset_index = ii + 1
91- prefix = str (dataset_index ).zfill (max_digits )
92+ dataset_index = ii + 1
93+ prefix = str (dataset_index ).zfill (max_digits + 1 )
9294 bag_path = target_path / f"{ prefix } _{ ds_dict ['name' ]} "
9395
9496 if not manifest .is_bagged (bag_path ):
@@ -148,14 +150,24 @@ def bag_dataset(api: CKANAPI,
148150 download_resource (api = api ,
149151 bag_path = bag_path ,
150152 res_dict = res_dict ,
151- condensed = False )
153+ condensed = False ,
154+ abort_event = abort_event ,
155+ )
156+
157+ if abort_event is not None and abort_event .is_set ():
158+ return
152159
153160 # condensed resource
154161 if res_dict ["name" ].endswith (".rtdc" ):
155162 download_resource (api = api ,
156163 bag_path = bag_path ,
157164 res_dict = res_dict ,
158- condensed = True )
165+ condensed = True ,
166+ abort_event = abort_event ,
167+ )
168+
169+ if abort_event is not None and abort_event .is_set ():
170+ return
159171
160172 # create BagIt files
161173 info .write_bag_info (bag_path = bag_path ,
@@ -171,7 +183,9 @@ def bag_dataset(api: CKANAPI,
171183def download_resource (api : CKANAPI ,
172184 bag_path : pathlib .Path ,
173185 res_dict : dict ,
174- condensed : bool ):
186+ condensed : bool ,
187+ abort_event : threading .Event = None ,
188+ ):
175189 """Download and verify a resource from DCOR
176190
177191 Parameters
@@ -183,6 +197,8 @@ def download_resource(api: CKANAPI,
183197 CKAN resource dictionary
184198 condensed
185199 Whether to download the condensed resource (or the original resource)
200+ abort_event
201+ For stopping the download process prematurely
186202 """
187203 data_path = bag_path / "data"
188204 data_path .mkdir (parents = True , exist_ok = True )
@@ -192,7 +208,14 @@ def download_resource(api: CKANAPI,
192208 dj = DownloadJob (api = api ,
193209 resource_id = res_dict ["id" ],
194210 download_path = dl_path ,
195- condensed = condensed
211+ condensed = condensed ,
196212 )
197- dj .task_download_resource ()
213+ if abort_event is not None and abort_event .is_set ():
214+ return
215+
216+ dj .task_download_resource (abort_event = abort_event )
217+
218+ if abort_event is not None and abort_event .is_set ():
219+ return
220+
198221 dj .task_verify_resource ()
0 commit comments