11import logging
22import pathlib
33import time
4+ import threading
5+ import traceback
46import traceback as tb
57import warnings
68
@@ -27,6 +29,7 @@ def __init__(self, path):
2729 self .path_queued = self .path / "queued"
2830 self .path_completed .mkdir (parents = True , exist_ok = True )
2931 self .path_queued .mkdir (parents = True , exist_ok = True )
32+ self .write_locks = {}
3033
3134 def __contains__ (self , item ):
3235 if isinstance (item , UploadJob ):
@@ -49,6 +52,10 @@ def get_queued_dataset_ids(self):
4952 """Return list of DCOR dataset IDs corresponding to queued jobs"""
5053 return sorted ([pp .stem for pp in self .path_queued .glob ("*.json" )])
5154
55+ def get_write_lock (self , dataset_id ):
56+ """Return a `threading.Lock` object for write operations"""
57+ return self .write_locks .setdefault (dataset_id , threading .Lock ())
58+
5259 def is_job_done (self , dataset_id ):
5360 jp = self .path_completed / (dataset_id + ".json" )
5461 return jp .exists ()
@@ -68,21 +75,27 @@ def immortalize_job(self, upload_job):
6875 # is done after checking for queued (above case).
6976 raise FileExistsError (f"The job '{ upload_job .dataset_id } ' is "
7077 f"already done!" )
71- save_task (upload_job = upload_job , path = pout )
78+ with self .get_write_lock (upload_job .dataset_id ):
79+ save_task (upload_job = upload_job , path = pout )
7280
7381 def job_exists (self , dataset_id ):
74- return self .is_job_queued (dataset_id ) or self .is_job_done (dataset_id )
82+ # Use a write lock here to avoid race conditions
83+ with self .get_write_lock (dataset_id ):
84+ return (self .is_job_queued (dataset_id )
85+ or self .is_job_done (dataset_id ))
7586
7687 def obliterate_job (self , dataset_id ):
7788 """Remove a job from the persistent queue list"""
7889 pdel = self .path_queued / (dataset_id + ".json" )
79- pdel .unlink ()
90+ with self .get_write_lock (dataset_id ):
91+ pdel .unlink ()
8092
8193 def set_job_done (self , dataset_id ):
8294 """Move the job from the queue to the complete list"""
8395 pin = self .path_queued / (dataset_id + ".json" )
8496 pout = self .path_completed / (dataset_id + ".json" )
85- pin .rename (pout )
97+ with self .get_write_lock (dataset_id ):
98+ pin .rename (pout )
8699
87100 def summon_job (self , dataset_id , api , cache_dir = None ):
88101 """Instantiate job from the persistent queue list"""
@@ -91,6 +104,20 @@ def summon_job(self, dataset_id, api, cache_dir=None):
91104 assert upload_job .dataset_id == dataset_id
92105 return upload_job
93106
107+ def update_job (self , upload_job ):
108+ """Update an immortalized job dictionary
109+
110+ The job must exist. Updating a job makes sense when e.g.
111+ the ETag of a resource becomes known.
112+ """
113+ if not self .job_exists (upload_job .dataset_id ):
114+ raise ValueError (f"Cannot update non-existent job { upload_job } " )
115+ with self .get_write_lock (upload_job .dataset_id ):
116+ pq = self .path_queued / (upload_job .dataset_id + ".json" )
117+ pc = self .path_completed / (upload_job .dataset_id + ".json" )
118+ pp = pq if pq .exists () else pc
119+ save_task (upload_job = upload_job , path = pp )
120+
94121
95122class UploadQueue :
96123 def __init__ (self , api , path_persistent_job_list = None , cache_dir = None ):
@@ -142,13 +169,19 @@ def __init__(self, api, path_persistent_job_list=None, cache_dir=None):
142169 else :
143170 self .jobs_eternal = None
144171 self .daemon_compress = CompressDaemon (self .jobs )
145- self .daemon_upload = UploadDaemon (self .jobs )
172+ self .daemon_upload = UploadDaemon (self .jobs , self . jobs_eternal )
146173 self .daemon_verify = VerifyDaemon (self .jobs )
147174
148175 def __contains__ (self , upload_job ):
149176 return upload_job in self .jobs
150177
151178 def __del__ (self ):
179+ # Attempt to update the eternal jobs (important for ETags)
180+ for job in self .jobs :
181+ try :
182+ self .jobs_eternal .update_job (job )
183+ except BaseException :
184+ self .logger .error (traceback .format_exc ())
152185 self .daemon_upload .shutdown_flag .set ()
153186 self .daemon_verify .shutdown_flag .set ()
154187 self .daemon_compress .shutdown_flag .set ()
@@ -199,7 +232,7 @@ def abort_job(self, dataset_id):
199232 # the daemon to set all the other jobs to "abort".
200233 self .daemon_upload .shutdown_flag .set ()
201234 self .daemon_upload .terminate ()
202- self .daemon_upload = UploadDaemon (self .jobs )
235+ self .daemon_upload = UploadDaemon (self .jobs , self . jobs_eternal )
203236 elif prev_state == "compress" :
204237 self .daemon_compress .shutdown_flag .set ()
205238 self .daemon_compress .terminate ()
@@ -327,12 +360,20 @@ def __init__(self, jobs):
327360
328361
329362class UploadDaemon (Daemon ):
330- def __init__ (self , jobs ):
363+ def __init__ (self , jobs , jobs_eternal ):
331364 """Upload daemon"""
365+ if jobs_eternal is None :
366+ run_after_upload = None
367+ else :
368+ run_after_upload = jobs_eternal .update_job
332369 super (UploadDaemon , self ).__init__ (
333370 jobs ,
334371 job_trigger_state = "parcel" ,
335- job_function_name = "task_upload_resources" )
372+ job_function_name = "task_upload_resources" ,
373+ job_function_kwargs = {
374+ "run_after_upload" : run_after_upload
375+ }
376+ )
336377
337378
338379class VerifyDaemon (Daemon ):
0 commit comments