1212from dclab .cli import compress
1313
1414from ..api import dataset_activate , resource_add , resource_exists
15- from ..common import sha256sum
15+ from ..common import is_dc_file , sha256sum
16+
17+
class AtLeastOneDCResourceRequiredPerDatasetError(Exception):
    """Raised when an upload does not contain at least one DC resource.

    The error is raised when none of the resource paths passed for a
    dataset upload is recognized as a deformability cytometry (DC) file.

    It derives from :class:`Exception` rather than directly from
    :class:`BaseException`: it is an ordinary application error, so
    generic ``except Exception`` handlers should be able to catch it,
    and it should not be grouped with interpreter-level signals such as
    ``KeyboardInterrupt`` or ``SystemExit``. Handlers written against
    ``BaseException`` keep catching it, since ``Exception`` is a
    subclass of ``BaseException``.
    """
1621
1722
1823logger = logging .getLogger (__name__ )
@@ -76,10 +81,28 @@ def __init__(self, api, dataset_id, resource_paths,
7681 Multiple upload jobs may share the same cache dir,
7782 since each job creates its own subdirectory.
7883 """
84+ # Ensure resource_paths is a list in case somebody passed an iterator.
85+ resource_paths = list (resource_paths )
86+
7987 self .api = api .copy () # create a copy of the API
8088 self .dataset_id = dataset_id
89+
90+ # Check whether at least one DC resource is present in the list.
91+ # This is a hard DCOR requirement.
92+ for pp in resource_paths :
93+ if is_dc_file (pp ):
94+ break
95+ else :
96+ raise AtLeastOneDCResourceRequiredPerDatasetError (
97+ f"DCOR requires at least one valid deformability cytometry "
98+ f"file per dataset upload. Please make sure that this "
99+ f"condition is met and all of these files exist: "
100+ f"{ resource_paths } " )
101+
81102 # make sure the dataset_id is valid
82103 self .api .get ("package_show" , id = self .dataset_id , timeout = 500 )
104+
105+ # resolve paths and set resource names
83106 resolved_paths = [pathlib .Path (pp ).resolve () for pp in resource_paths ]
84107 if resource_names is None :
85108 resource_names = [pp .name for pp in resolved_paths ]
@@ -192,18 +215,19 @@ def sort_resources_according_to_basin_hierarchy(paths) -> list[int]:
192215 paths_basins = {}
193216 for pp in path_strings :
194217 basins = []
195- try :
196- with dclab .new_dataset (pp ) as ds :
197- for bn in ds .basins :
198- if (bn .basin_type == "file"
199- and bn .basin_format == "hdf5" ):
200- bpath = str (bn .location .resolve ())
201- if bpath in path_strings :
202- basins .append (bpath )
203- except BaseException :
204- logger .error (f"Failed to get basin info for { pp } . Traceback"
205- f"follows." )
206- logger .error (traceback .format_exc ())
218+ if is_dc_file (pp , test_open = False ):
219+ try :
220+ with dclab .new_dataset (pp ) as ds :
221+ for bn in ds .basins :
222+ if (bn .basin_type == "file"
223+ and bn .basin_format == "hdf5" ):
224+ bpath = str (bn .location .resolve ())
225+ if bpath in path_strings :
226+ basins .append (bpath )
227+ except BaseException :
228+ logger .error (f"Failed to get basin info for { pp } . "
229+ f"Traceback follows." )
230+ logger .error (traceback .format_exc ())
207231 paths_basins [pp ] = basins
208232
209233 # edit path_strings in-place and populate paths_sort_order
@@ -418,14 +442,12 @@ def task_compress_resources(self):
418442 Data are stored in the user's cache directory and
419443 deleted after upload is complete.
420444 """
421- # make sure that we have .rtdc or .dc files
422- dc_files = [pp for pp in self .paths if pp .suffix in [".rtdc" , ".dc" ]]
423- if not dc_files :
424- raise ValueError ("There are no RT-DC files in this dataset!" )
425445 self .set_state ("compress" )
426446 ds_dict = self .api .get ("package_show" , id = self .dataset_id , timeout = 500 )
427447 for ii , path in enumerate (self .paths ):
428- if path .suffix in [".rtdc" , ".dc" ]: # do we have a DC file?
448+ # No need to open the file here (`test_open=False`): every DC file is
449+ # opened and validated as a DC instance by the integrity checker.
450+ if is_dc_file (path , test_open = False ):
429451 if resource_exists (
430452 dataset_id = self .dataset_id ,
431453 resource_name = self .resource_names [ii ],
@@ -434,7 +456,7 @@ def task_compress_resources(self):
434456 dataset_dict = ds_dict
435457 ):
436458 # There is no need to compress resources that have
437- # already been upladed to DCOR. The same check is done
459+ # already been uploaded to DCOR. The same check is done
438460 # in task_upload_resources, so there is no danger that
439461 # an uncompressed resource would be uploaded.
440462 continue
0 commit comments