1- """API to fetch IBC data from EBRAINS via Human Data Gateway using siibra.
2- """
1+ """API to fetch IBC data from EBRAINS via Human Data Gateway using siibra."""
32
43# %$
54import json
@@ -165,6 +164,10 @@ def download_gm_mask(resolution=1.5, save_to=None):
165164 return save_as
166165
167166
167+ def _is_empty_db (db ):
168+ return db is None or db .empty or len (db ) == 0
169+
170+
168171def get_info (data_type = "volume_maps" , save_to = None , metadata = METADATA ):
169172 """Fetch a csv file describing each file in a given IBC dataset on EBRAINS.
170173
@@ -182,14 +185,48 @@ def get_info(data_type="volume_maps", save_to=None, metadata=METADATA):
182185 pandas.DataFrame
183186 dataframe with information about each file in the dataset
184187 """
185- # file with all information about the dataset
186- db_file = md .fetch_dataset_db (data_type , metadata )
187- # load the file as dataframe
188- # convert subject, session and run to string to avoid losing leading zeros
189- db = pd .read_csv (
190- db_file , converters = {"subject" : str , "session" : str , "run" : str }
191- )
192- db .drop (columns = ["Unnamed: 0" ], inplace = True , errors = "ignore" )
188+
189+ datasets = metadata [data_type ]
190+ latest_idx = md ._find_latest_version (datasets )
191+
192+ last_exception = None
193+
194+ # Try from latest version → older versions
195+ for version_idx in range (latest_idx , - 1 , - 1 ):
196+ # fetch the information corresponding to this version
197+ dataset = datasets [version_idx ]
198+ db_file = md .fetch_remote_file (dataset ["db_file" ])
199+ # load the file as dataframe
200+ # convert subject, session and run to string to avoid losing
201+ # leading zeros
202+ db = pd .read_csv (
203+ db_file ,
204+ converters = {"subject" : str , "session" : str , "run" : str },
205+ )
206+ db .drop (columns = ["Unnamed: 0" ], inplace = True , errors = "ignore" )
207+
208+ if not _is_empty_db (db ):
209+ print (
210+ f"Fetched database for { data_type } , version { dataset ['version' ]} ."
211+ )
212+ break
213+ else :
214+ last_exception = ValueError (
215+ f"No versions found for dataset { data_type } , version { dataset ['version' ]} ."
216+ )
217+ print (
218+ f"Failed to fetch database for { data_type } , version { dataset ['version' ]} ."
219+ "Trying older version..."
220+ )
221+
222+ # If all versions failed, raise the last exception
223+ if _is_empty_db (db ):
224+ raise (
225+ last_exception
226+ if last_exception
227+ else ValueError (f"No versions found for dataset { data_type } ." )
228+ )
229+
193230 # save the database file
194231 save_to = _create_root_dir (save_to )
195232 save_as = os .path .join (save_to , f"available_{ data_type } .csv" )
@@ -411,7 +448,7 @@ def download_data(db, n_jobs=2, save_to=None):
411448 dataframe with information about files in the dataset, ideally a subset
412449 of the full dataset
413450 n_jobs : int, optional
414- number of parallel jobs to run, by default 2. -1 would use all the CPUs.
451+ number of parallel jobs to run, by default 2. -1 would use all the CPUs.
415452 See: https://joblib.readthedocs.io/en/latest/generated/joblib.Parallel.html
416453 save_to : str, optional
417454 where to save the data, by default None, in which case the data is
@@ -457,7 +494,7 @@ def _download_and_update_progress(src_file, dst_file, connector):
457494 CACHE .run_maintenance () # keep cache < 2GB
458495 return file_name , file_time
459496 except Exception as e :
460- raise (f"Error downloading { src_file } . Error: { e } " )
497+ raise (f"Error downloading { src_file } . Error: { e } " )
461498
462499 # download finally
463500 print (f"\n ...Starting download of { len (src_file_names )} files..." )
0 commit comments