|
1 | 1 | """Script to create the database from the raw, preprocessed, volume maps and surface maps |
2 | | - data on EBRAINS.""" |
| 2 | +data on EBRAINS.""" |
3 | 3 |
|
4 | 4 | # import libraries |
5 | 5 | import pandas as pd |
6 | 6 | import os |
7 | 7 | import ibc_api.utils as ibc |
| 8 | +from ibc_api.metadata import fetch_metadata, _find_latest_version |
8 | 9 | from bids.layout import parse_file_entities |
9 | 10 |
|
10 | 11 | datasets = ["raw", "preprocessed", "volume_maps", "surface_maps"] |
11 | 12 |
|
12 | | -ibc.authenticate() |
| 13 | +ibc._authenticate() |
13 | 14 | for dataset in datasets: |
14 | | - for version in range(1, 4): |
| 15 | + for version in range(1, 6): |
15 | 16 | # Get EBRAINS metadata about the dataset |
16 | 17 | try: |
17 | 18 | ebrains_data = ibc._connect_ebrains(dataset, version=version) |
18 | 19 | except (ValueError, IndexError) as error: |
| 20 | + print(error) |
19 | 21 | print(f"skipping dataset {dataset}, version {version}") |
20 | 22 | continue |
| 23 | + try: |
| 24 | + root_dir = ebrains_data.prefix.strip("/") |
| 25 | + except AttributeError: |
| 26 | + root_dir = "" |
21 | 27 | # Get the file names and other info as dataframes |
22 | 28 | ebrains_df = pd.DataFrame(ebrains_data.__dict__["_files"]) |
23 | 29 | filenames = ebrains_df["name"].tolist() |
24 | 30 | # parse filenames using pybids to get all the entities |
25 | 31 | bids_entities = [] |
26 | 32 | for file in filenames: |
27 | 33 | bids_entity = parse_file_entities( |
28 | | - file, include_unmatched=True, config="ibc_conifg.json" |
| 34 | + file, |
| 35 | + include_unmatched=True, |
| 36 | + config=os.path.join( |
| 37 | + os.path.dirname(__file__), "ibc_config.json" |
| 38 | + ), |
29 | 39 | ) |
30 | 40 | bids_entities.append(bids_entity) |
31 | 41 | # convert the list of dictionaries with bids entities to a dataframe |
|
36 | 46 | bids_df["megabytes"] = ebrains_df["bytes"].astype(int).div(1024**2) |
37 | 47 | # add a column with the dataset name |
38 | 48 | bids_df["dataset"] = [dataset] * len(bids_df) |
39 | | - root_dir = ebrains_df["name"].str.split("/").str[0] |
40 | | - # add a column with the file path without the root directory |
41 | | - path = ebrains_df["name"].str.split("/").str[1:].str.join("/") |
| 49 | + infer_root_dir = ebrains_df["name"].str.split("/").str[0].unique()[0] |
| 50 | + |
| 51 | + if infer_root_dir == root_dir: |
| 52 | + # add a column with the file path without the root directory |
| 53 | + path = ebrains_df["name"].str.split("/").str[1:].str.join("/") |
| 54 | + else: |
| 55 | + # add a column with the file path |
| 56 | + path = ebrains_df["name"] |
| 57 | + |
| 58 | + # first path component of every file name; the surface_maps and
| 59 | + # volume_maps branches below filter on it to keep only the smoothed maps
| 60 | + root_dir_series = ebrains_df["name"].str.split("/").str[0]
42 | 61 | bids_df["path"] = path |
43 | 62 | # separate surface maps and volume maps in different csv files |
44 | 63 | if dataset == "surface_maps": |
45 | | - mask = (root_dir == "resulting_smooth_maps_surface") & bids_df[ |
46 | | - "extension" |
47 | | - ].isin([".gii", ".json"]) |
| 64 | + mask = ( |
| 65 | + root_dir_series == "resulting_smooth_maps_surface" |
| 66 | + ) & bids_df["extension"].isin([".gii", ".json"]) |
48 | 67 | bids_df = bids_df[mask] |
49 | 68 | # there are some files with .gii extension in the volume maps folder |
50 | 69 | # filtering them out |
51 | 70 | elif dataset == "volume_maps": |
52 | | - mask = (root_dir == "resulting_smooth_maps") & bids_df[ |
| 71 | + mask = (root_dir_series == "resulting_smooth_maps") & bids_df[ |
53 | 72 | "extension" |
54 | 73 | ].isin([".nii.gz", ".json"]) |
55 | 74 | bids_df = bids_df[mask] |
56 | 75 | bids_df = bids_df.reset_index(drop=True) |
57 | 76 | # create a csv file with the bids entities |
58 | | - csv_file = os.path.join("..", "data", f"{dataset}_v{version}.csv") |
| 77 | + csv_file = os.path.join( |
| 78 | + os.path.dirname(__file__), |
| 79 | + "..", |
| 80 | + "data", |
| 81 | + f"{dataset}_v{version}.csv", |
| 82 | + ) |
59 | 83 | bids_df.to_csv(csv_file) |
60 | 84 | print(f"{csv_file} created!") |
0 commit comments