Skip to content

Commit cd86dbd

Browse files
authored
Merge pull request #19 from individual-brain-charting/new_versions
Add new versions of IBC database
2 parents e53ffe1 + 89b27c0 commit cd86dbd

7 files changed

Lines changed: 24902 additions & 13 deletions

File tree

src/ibc_api/data/preprocessed_v2.csv

Lines changed: 2977 additions & 0 deletions
Large diffs are not rendered by default.

src/ibc_api/data/raw_v4.csv

Lines changed: 1973 additions & 0 deletions
Large diffs are not rendered by default.

src/ibc_api/data/raw_v5.csv

Lines changed: 8937 additions & 0 deletions
Large diffs are not rendered by default.
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
,subject,session,desc,hemi,task,direction,run,space,suffix,datatype,extension,contrast,megabytes,dataset,path

src/ibc_api/data/volume_maps_v3.csv

Lines changed: 10977 additions & 0 deletions
Large diffs are not rendered by default.

src/ibc_api/scripts/create_db.py

Lines changed: 36 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,31 +1,41 @@
11
"""Script to create the database from the raw, preprocessed, volume maps and surface maps
2-
data on EBRAINS."""
2+
data on EBRAINS."""
33

44
# import libraries
55
import pandas as pd
66
import os
77
import ibc_api.utils as ibc
8+
from ibc_api.metadata import fetch_metadata, _find_latest_version
89
from bids.layout import parse_file_entities
910

1011
datasets = ["raw", "preprocessed", "volume_maps", "surface_maps"]
1112

12-
ibc.authenticate()
13+
ibc._authenticate()
1314
for dataset in datasets:
14-
for version in range(1, 4):
15+
for version in range(1, 6):
1516
# Get EBRAINS metadata about the dataset
1617
try:
1718
ebrains_data = ibc._connect_ebrains(dataset, version=version)
1819
except (ValueError, IndexError) as error:
20+
print(error)
1921
print(f"skipping dataset {dataset}, version {version}")
2022
continue
23+
try:
24+
root_dir = ebrains_data.prefix.strip("/")
25+
except AttributeError:
26+
root_dir = ""
2127
# Get the file names and other info as dataframes
2228
ebrains_df = pd.DataFrame(ebrains_data.__dict__["_files"])
2329
filenames = ebrains_df["name"].tolist()
2430
# parse filenames using pybids to get all the entities
2531
bids_entities = []
2632
for file in filenames:
2733
bids_entity = parse_file_entities(
28-
file, include_unmatched=True, config="ibc_conifg.json"
34+
file,
35+
include_unmatched=True,
36+
config=os.path.join(
37+
os.path.dirname(__file__), "ibc_config.json"
38+
),
2939
)
3040
bids_entities.append(bids_entity)
3141
# convert the list of dictionaries with bids entities to a dataframe
@@ -36,25 +46,39 @@
3646
bids_df["megabytes"] = ebrains_df["bytes"].astype(int).div(1024**2)
3747
# add a column with the dataset name
3848
bids_df["dataset"] = [dataset] * len(bids_df)
39-
root_dir = ebrains_df["name"].str.split("/").str[0]
40-
# add a column with the file path without the root directory
41-
path = ebrains_df["name"].str.split("/").str[1:].str.join("/")
49+
infer_root_dir = ebrains_df["name"].str.split("/").str[0].unique()[0]
50+
51+
if infer_root_dir == root_dir:
52+
# add a column with the file path without the root directory
53+
path = ebrains_df["name"].str.split("/").str[1:].str.join("/")
54+
else:
55+
# add a column with the file path
56+
path = ebrains_df["name"]
57+
58+
breakpoint()
59+
60+
root_dir_series = ebrains_df["name"].str.split("/").str[0]
4261
bids_df["path"] = path
4362
# separate surface maps and volume maps in different csv files
4463
if dataset == "surface_maps":
45-
mask = (root_dir == "resulting_smooth_maps_surface") & bids_df[
46-
"extension"
47-
].isin([".gii", ".json"])
64+
mask = (
65+
root_dir_series == "resulting_smooth_maps_surface"
66+
) & bids_df["extension"].isin([".gii", ".json"])
4867
bids_df = bids_df[mask]
4968
# there are some files with .gii extension in the volume maps folder
5069
# filtering them out
5170
elif dataset == "volume_maps":
52-
mask = (root_dir == "resulting_smooth_maps") & bids_df[
71+
mask = (root_dir_series == "resulting_smooth_maps") & bids_df[
5372
"extension"
5473
].isin([".nii.gz", ".json"])
5574
bids_df = bids_df[mask]
5675
bids_df = bids_df.reset_index(drop=True)
5776
# create a csv file with the bids entities
58-
csv_file = os.path.join("..", "data", f"{dataset}_v{version}.csv")
77+
csv_file = os.path.join(
78+
os.path.dirname(__file__),
79+
"..",
80+
"data",
81+
f"{dataset}_v{version}.csv",
82+
)
5983
bids_df.to_csv(csv_file)
6084
print(f"{csv_file} created!")

src/ibc_api/utils.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@ def _authenticate(token_dir=TOKEN_ROOT):
5050
with open(token_file, "r") as f:
5151
token = f.read()
5252
# set the token
53-
siibra.set_ebrains_token(token)
53+
EbrainsRequest.set_token(token)
5454
else:
5555
EbrainsRequest.fetch_token()
5656
token = EbrainsRequest._KG_API_TOKEN

0 commit comments

Comments
 (0)