Skip to content

Commit 2aed68f

Browse files
authored
Creating File bundles (#51)
* file_bundle * absolute path for pytest * tests for file bundle * solving windows path * Typo * Correction file bundle * new tests for file bundle * typo * typo
1 parent 4c98692 commit 2aed68f

4 files changed

Lines changed: 191 additions & 26 deletions

File tree

bids2openminds/main.py

Lines changed: 66 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -333,12 +333,71 @@ def create_subjects(subject_id, layout_df, layout, collection):
333333
return subjects_dict, subject_state_dict, subjects_list
334334

335335

336+
def create_file_bundle(BIDS_path, path, collection, parent_file_bundle=None, is_file_repository=False):
    """Recursively describe the directory ``path`` as an openMINDS node.

    The directory itself becomes an ``omcore.FileRepository`` when
    ``is_file_repository`` is true, otherwise an ``omcore.FileBundle`` named
    after its path relative to ``BIDS_path``. Every sub-directory except one
    called ``openminds`` is processed recursively as a child ``FileBundle``
    whose ``is_part_of`` is the node created here. The node receives the
    cumulative size of all files found below ``path`` and is added to
    ``collection``.

    Args:
        BIDS_path: Dataset root; used for the repository IRI and as the base
            for relative bundle names.
        path: Directory to scan.
        collection: Object whose ``add`` method receives the created node.
        parent_file_bundle: Value passed as ``is_part_of`` of a new
            ``FileBundle`` (not used when creating the repository).
        is_file_repository: Create a ``FileRepository`` instead of a
            ``FileBundle`` for ``path``.

    Returns:
        A tuple ``(files, files_size, repository)`` where ``files`` maps each
        file path (as a string) to the list of bundles it belongs to (``None``
        for files sitting directly in the repository root), ``files_size`` is
        the total size in bytes, and ``repository`` is the created
        ``FileRepository`` or ``None`` when a ``FileBundle`` was created.
    """
    if is_file_repository:
        node = omcore.FileRepository(
            format=omcore.ContentType.by_name("application/vnd.bids"),
            iri=IRI(pathlib.Path(BIDS_path).absolute().as_uri()),
        )
    else:
        relative_path = os.path.relpath(path, BIDS_path)
        # Bundle names always use forward slashes, also on Windows.
        name = str(relative_path).replace("\\", "/")
        if name[0] == "_":
            name = name[1:]
        node = omcore.FileBundle(
            content_description=f"File bundle created for {relative_path}",
            name=name,
            is_part_of=parent_file_bundle,
        )

    files = {}
    files_size = 0

    for entry in os.listdir(path):
        entry_path = str(pathlib.PurePath(path, entry))

        if os.path.isfile(entry_path):
            # The converter's own output file is not part of the dataset.
            if entry != "openminds.jsonld":
                # Files directly in the repository root belong to no bundle.
                files[entry_path] = None if is_file_repository else [node]
                files_size += os.stat(entry_path).st_size
            continue

        # Skip non-directories and the converter's own output directory.
        if not os.path.isdir(entry_path) or entry == "openminds":
            continue

        child_files, child_size, _ = create_file_bundle(
            BIDS_path,
            entry_path,
            collection,
            parent_file_bundle=node,
            is_file_repository=False,
        )
        for child_path, child_bundles in child_files.items():
            files.setdefault(child_path, []).extend(child_bundles)
        files_size += child_size

    node.storage_size = omcore.QuantitativeValue(
        value=files_size,
        unit=controlled_terms.UnitOfMeasurement.by_name("byte"),
    )
    collection.add(node)

    openminds_file_repository = node if is_file_repository else None
    return files, files_size, openminds_file_repository
392+
393+
336394
def create_file(layout_df, BIDS_path, collection):
337395

338-
file_repository = omcore.FileRepository(
339-
format=omcore.ContentType.by_name("application/vnd.bids"),
340-
iri=IRI(pathlib.Path(BIDS_path).absolute().as_uri()))
341-
collection.add(file_repository)
396+
BIDS_path_absolute = pathlib.Path(BIDS_path).absolute()
397+
398+
file2file_bundle_dic, _, file_repository = create_file_bundle(
399+
BIDS_path_absolute, BIDS_path_absolute, collection, is_file_repository=True)
400+
342401
files_list = []
343402
for index, file in layout_df.iterrows():
344403
file_format = None
@@ -380,14 +439,16 @@ def create_file(layout_df, BIDS_path, collection):
380439
"event sequence")
381440
file_format = omcore.ContentType.by_name(
382441
"text/tab-separated-values")
442+
383443
file = omcore.File(
384444
iri=iri,
385445
content_description=content_description,
386446
data_types=data_types,
387447
file_repository=file_repository,
388448
format=file_format,
389449
hashes=hashes,
390-
# is_part_of=file_bundels
450+
is_part_of=file2file_bundle_dic[str(
451+
pathlib.Path(path))],
391452
name=name,
392453
# special_usage_role
393454
storage_size=storage_size_obj,

test/test_bids_examples.py

Lines changed: 22 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -4,18 +4,18 @@
44
import bids2openminds.converter
55

66

7-
# Dataset information in following order dataset_label, dataset_subject_number, dataset_subject_state_number, dataset_person_number, dataset_files_number, dataset_behavioral_protocol_number
8-
example_dataset = [("ds003", 13, 13, 2, 58, 1),
9-
("ds000247", 6, 10, 5, 202, 2),
7+
# Dataset information in following order dataset_label, dataset_subject_number, dataset_subject_state_number, dataset_person_number, dataset_files_number, dataset_file_bundles_number, dataset_behavioral_protocol_number
8+
example_dataset = [("ds003", 13, 13, 2, 58, 39, 1),
9+
("ds000247", 6, 10, 5, 202, 41, 2),
1010
# The authors list in 'eeg_cbm' contains non-person entities, so the person count of 2 is not correct (issue raised in #43)
11-
("eeg_cbm", 20, 20, 2, 104, 1),
12-
("asl001", 1, 1, 2, 8, 0),
11+
("eeg_cbm", 20, 20, 2, 104, 40, 1),
12+
("asl001", 1, 1, 2, 8, 3, 0),
1313
# Number of files in 'eeg_rest_fmri' is not correct as it doesn't include the files in derivatives (issue raised in #42)
14-
("eeg_rest_fmri", 3, 3, 6, 46, 1)]
14+
("eeg_rest_fmri", 3, 3, 6, 46, 22, 1)]
1515

1616

17-
@pytest.mark.parametrize("dataset_label, dataset_subject_number, dataset_subject_state_number, dataset_person_number, dataset_files_number, dataset_behavioral_protocol_number", example_dataset)
18-
def test_example_datasets(dataset_label, dataset_subject_number, dataset_subject_state_number, dataset_person_number, dataset_files_number, dataset_behavioral_protocol_number):
17+
@pytest.mark.parametrize("dataset_label, dataset_subject_number, dataset_subject_state_number, dataset_person_number, dataset_files_number, dataset_file_bundles_number, dataset_behavioral_protocol_number", example_dataset)
18+
def test_example_datasets(dataset_label, dataset_subject_number, dataset_subject_state_number, dataset_person_number, dataset_files_number, dataset_file_bundles_number, dataset_behavioral_protocol_number):
1919
test_dir = os.path.join("bids-examples", dataset_label)
2020
bids2openminds.converter.convert(test_dir, save_output=True)
2121
c = Collection()
@@ -25,23 +25,26 @@ def test_example_datasets(dataset_label, dataset_subject_number, dataset_subject
2525
subject_state_number = 0
2626
person_number = 0
2727
files_number = 0
28+
file_bundles_number = 0
2829
behavioral_protocol_number = 0
2930

3031
for item in c:
31-
match item.type_:
32-
case "https://openminds.ebrains.eu/core/Subject":
33-
subject_number += 1
34-
case "https://openminds.ebrains.eu/core/SubjectState":
35-
subject_state_number += 1
36-
case "https://openminds.ebrains.eu/core/Person":
37-
person_number += 1
38-
case "https://openminds.ebrains.eu/core/File":
39-
files_number += 1
40-
case "https://openminds.ebrains.eu/core/BehavioralProtocol":
41-
behavioral_protocol_number += 1
32+
if item.type_ == "https://openminds.ebrains.eu/core/Subject":
33+
subject_number += 1
34+
if item.type_ == "https://openminds.ebrains.eu/core/SubjectState":
35+
subject_state_number += 1
36+
if item.type_ == "https://openminds.ebrains.eu/core/Person":
37+
person_number += 1
38+
if item.type_ == "https://openminds.ebrains.eu/core/File":
39+
files_number += 1
40+
if item.type_ == "https://openminds.ebrains.eu/core/FileBundle":
41+
file_bundles_number += 1
42+
if item.type_ == "https://openminds.ebrains.eu/core/BehavioralProtocol":
43+
behavioral_protocol_number += 1
4244

4345
assert dataset_subject_number == subject_number
4446
assert dataset_subject_state_number == subject_state_number
4547
assert dataset_person_number == person_number
4648
assert dataset_files_number == files_number
49+
assert dataset_file_bundles_number == file_bundles_number
4750
assert dataset_behavioral_protocol_number == behavioral_protocol_number

test/test_example_datasets_click.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,10 @@
11
import os
2+
import shutil
23
from openminds import Collection
34
from bids2openminds.converter import convert_click
45
from click.testing import CliRunner
56

6-
(test_data_set, number_of_openminds_files) = ("ds003", 102)
7+
(test_data_set, number_of_openminds_files) = ("ds003", 141)
78

89

910
def test_example_datasets_click():
@@ -17,10 +18,12 @@ def test_example_datasets_click():
1718

1819
def test_example_datasets_click_seperate_files():
1920
test_dir = os.path.join("bids-examples", test_data_set)
21+
path_openminds = os.path.join(test_dir, "openminds")
22+
if os.path.isdir(path_openminds):
23+
shutil.rmtree(path_openminds)
2024
runner = CliRunner()
2125
result = runner.invoke(convert_click, ["--multiple-files", test_dir])
2226
assert result.exit_code == 0
23-
path_openminds = os.path.join(test_dir, "openminds")
2427
numer_of_files = len(os.listdir(path_openminds))
2528
assert numer_of_files == number_of_openminds_files
2629

test/test_file_bundle.py

Lines changed: 98 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,98 @@
1+
import pytest
2+
import os
3+
import pathlib
4+
from openminds import Collection
5+
from bids2openminds.main import create_file_bundle
6+
7+
# Example dataset used by these tests and the number of FileBundle nodes
# expected for it.
test_data_set = "ds007"
number_of_openminds_bundle = 60

# Each entry: ([path components of a file], [names of the expected file bundles]).
example_file_filebundle = (
    (
        ["sub-04", "anat", "sub-04_inplaneT2.nii.gz"],
        ["sub-04/anat"],
    ),
    (
        ["sub-03", "func", "sub-03_task-stopsignalwithmanualresponse_run-02_events.tsv"],
        ["sub-03/func"],
    ),
)

# Each entry: ([path components of a folder], [expected bundle name],
# [expected parent bundle name, or None for a top-level file bundle]).
example_folder_filebundle = (
    (["sub-04", "anat"], ["sub-04/anat"], ["sub-04"]),
    (["sub-01"], ["sub-01"], [None]),
)
16+
17+
18+
@pytest.fixture(scope="session")
def test_dir():
    """Return the path of the example dataset inside ``bids-examples``."""
    return os.path.join("bids-examples", test_data_set)
22+
23+
24+
def path_name(path):
    """Return ``path`` as a string with every backslash turned into ``/``."""
    return "/".join(str(path).split("\\"))
27+
28+
29+
def example_path(test_dir, path_list):
    """Join ``test_dir`` with each component of ``path_list`` in order.

    With an empty ``path_list`` the ``test_dir`` value is returned unchanged.
    """
    if not path_list:
        return test_dir
    return os.path.join(test_dir, *path_list)
34+
35+
36+
@pytest.fixture(scope="session")
def generate_file_bundle_collection(test_dir):
    """Run ``create_file_bundle`` once on the example dataset.

    Returns a tuple ``(file_bundles, collection, file_repository)``: the
    file-to-bundles mapping, the collection the nodes were added to, and the
    repository object returned by ``create_file_bundle``.
    """
    collection = Collection()
    result = create_file_bundle(
        test_dir, test_dir, collection, is_file_repository=True)
    file_bundles, file_repository = result[0], result[2]
    return file_bundles, collection, file_repository
42+
43+
44+
def test_file_bundles_type(generate_file_bundle_collection):
    """The file-to-bundles mapping produced by the fixture is a plain dict."""
    file_to_bundles = generate_file_bundle_collection[0]
    assert type(file_to_bundles) is dict
47+
48+
49+
def test_number_file_bundle(generate_file_bundle_collection):
    """The collection holds the expected number of FileBundle nodes."""
    collection = generate_file_bundle_collection[1]
    bundle_count = sum(
        1
        for node in collection
        if node.type_ == "https://openminds.ebrains.eu/core/FileBundle"
    )
    assert bundle_count == number_of_openminds_bundle
56+
57+
58+
@pytest.mark.parametrize("path_list, bundle", example_file_filebundle)
def test_random_file(test_dir, path_list, bundle, generate_file_bundle_collection):
    """A sample file is mapped to exactly the expected file bundles."""
    file_to_bundles = generate_file_bundle_collection[0]

    # Keys of the mapping are file paths rooted at the dataset directory.
    found_bundles = file_to_bundles[example_path(test_dir, path_list)]

    assert len(found_bundles) == len(bundle)

    for found in found_bundles:
        assert found.name in bundle
71+
72+
73+
@pytest.mark.parametrize("path_list, bundle, parent_bundle", example_folder_filebundle)
def test_random_folder(test_dir, path_list, bundle, parent_bundle, generate_file_bundle_collection):
    """A sample folder has exactly one file bundle, attached to the right parent.

    ``bundle`` and ``parent_bundle`` are one-element lists. ``parent_bundle[0]``
    is the expected parent bundle name, or ``None`` when the folder sits
    directly below the dataset root and must therefore be part of the file
    repository.
    """
    _, collection, _ = generate_file_bundle_collection

    file_repository_iri = pathlib.Path(test_dir).absolute().as_uri()

    # Bundle names are dataset-relative paths written with forward slashes.
    folder_name = path_name(example_path("", path_list))

    matching_bundles = [
        item
        for item in collection
        if item.type_ == "https://openminds.ebrains.eu/core/FileBundle"
        and item.name == folder_name
    ]
    # Exactly one file bundle must carry this name.
    assert len(matching_bundles) == 1
    dataset_bundle = matching_bundles[0]

    assert dataset_bundle.name == bundle[0]

    # The parametrized value is a list, so the list itself is never None;
    # the expectation lives in its single element.
    expected_parent = parent_bundle[0]
    if expected_parent is not None:
        assert dataset_bundle.is_part_of.name == expected_parent
    else:
        assert dataset_bundle.is_part_of.type_ == "https://openminds.ebrains.eu/core/FileRepository"
        assert dataset_bundle.is_part_of.iri.value == file_repository_iri

0 commit comments

Comments
 (0)