Skip to content

Commit 4184842

Browse files
author
oerc0042
committed
code review and suggested alterations
1 parent 52473bb commit 4184842

15 files changed

Lines changed: 1078 additions & 714 deletions

File tree

isatools/create/assay_templates.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -148,7 +148,7 @@ def create_new_ontology_annotation(term_name):
148148
]
149149
)
150150

151-
chip_seq_dict = rna_seq_dic = OrderedDict(
151+
chip_seq_dict = OrderedDict(
152152
[
153153
("measurement_type", "chromatin modification profiling"),
154154
("technology_type", "nucleic acid sequencing"),

isatools/create/connectors.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
from collections import OrderedDict
2+
from copy import deepcopy
23

34
from isatools.create.constants import (
45
BASE_FACTORS,
@@ -232,7 +233,8 @@ def generate_assay_ord_dict_from_config(datascriptor_assay_config, arm_name, epo
232233
)
233234
for name, node in datascriptor_assay_config["workflow"]:
234235
prepared_nodes = None
235-
assert isinstance(node, dict)
236+
if not isinstance(node, dict):
237+
raise TypeError("Each workflow node must be a dictionary. {} was provided.".format(type(node).__name__))
236238
if "#replicates" in node:
237239
# this is a ProtocolNode
238240
prepared_nodes = {}
@@ -336,7 +338,7 @@ def generate_study_design(datascriptor_study_config):
336338
)
337339
arm_map[cell] = sa_plan
338340
source_type = Characteristic(
339-
category=DEFAULT_SOURCE_TYPE.category,
341+
category=deepcopy(DEFAULT_SOURCE_TYPE.category),
340342
value=_map_ontology_annotation(
341343
arm_dict.get("subjectType", None) or study_design_config.get("subjectType", None)
342344
),

isatools/create/constants.py

Lines changed: 35 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
# NON TREATMENT TYPES
22
import os
3+
from functools import lru_cache
34

45
import yaml
56

@@ -92,34 +93,48 @@
9293
LABELED_EXTRACT_PREFIX = "LBLEXTR"
9394
ASSAY_GRAPH_PREFIX = "AT" # AT stands for Assay Type
9495

95-
with open(
96-
os.path.join(os.path.dirname(__file__), "..", "resources", "config", "yaml", "study-creator-config.yml")
97-
) as yaml_file:
98-
yaml_config = yaml.load(yaml_file, Loader=yaml.FullLoader)
96+
_YAML_CONFIG_DIR = os.path.join(os.path.dirname(__file__), "..", "resources", "config", "yaml")
97+
_STUDY_CREATOR_CONFIG_PATH = os.path.join(_YAML_CONFIG_DIR, "study-creator-config.yml")
98+
_ASSAY_OPTIONS_CONFIG_PATH = os.path.join(_YAML_CONFIG_DIR, "assay-options.yml")
99+
100+
101+
@lru_cache(maxsize=1)
102+
def get_study_creator_config():
103+
with open(_STUDY_CREATOR_CONFIG_PATH, encoding="utf-8") as yaml_file:
104+
return yaml.load(yaml_file, Loader=yaml.FullLoader)
105+
106+
107+
@lru_cache(maxsize=1)
108+
def get_assay_options():
109+
with open(_ASSAY_OPTIONS_CONFIG_PATH, encoding="utf-8") as yaml_file:
110+
return yaml.load(yaml_file, Loader=yaml.FullLoader)
111+
112+
113+
yaml_config = get_study_creator_config()
99114
default_ontology_source_reference = OntologySource(**yaml_config["study"]["ontology_source_references"][1])
100115

101116
# constants specific to the sampling plan in the study generation from the study design
102117
RUN_ORDER = yaml_config["study"]["protocols"][0]["parameters"][0]
103118
STUDY_CELL = yaml_config["study"]["protocols"][0]["parameters"][1]
104119

105-
with open(
106-
os.path.join(os.path.dirname(__file__), "..", "resources", "config", "yaml", "assay-options.yml")
107-
) as yaml_file:
108-
assays_opts = yaml.load(yaml_file, Loader=yaml.FullLoader)
120+
assays_opts = get_assay_options()
109121

122+
def get_default_source_type(
123+
term="Human",
124+
term_accession="http://purl.obolibrary.org/obo/NCIT_C14225",
125+
term_source=default_ontology_source_reference,
126+
):
127+
return Characteristic(
128+
category=OntologyAnnotation(
129+
term="Study Subject",
130+
term_source=default_ontology_source_reference,
131+
term_accession="http://purl.obolibrary.org/obo/NCIT_C41189",
132+
),
133+
value=OntologyAnnotation(term=term, term_source=term_source, term_accession=term_accession),
134+
)
110135

111-
DEFAULT_SOURCE_TYPE = Characteristic(
112-
category=OntologyAnnotation(
113-
term="Study Subject",
114-
term_source=default_ontology_source_reference,
115-
term_accession="http://purl.obolibrary.org/obo/NCIT_C41189",
116-
),
117-
value=OntologyAnnotation(
118-
term="Human",
119-
term_source=default_ontology_source_reference,
120-
term_accession="http://purl.obolibrary.org/obo/NCIT_C14225",
121-
),
122-
)
136+
137+
DEFAULT_SOURCE_TYPE = get_default_source_type()
123138

124139
DEFAULT_LABEL = Characteristic(
125140
category=OntologyAnnotation(

isatools/create/model.py

Lines changed: 52 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,6 @@
99
import itertools
1010
import json
1111
import logging
12-
import os
1312
import re
1413
import uuid
1514
from abc import ABC
@@ -20,7 +19,6 @@
2019
from numbers import Number
2120

2221
import networkx as nx
23-
import yaml
2422

2523
from isatools.create import errors
2624
from isatools.create.constants import (
@@ -57,6 +55,7 @@
5755
WASHOUT,
5856
ZFILL_WIDTH,
5957
assays_opts,
58+
get_study_creator_config,
6059
)
6160
from isatools.model import (
6261
AcquisitionParameterDataFile,
@@ -98,6 +97,23 @@
9897

9998
__author__ = "massi"
10099

100+
_ALLOWED_DATA_FILE_CLASSES = {
101+
RawDataFile,
102+
RawSpectralDataFile,
103+
ArrayDataFile,
104+
FreeInductionDecayDataFile,
105+
DerivedDataFile,
106+
DerivedSpectralDataFile,
107+
DerivedArrayDataFile,
108+
ProteinAssignmentFile,
109+
PeptideAssignmentFile,
110+
DerivedArrayDataMatrixFile,
111+
PostTranslationalModificationAssignmentFile,
112+
AcquisitionParameterDataFile,
113+
}
114+
115+
_DATA_FILE_CLASS_BY_NAME = {cls.__name__: cls for cls in _ALLOWED_DATA_FILE_CLASSES}
116+
101117

102118
def intersperse(lst, item):
103119
"""
@@ -215,8 +231,9 @@ def duration(self):
215231
def update_duration(self, duration_value, duration_unit=None):
216232
if not isinstance(duration_value, Number):
217233
raise ValueError("duration_value must be a Number. Value provided is {0}".format(duration_value))
218-
self.__duration.value = duration_value
219-
self.__duration.unit = duration_unit
234+
duration_factor_value = self.duration
235+
duration_factor_value.value = duration_value
236+
duration_factor_value.unit = duration_unit
220237

221238

222239
class Treatment(Element):
@@ -298,8 +315,9 @@ def duration(self):
298315
def update_duration(self, duration_value, duration_unit=None):
299316
if not isinstance(duration_value, Number):
300317
raise ValueError("duration_value must be a Number. Value provided is {0}".format(duration_value))
301-
self.__duration.value = duration_value
302-
self.__duration.unit = duration_unit
318+
duration_factor_value = self.duration
319+
duration_factor_value.value = duration_value
320+
duration_factor_value.unit = duration_unit
303321

304322

305323
class StudyCell(object):
@@ -2509,8 +2527,14 @@ def _generate_isa_elements_from_node(
25092527
for process in processes[::-1]
25102528
if process.executes_protocol == previous_protocol_node
25112529
)
2512-
assert isinstance(previous_process, Process)
2513-
assert isinstance(item, Process)
2530+
if not isinstance(previous_process, Process):
2531+
raise TypeError(
2532+
"Expected Process instance for previous_process, got {}".format(
2533+
type(previous_process).__name__
2534+
)
2535+
)
2536+
if not isinstance(item, Process):
2537+
raise TypeError("Expected Process instance for item, got {}".format(type(item).__name__))
25142538
log.debug("linking process {0} to process {1}".format(previous_process.name, item.name))
25152539
plink(previous_process, item) # TODO check if this generates any issue
25162540

@@ -2530,8 +2554,14 @@ def _generate_isa_elements_from_node(
25302554
for process in processes[::-1]
25312555
if process.executes_protocol == previous_protocol_node
25322556
)
2533-
assert isinstance(previous_process, Process)
2534-
assert isinstance(item, Process)
2557+
if not isinstance(previous_process, Process):
2558+
raise TypeError(
2559+
"Expected Process instance for previous_process, got {}".format(
2560+
type(previous_process).__name__
2561+
)
2562+
)
2563+
if not isinstance(item, Process):
2564+
raise TypeError("Expected Process instance for item, got {}".format(type(item).__name__))
25352565
log.debug("linking process {0} to process {1}".format(previous_process.name, item.name))
25362566
plink(previous_process, item) # TODO check if this generates any issue
25372567
return processes, other_materials, characteristic_categories, data_files, item, counter
@@ -2675,22 +2705,8 @@ def _isa_objects_factory(
26752705
log.debug(
26762706
"Assay conf. found: {}; {}; {};".format(measurement_type, technology_type, curr_assay_opt)
26772707
)
2678-
isa_class = globals()[curr_assay_opt["raw data file"].replace(" ", "")]
2679-
assert isa_class in {
2680-
# expand this set if needed
2681-
RawDataFile,
2682-
RawSpectralDataFile,
2683-
ArrayDataFile,
2684-
FreeInductionDecayDataFile,
2685-
DerivedDataFile,
2686-
DerivedSpectralDataFile,
2687-
DerivedArrayDataFile,
2688-
ProteinAssignmentFile,
2689-
PeptideAssignmentFile,
2690-
DerivedArrayDataMatrixFile,
2691-
PostTranslationalModificationAssignmentFile,
2692-
AcquisitionParameterDataFile,
2693-
}
2708+
raw_data_file_name = curr_assay_opt["raw data file"].replace(" ", "")
2709+
isa_class = _DATA_FILE_CLASS_BY_NAME.get(raw_data_file_name, RawDataFile)
26942710
file_extension = ".{}".format(node.extension) if node.extension else ""
26952711
return isa_class(
26962712
filename="{}_S{}_DAE_R{}_{}{}".format(
@@ -2710,12 +2726,7 @@ def generate_isa_study(self, identifier=None):
27102726
this is the core method to return the fully populated ISA Study object from the StudyDesign
27112727
:return: isatools.model.Study
27122728
"""
2713-
with open(
2714-
os.path.join(os.path.dirname(__file__), "..", "resources", "config", "yaml", "study-creator-config.yml")
2715-
) as yaml_file:
2716-
config = yaml.load(yaml_file, Loader=yaml.FullLoader)
2717-
2718-
study_config = config["study"]
2729+
study_config = get_study_creator_config()["study"]
27192730
study = Study(
27202731
identifier=self.identifier or identifier or DEFAULT_STUDY_IDENTIFIER,
27212732
title=self.name,
@@ -2808,7 +2819,8 @@ def augment_study(cls, study, study_design, in_place=False):
28082819
:param in_place: boolean
28092820
:return:
28102821
"""
2811-
assert isinstance(in_place, bool)
2822+
if not isinstance(in_place, bool):
2823+
raise TypeError("in_place must be a boolean")
28122824
if not isinstance(study, Study):
28132825
raise TypeError("study must be a valid Study object")
28142826
if not isinstance(study_design, StudyDesign):
@@ -2818,7 +2830,8 @@ def augment_study(cls, study, study_design, in_place=False):
28182830
for cell, study_assay_plan in arm.arm_map.items():
28192831
if study_assay_plan:
28202832
for assay_graph in study_assay_plan.assay_plan:
2821-
assert isinstance(assay_graph, AssayGraph)
2833+
if not isinstance(assay_graph, AssayGraph):
2834+
raise TypeError("assay_graph must be a valid AssayGraph object")
28222835
if assay_graph.quality_control:
28232836
# CHECK the assumption here is that an assay file can unequivocally be identified
28242837
# by StudyCell name, corresponding AssayGraph id and measurement type
@@ -2929,7 +2942,8 @@ def _generate_quality_control_samples(
29292942
if not isinstance(quality_control, QualityControl):
29302943
raise TypeError()
29312944
qc_pre = quality_control.pre_run_sample_type
2932-
assert isinstance(qc_pre, ProductNode)
2945+
if not isinstance(qc_pre, ProductNode):
2946+
raise TypeError("quality_control.pre_run_sample_type must be a ProductNode")
29332947
cell_name = study_cell.name
29342948
for i in range(qc_pre.size):
29352949
dummy_source = QualityControlSource(
@@ -2976,7 +2990,8 @@ def _generate_quality_control_samples(
29762990
qc_samples_interspersed[(sample_node, interspersing_interval)].append(sample)
29772991
log.debug("Completed interspersed samples")
29782992
qc_post = quality_control.post_run_sample_type
2979-
assert isinstance(qc_post, ProductNode)
2993+
if not isinstance(qc_post, ProductNode):
2994+
raise TypeError("quality_control.post_run_sample_type must be a ProductNode")
29802995
for i in range(qc_post.size):
29812996
dummy_source = QualityControlSource(
29822997
name="SRC-QC-POST_{}_{}_{}".format(cell_name, SOURCE_QC_SOURCE_NAME, str(i).zfill(4))
@@ -2986,7 +3001,7 @@ def _generate_quality_control_samples(
29863001
name="SMP-QC-POST-{}_{}_{}".format(cell_name, QC_SAMPLE_NAME, str(i).zfill(4)),
29873002
factor_values=[],
29883003
characteristics=[
2989-
qc_post.characteristics if i < len(qc_post.characteristics) else qc_post.characteristics[-1]
3004+
qc_post.characteristics[i] if i < len(qc_post.characteristics) else qc_post.characteristics[-1]
29903005
],
29913006
derives_from=[dummy_source],
29923007
)

isatools/isajson/dump.py

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,10 @@
1-
from json import JSONEncoder
2-
3-
4-
class ISAJSONEncoder(JSONEncoder):
5-
def default(self, o):
6-
if hasattr(o, "to_dict"):
7-
method = getattr(o, "to_dict")
8-
if callable(method):
9-
return o.to_dict()
10-
return JSONEncoder.default(self, o)
1+
from json import JSONEncoder
2+
3+
4+
class ISAJSONEncoder(JSONEncoder):
5+
def default(self, o):
6+
if hasattr(o, "to_dict"):
7+
method = getattr(o, "to_dict")
8+
if callable(method):
9+
return o.to_dict()
10+
return JSONEncoder.default(self, o)

isatools/isajson/load.py

Lines changed: 15 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,15 @@
1-
import json
2-
3-
from isatools.model import Investigation
4-
5-
6-
def load(fp):
7-
"""Loads an ISA-JSON file and returns an Investigation object.
8-
9-
:param fp: A file-like object or a string containing the JSON data.
10-
:return: An Investigation object.
11-
"""
12-
investigation_json = json.load(fp)
13-
investigation = Investigation()
14-
investigation.from_dict(investigation_json)
15-
return investigation
1+
import json
2+
3+
from isatools.model import Investigation
4+
5+
6+
def load(fp):
7+
"""Loads an ISA-JSON file and returns an Investigation object.
8+
9+
:param fp: A file-like object or a string containing the JSON data.
10+
:return: An Investigation object.
11+
"""
12+
investigation_json = json.load(fp)
13+
investigation = Investigation()
14+
investigation.from_dict(investigation_json)
15+
return investigation

0 commit comments

Comments
 (0)