Skip to content

Commit e224f91

Browse files
authored
Merge pull request #607 from ISA-tools/develop
Patch to support additional Data File types; closes #605, closes #608, closes #609
2 parents 47962b8 + 543c5cc commit e224f91

13 files changed

Lines changed: 207 additions & 14 deletions

File tree

isatools/constants.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
"Acquisition Parameter Data File",
2424
"Metabolite Assignment File",
2525
"Metabolite Identification File",
26+
"Normalization Name"
2627
]
2728

2829
_LABELS_DATA_NODES = [

isatools/isatab/validate/rules/rules_40xx.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -448,6 +448,7 @@ def load_table_checks(df, filename):
448448
"Hybridization Assay Name",
449449
"Array Design REF",
450450
"Scan Name",
451+
"Normalization Name",
451452
"Data Transformation Name",
452453
]
453454
and not _RX_PARAMETER_VALUE.match(col)

isatools/model/datafile.py

Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -202,6 +202,42 @@ def __ne__(self, other):
202202
return not self == other
203203

204204

205+
class ImageFile(DataFile):
    """Data file node carrying the "Image File" label in an experimental graph.

    Equality compares the filename, the ``generated_from`` entries and the
    comments; hashing is derived from ``repr``, which shows the same three
    fields.
    """

    def __init__(self, filename="", id_="", generated_from=None, comments=None):
        """Initialise the underlying DataFile, then set the ISA label."""
        super().__init__(filename=filename, id_=id_, generated_from=generated_from, comments=comments)
        self.label = "Image File"

    def __repr__(self):
        template = (
            "isatools.model.ImageFile(filename='{0.filename}', "
            "generated_from={0.generated_from}, comments={0.comments})"
        )
        return template.format(self)

    def __str__(self):
        # NOTE(review): the extracted diff lost leading whitespace, so the
        # field lines are reproduced exactly as shown (no indentation inside
        # the text) -- confirm against the repository copy.
        template = "\n".join(
            (
                "ImageFile(",
                "filename={data_file.filename}",
                "generated_from={num_generated_from} Sample objects",
                "comments={num_comments} Comment objects",
                ")",
            )
        )
        return template.format(
            data_file=self,
            num_generated_from=len(self.generated_from),
            num_comments=len(self.comments),
        )

    def __hash__(self):
        # Hash follows the textual representation.
        return hash(repr(self))

    def __eq__(self, other):
        # Only another ImageFile can be equal; then compare field by field.
        if not isinstance(other, ImageFile):
            return False
        if not (self.filename == other.filename):
            return False
        if not (self.generated_from == other.generated_from):
            return False
        return self.comments == other.comments

    def __ne__(self, other):
        is_equal = self == other
        return not is_equal
239+
240+
205241
class RawSpectralDataFile(DataFile):
206242
"""Represents a raw spectral data file in an experimental graph."""
207243

@@ -428,6 +464,44 @@ def __ne__(self, other):
428464
return not self == other
429465

430466

467+
class MetaboliteAssignmentFile(DataFile):
    """Data file node carrying the "Metabolite Assignment File" label.

    Equality compares the filename, the ``generated_from`` entries and the
    comments; hashing is derived from ``repr``, which shows the same three
    fields.
    """

    def __init__(self, filename="", id_="", generated_from=None, comments=None):
        """Initialise the underlying DataFile, then set the ISA label."""
        super().__init__(filename=filename, id_=id_, generated_from=generated_from, comments=comments)
        self.label = "Metabolite Assignment File"

    def __repr__(self):
        template = (
            "isatools.model.MetaboliteAssignmentFile("
            "filename='{data_file.filename}', "
            "generated_from={data_file.generated_from}, "
            "comments={data_file.comments})"
        )
        return template.format(data_file=self)

    def __str__(self):
        # NOTE(review): the extracted diff lost leading whitespace, so the
        # field lines are reproduced exactly as shown (no indentation inside
        # the text) -- confirm against the repository copy.
        template = "\n".join(
            (
                "MetaboliteAssignmentFile(",
                "filename={data_file.filename}",
                "generated_from={num_generated_from} Sample objects",
                "comments={num_comments} Comment objects",
                ")",
            )
        )
        return template.format(
            data_file=self,
            num_generated_from=len(self.generated_from),
            num_comments=len(self.comments),
        )

    def __hash__(self):
        # Hash follows the textual representation.
        return hash(repr(self))

    def __eq__(self, other):
        # Only another MetaboliteAssignmentFile can be equal; then compare
        # field by field.
        if not isinstance(other, MetaboliteAssignmentFile):
            return False
        if not (self.filename == other.filename):
            return False
        if not (self.generated_from == other.generated_from):
            return False
        return self.comments == other.comments

    def __ne__(self, other):
        is_equal = self == other
        return not is_equal
503+
504+
431505
class DerivedArrayDataMatrixFile(DataFile):
432506
"""Represents a derived array data matrix file in an experimental graph."""
433507

Lines changed: 50 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1,50 @@
1-
<isatab-config-file xmlns="http://www.ebi.ac.uk/bii/isatab_configuration#"><isatab-configuration table-name="histology" isatab-assay-type="generic_assay" isatab-conversion-target="generic"><measurement term-label="histology" term-accession="http://purl.obolibrary.org/obo/OBI_0600020" source-abbreviation="OBI"/><technology term-label="" term-accession="" source-abbreviation=""/><field header="Sample Name" data-type="String" is-file-field="false" is-multiple-value="false" is-required="true" is-hidden="false" is-forced-ontology="false"><description><![CDATA[Samples represent major outputs resulting from a protocol application other than the special case outputs of Extract or a Labeled Extract.]]></description><default-value><![CDATA[]]></default-value></field><protocol-field protocol-type ="histology"/><field header="Assay Name" data-type="String" is-file-field="false" is-multiple-value="false" is-required="true" is-hidden="false" is-forced-ontology="false"><description><![CDATA[User-defined name for an assay.]]></description><default-value><![CDATA[]]></default-value><generated-value-template>[INSTITUTION].Group-[GROUP_NO].Subject-[SUBJECT_NO].[SAMPLE_EXTRACT].Extract-[EXTRACT_COUNT].LE-[LABEL_COUNT].ASSAY-[HYB_COUNT]</generated-value-template></field><field header="Raw Data File" data-type="String" is-file-field="true" is-multiple-value="false" is-required="false" is-hidden="false" is-forced-ontology="false"><description><![CDATA[Name (or URI) of the data file generated by an assay]]></description><default-value><![CDATA[]]></default-value></field><field header="Image File" data-type="String" is-file-field="true" is-multiple-value="false" is-required="false" is-hidden="false" is-forced-ontology="false"><description><![CDATA[Name (or URI) of the image files generated by an 
assay.]]></description><default-value><![CDATA[]]></default-value><generated-value-template>[INSTITUTION].Group-[GROUP_NO].Subject-[SUBJECT_NO].[SAMPLE_EXTRACT]</generated-value-template></field><field header="Normalization Name" data-type="String" is-file-field="false" is-multiple-value="false" is-required="false" is-hidden="false" is-forced-ontology="false"><description><![CDATA[User-defined name for each normalization applied]]></description><default-value><![CDATA[]]></default-value></field><field header="Data Transformation Name" data-type="String" is-file-field="false" is-multiple-value="false" is-required="false" is-hidden="false" is-forced-ontology="false"><description><![CDATA[User-defined name for each data transformation applied]]></description><default-value><![CDATA[]]></default-value></field><field header="Derived Data File" data-type="String" is-file-field="true" is-multiple-value="false" is-required="false" is-hidden="false" is-forced-ontology="false"><description><![CDATA[Name (or URI) of the data file generated by an assay.]]></description><default-value><![CDATA[]]></default-value></field><structured-field name="factors"/></isatab-configuration></isatab-config-file>
1+
<isatab-config-file xmlns="http://www.ebi.ac.uk/bii/isatab_configuration#">
2+
<isatab-configuration table-name="histology" isatab-assay-type="generic_assay" isatab-conversion-target="generic">
3+
<measurement term-label="histology" term-accession="http://purl.obolibrary.org/obo/OBI_0600020"
4+
source-abbreviation="OBI"/>
5+
<technology term-label="" term-accession="" source-abbreviation=""/>
6+
<field header="Sample Name" data-type="String" is-file-field="false" is-multiple-value="false"
7+
is-required="true" is-hidden="false" is-forced-ontology="false">
8+
<description>
9+
<![CDATA[Samples represent major outputs resulting from a protocol application other than the special case outputs of Extract or a Labeled Extract.]]></description>
10+
<default-value><![CDATA[]]></default-value>
11+
</field>
12+
<protocol-field protocol-type="histology"/>
13+
<field header="Assay Name" data-type="String" is-file-field="false" is-multiple-value="false" is-required="true"
14+
is-hidden="false" is-forced-ontology="false">
15+
<description><![CDATA[User-defined name for an assay.]]></description>
16+
<default-value><![CDATA[]]></default-value>
17+
<generated-value-template>
18+
[INSTITUTION].Group-[GROUP_NO].Subject-[SUBJECT_NO].[SAMPLE_EXTRACT].Extract-[EXTRACT_COUNT].LE-[LABEL_COUNT].ASSAY-[HYB_COUNT]
19+
</generated-value-template>
20+
</field>
21+
<field header="Raw Data File" data-type="String" is-file-field="true" is-multiple-value="false"
22+
is-required="false" is-hidden="false" is-forced-ontology="false">
23+
<description><![CDATA[Name (or URI) of the data file generated by an assay]]></description>
24+
<default-value><![CDATA[]]></default-value>
25+
</field>
26+
<field header="Image File" data-type="String" is-file-field="true" is-multiple-value="false" is-required="false"
27+
is-hidden="false" is-forced-ontology="false">
28+
<description><![CDATA[Name (or URI) of the image files generated by an assay.]]></description>
29+
<default-value><![CDATA[]]></default-value>
30+
<generated-value-template>[INSTITUTION].Group-[GROUP_NO].Subject-[SUBJECT_NO].[SAMPLE_EXTRACT]
31+
</generated-value-template>
32+
</field>
33+
<field header="Normalization Name" data-type="String" is-file-field="false" is-multiple-value="false"
34+
is-required="false" is-hidden="false" is-forced-ontology="false">
35+
<description><![CDATA[User-defined name for each normalization applied]]></description>
36+
<default-value><![CDATA[]]></default-value>
37+
</field>
38+
<field header="Data Transformation Name" data-type="String" is-file-field="false" is-multiple-value="false"
39+
is-required="false" is-hidden="false" is-forced-ontology="false">
40+
<description><![CDATA[User-defined name for each data transformation applied]]></description>
41+
<default-value><![CDATA[]]></default-value>
42+
</field>
43+
<field header="Derived Data File" data-type="String" is-file-field="true" is-multiple-value="false"
44+
is-required="false" is-hidden="false" is-forced-ontology="false">
45+
<description><![CDATA[Name (or URI) of the data file generated by an assay.]]></description>
46+
<default-value><![CDATA[]]></default-value>
47+
</field>
48+
<structured-field name="factors"/>
49+
</isatab-configuration>
50+
</isatab-config-file>
Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
<isatab-config-file xmlns="http://www.ebi.ac.uk/bii/isatab_configuration#">
2+
<isatab-configuration table-name="phenotyping_imaging" isatab-assay-type="generic_assay"
3+
isatab-conversion-target="generic">
4+
<measurement term-label="phenotyping analysis"
5+
term-accession="" source-abbreviation=""/>
6+
<technology term-label="imaging" term-accession="" source-abbreviation=""/>
7+
<field header="Sample Name" data-type="String" is-file-field="false" is-multiple-value="true" is-required="true"
8+
is-hidden="false" is-forced-ontology="false">
9+
<description>
10+
<![CDATA[Samples represent major outputs resulting from a protocol application other than the special case outputs of Extract or a Labeled Extract.]]></description>
11+
<default-value><![CDATA[]]></default-value>
12+
<generated-value-template>[INSTITUTION].Group-[GROUP_NO].Subject-[SUBJECT_NO].[SAMPLE_EXTRACT]
13+
</generated-value-template>
14+
</field>
15+
<protocol-field protocol-type="imaging"/>
16+
<field header="Assay Name" data-type="String" is-file-field="false" is-multiple-value="false" is-required="true"
17+
is-hidden="false" is-forced-ontology="false">
18+
<description><![CDATA[User-defined name for an assay.]]></description>
19+
<default-value><![CDATA[]]></default-value>
20+
<generated-value-template>
21+
[INSTITUTION].Group-[GROUP_NO].Subject-[SUBJECT_NO].[SAMPLE_EXTRACT].Extract-[EXTRACT_COUNT].LE-[LABEL_COUNT].ASSAY-[HYB_COUNT]
22+
</generated-value-template>
23+
</field>
24+
<field header="Image File" data-type="String" is-file-field="true" is-multiple-value="false"
25+
is-required="true" is-hidden="false" is-forced-ontology="false">
26+
<description><![CDATA[Name (or URI) of the data file generated by an assay.]]></description>
27+
<default-value><![CDATA[]]></default-value>
28+
</field>
29+
<field header="Normalization Name" data-type="String" is-file-field="false" is-multiple-value="false"
30+
is-required="false" is-hidden="false" is-forced-ontology="false">
31+
<description><![CDATA[User-defined name for each normalization applied]]></description>
32+
<default-value><![CDATA[]]></default-value>
33+
</field>
34+
<field header="Data Transformation Name" data-type="String" is-file-field="false" is-multiple-value="false"
35+
is-required="false" is-hidden="false" is-forced-ontology="false">
36+
<description><![CDATA[User-defined name for each data transformation applied.]]></description>
37+
<default-value><![CDATA[]]></default-value>
38+
</field>
39+
<field header="Derived Data File" data-type="String" is-file-field="true" is-multiple-value="false"
40+
is-required="false" is-hidden="false" is-forced-ontology="false">
41+
<description><![CDATA[Name (or URI) of the data file generated by an assay.]]></description>
42+
<default-value><![CDATA[]]></default-value>
43+
</field>
44+
<structured-field name="factors"/>
45+
</isatab-configuration>
46+
</isatab-config-file>

isatools/resources/schemas/v1.0.1/assay_schema.json

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@
2020
"dataFiles" : {
2121
"type": "array",
2222
"items" : {
23-
"$ref": "data_schema.json#"
23+
"$ref": "data_file_schema.json#"
2424
}
2525
},
2626
"materials": {
@@ -68,4 +68,4 @@
6868
}
6969
},
7070
"additionalProperties": false
71-
}
71+
}

isatools/resources/schemas/v1.0.1/data_schema.json renamed to isatools/resources/schemas/v1.0.1/data_file_schema.json

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
{
2-
"id": "https://raw.githubusercontent.com/ISA-tools/isa-api/master/isatools/resources/schemas/v1.0.1/data_schema.json",
2+
"id": "https://raw.githubusercontent.com/ISA-tools/isa-api/master/isatools/resources/schemas/v1.0.1/data_file_schema.json",
33
"$schema": "https://json-schema.org/draft/2020-12/schema",
44
"title": "ISA Data schema",
55
"name" : "ISA Data schema",
@@ -40,4 +40,4 @@
4040
}
4141
},
4242
"additionalProperties": false
43-
}
43+
}

isatools/resources/schemas/v1.0.1/process_schema.json

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@
4747
"$ref": "sample_schema.json#"
4848
},
4949
{
50-
"$ref": "data_schema.json#"
50+
"$ref": "data_file_schema.json#"
5151
},
5252
{
5353
"$ref": "material_schema.json#"
@@ -63,7 +63,7 @@
6363
"$ref": "sample_schema.json#"
6464
},
6565
{
66-
"$ref": "data_schema.json#"
66+
"$ref": "data_file_schema.json#"
6767
},
6868
{
6969
"$ref": "material_schema.json#"
@@ -79,4 +79,4 @@
7979
}
8080
},
8181
"additionalProperties": false
82-
}
82+
}

tests/convert/test_mzml2isa.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ def test_mzml2isa_convert_investigation(self):
2424
os.path.join(self._mzml_data_dir, study_id + "-partial"), self._tmp_dir, study_id, validate_output=True
2525
)
2626
self.assertEqual(len(report["warnings"]), 8)
27-
self.assertEqual(len(report["errors"]), 3)
27+
self.assertEqual(len(report["errors"]), 2)
2828

2929
# Strip out the line with Comment[Created With Tool] to avoid changes in version number generated by mzml2isa
3030
with open(os.path.join(self._tmp_dir, "i_Investigation.txt")) as in_fp, StringIO() as stripped_actual_file:
@@ -45,7 +45,7 @@ def test_mzml2isa_convert_study_table(self):
4545
os.path.join(self._mzml_data_dir, study_id + "-partial"), self._tmp_dir, study_id, validate_output=True
4646
)
4747
self.assertEqual(len(report["warnings"]), 8)
48-
self.assertEqual(len(report["errors"]), 3)
48+
self.assertEqual(len(report["errors"]), 2)
4949
with open(os.path.join(self._tmp_dir, "s_{}.txt".format(study_id))) as out_fp:
5050
with open(
5151
os.path.join(self._tab_data_dir, study_id + "-partial", "s_{}.txt".format(study_id))
@@ -59,7 +59,7 @@ def test_mzml2isa_convert_assay_table(self):
5959
)
6060
self.assertTrue(report["validation_finished"])
6161
self.assertEqual(len(report["warnings"]), 8)
62-
self.assertEqual(len(report["errors"]), 3)
62+
self.assertEqual(len(report["errors"]), 2)
6363
with open(
6464
os.path.join(self._tmp_dir, "a_{}_metabolite_profiling_mass_spectrometry.txt".format(study_id))
6565
) as out_fp:

tests/isajson/test_isajson.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -460,6 +460,18 @@ def test_json_load_and_dump_bii_s_test(self):
460460
assays = [a for a in studies["assays"] if a["filename"] == "a_assay.txt"][0]
461461
self.assertEqual(assays["materials"]["otherMaterials"][1]["type"], "Extract Name")
462462

463+
464+
def test_json_load_and_dump_imagefile_test(self):
    """Load the ISA-Image JSON fixture, re-serialise it, and check that the
    second data file of ``a_assay.txt`` is typed "Image File"."""
    fixture_path = os.path.join(utils.JSON_DATA_DIR, "ISA-Image", "isa-image.json")

    # Load into ISA objects
    with open(fixture_path) as isajson_fp:
        investigation = isajson.load(isajson_fp)

    # Dump into ISA JSON from ISA objects, then parse it back into plain dicts
    dumped = json.dumps(investigation, cls=isajson.ISAJSONEncoder)
    investigation_reload = json.loads(dumped)

    matching_studies = [s for s in investigation_reload["studies"] if s["filename"] == "s_study.txt"]
    studies = matching_studies[0]
    matching_assays = [a for a in studies["assays"] if a["filename"] == "a_assay.txt"]
    assays = matching_assays[0]

    self.assertEqual(assays["dataFiles"][1]["type"], "Image File")
474+
463475
def test_json_load_and_dump_isa_labeled_extract(self):
464476
# Load into ISA objects
465477
with open(os.path.join(utils.JSON_DATA_DIR, "TEST-ISA-LabeledExtract1", "isa-test-le1.json")) as isajson_fp:

0 commit comments

Comments
 (0)