|
| 1 | +{ |
| 2 | + "cells": [ |
| 3 | + { |
| 4 | + "cell_type": "code", |
| 5 | + "execution_count": 1, |
| 6 | + "id": "e77e7aaf-2576-4c66-be66-b2f4bc41ae7e", |
| 7 | + "metadata": {}, |
| 8 | + "outputs": [ |
| 9 | + { |
| 10 | + "name": "stderr", |
| 11 | + "output_type": "stream", |
| 12 | + "text": [ |
| 13 | + "/Users/oerc0042/.pyenv/versions/3.13.5/envs/py313isa/lib/python3.13/site-packages/fs/__init__.py:4: UserWarning: pkg_resources is deprecated as an API. See https://setuptools.pypa.io/en/latest/pkg_resources.html. The pkg_resources package is slated for removal as early as 2025-11-30. Refrain from using this package or pin to Setuptools<81.\n", |
| 14 | + " __import__(\"pkg_resources\").declare_namespace(__name__) # type: ignore\n" |
| 15 | + ] |
| 16 | + } |
| 17 | + ], |
| 18 | + "source": [ |
| 19 | + "from isatools import isatab\n", |
| 20 | + "from isatools.isajson import ISAJSONEncoder\n", |
| 21 | + "from isatools.convert import isatab2json\n", |
| 22 | + "from isatools import isajson\n", |
| 23 | + "from rdflib import Graph\n", |
| 24 | + "\n", |
| 25 | + "import os\n", |
| 26 | + "import json\n", |
| 27 | + "import isatools" |
| 28 | + ] |
| 29 | + }, |
| 30 | + { |
| 31 | + "cell_type": "code", |
| 32 | + "execution_count": 2, |
| 33 | + "id": "f80b3857-d16f-43c4-8b9d-e3f20001c54c", |
| 34 | + "metadata": {}, |
| 35 | + "outputs": [], |
| 36 | + "source": [ |
| 37 | + "# Read back the ISA-Tab \n", |
| 38 | + "\n", |
| 39 | + "# MTBLS718_117783_compressed_files\n", |
| 40 | + "\n", |
| 41 | + "# MTBLS1820_135004_compressed_files \n", |
| 42 | + "# MTBLS2289_155095_compressed_files\n", |
| 43 | + "# MTBLS3563_133388_compressed_files\n", |
| 44 | + "# MTBLS4381_183928_compressed_files\n", |
| 45 | + "data_dir = './MTBLS4381_183928_compressed_files/'\n", |
| 46 | + "\n", |
| 47 | + "with open(os.path.join(data_dir, 'i_Investigation.txt')) as f:\n", |
| 48 | + " loaded_investigation = isatab.load(f)\n" |
| 49 | + ] |
| 50 | + }, |
| 51 | + { |
| 52 | + "cell_type": "code", |
| 53 | + "execution_count": 3, |
| 54 | + "id": "311ab08b-1bcd-45bf-83ba-e5a3f8935d42", |
| 55 | + "metadata": {}, |
| 56 | + "outputs": [ |
| 57 | + { |
| 58 | + "name": "stdout", |
| 59 | + "output_type": "stream", |
| 60 | + "text": [ |
| 61 | + "Name: isatools\n", |
| 62 | + "Version: 0.14.3\n", |
| 63 | + "Summary: Metadata tracking tools help to manage an increasingly diverse set of life science, environmental and biomedical experiments\n", |
| 64 | + "Home-page: \n", |
| 65 | + "Author: ISA Infrastructure Team\n", |
| 66 | + "Author-email: ISA Infrastructure Team <isatools@googlegroups.com>\n", |
| 67 | + "License-Expression: CPAL-1.0\n", |
| 68 | + "Location: /Users/oerc0042/.pyenv/versions/3.13.5/envs/py313isa/lib/python3.13/site-packages\n", |
| 69 | + "Requires: beautifulsoup4, biopython, chardet, flask, flask-sqlalchemy, graphene, graphql-core, iso8601, jinja2, jsonschema, lxml, mzml2isa, networkx, numpy, openpyxl, pandas, progressbar2, pytest-timeout, python-dateutil, pyyaml, rdflib, requests, ruff, setuptools, sqlalchemy\n", |
| 70 | + "Required-by: \n" |
| 71 | + ] |
| 72 | + } |
| 73 | + ], |
| 74 | + "source": [ |
| 75 | + "!pip show isatools" |
| 76 | + ] |
| 77 | + }, |
| 78 | + { |
| 79 | + "cell_type": "code", |
| 80 | + "execution_count": 4, |
| 81 | + "id": "84aedb36-5154-48d1-9d1b-62905c6465ae", |
| 82 | + "metadata": {}, |
| 83 | + "outputs": [ |
| 84 | + { |
| 85 | + "data": { |
| 86 | + "text/plain": [ |
| 87 | + "\"<p>Testing the effect of blue mussel ensilage as a novel cost-efficient protein feed source for Atlantic salmon feed. Fermentation of novel raw materials for fish feed is a new practice with little prior experience of direct relevance to aquaculture and fish farming. In Trial A, an alternative marine compound is included in the feed, which is based on seaweed. In the trial were multiple inclusion levels used to investigate the effect of seaweed inclusion in an aquaculture setting.</p><p><br></p><p><strong>HoloFood Trial A</strong> - <strong>seaweed-dose response</strong> is reported in the current study <a href='https://www.ebi.ac.uk/metabolights/MTBLS4381' rel='noopener noreferrer' target='_blank'><strong>MTBLS4381</strong></a>.</p><p><strong>HoloFood Trial B - blue mussel-dose response</strong> is reported in <a href='https://www.ebi.ac.uk/metabolights/MTBLS4382' rel='noopener noreferrer' target='_blank'><strong>MTBLS4382</strong></a>.</p><p><strong>HoloFood Trial C</strong> - <strong>blue mussel ensilage-dose response</strong> is reported in <a href='https://www.ebi.ac.uk/metabolights/MTBLS4384' rel='noopener noreferrer' target='_blank'><strong>MTBLS4384</strong></a>.</p><p><strong>HoloFood Trial D</strong> - <strong>fermented seaweed open water-dose response</strong> is reported in <a href='https://www.ebi.ac.uk/metabolights/MTBLS6733' rel='noopener noreferrer' target='_blank'><strong>MTBLS6733</strong></a>.</p><p><strong>HoloFood Trial 1/2/3</strong> is reported in <a href='https://www.ebi.ac.uk/metabolights/MTBLS6988' rel='noopener noreferrer' target='_blank'><strong>MTBLS6988</strong></a>.</p><p><br></p><p><strong>Linked cross omic data sets:</strong></p><p>Nucleic acid data associated with this study are available in the European Nucleotide Archive (ENA): accession number <a href='https://www.ebi.ac.uk/ena/browser/view/PRJEB43192' rel='noopener noreferrer' target='_blank'>PRJEB43192</a>.</p><p>Metagenomic data associated with this study are available from MGnify under the Super Study '<a href='https://www.ebi.ac.uk/metagenomics/super-studies/holofood' rel='noopener noreferrer' target='_blank'>holofood</a>'.</p>\"" |
| 88 | + ] |
| 89 | + }, |
| 90 | + "execution_count": 4, |
| 91 | + "metadata": {}, |
| 92 | + "output_type": "execute_result" |
| 93 | + } |
| 94 | + ], |
| 95 | + "source": [ |
| 96 | + "loaded_investigation.studies[0].description" |
| 97 | + ] |
| 98 | + }, |
| 99 | + { |
| 100 | + "cell_type": "code", |
| 101 | + "execution_count": 5, |
| 102 | + "id": "57f00977-5b9f-4d73-a15a-fbec0a2394c0", |
| 103 | + "metadata": {}, |
| 104 | + "outputs": [], |
| 105 | + "source": [ |
| 106 | + "validation_report = isatab.validate(open(os.path.join(data_dir, 'i_Investigation.txt')))" |
| 107 | + ] |
| 108 | + }, |
| 109 | + { |
| 110 | + "cell_type": "code", |
| 111 | + "execution_count": 6, |
| 112 | + "id": "8f76d77c-877a-4152-93af-8284aff0a141", |
| 113 | + "metadata": {}, |
| 114 | + "outputs": [ |
| 115 | + { |
| 116 | + "data": { |
| 117 | + "text/plain": [ |
| 118 | + "[{'message': 'A required property is missing',\n", |
| 119 | + " 'supplemental': 'A property value in Study PubMed ID of investigation file at column 1 is required',\n", |
| 120 | + " 'code': 4003},\n", |
| 121 | + " {'message': 'A required property is missing',\n", |
| 122 | + " 'supplemental': 'A property value in Study Publication DOI of investigation file at column 1 is required',\n", |
| 123 | + " 'code': 4003}]" |
| 124 | + ] |
| 125 | + }, |
| 126 | + "execution_count": 6, |
| 127 | + "metadata": {}, |
| 128 | + "output_type": "execute_result" |
| 129 | + } |
| 130 | + ], |
| 131 | + "source": [ |
| 132 | + "validation_report[\"errors\"]" |
| 133 | + ] |
| 134 | + }, |
| 135 | + { |
| 136 | + "cell_type": "code", |
| 137 | + "execution_count": 7, |
| 138 | + "id": "f849053b-c952-4396-b807-3265e3b53127", |
| 139 | + "metadata": {}, |
| 140 | + "outputs": [], |
| 141 | + "source": [ |
| 142 | + "isa_json = isatab2json.convert(data_dir,use_new_parser=True)\n", |
| 143 | + " \n", |
| 144 | + "with open(os.path.join(data_dir, 'isa.json'), 'w') as out_fp:\n", |
| 145 | + " json.dump(isa_json, out_fp)" |
| 146 | + ] |
| 147 | + }, |
| 148 | + { |
| 149 | + "cell_type": "code", |
| 150 | + "execution_count": 8, |
| 151 | + "id": "308ee48c-31a5-4427-a305-17d20715ed0b", |
| 152 | + "metadata": {}, |
| 153 | + "outputs": [], |
| 154 | + "source": [ |
| 155 | + "from isatools.convert.json2jsonld import ISALDSerializer\n", |
| 156 | + "from json import load" |
| 157 | + ] |
| 158 | + }, |
| 159 | + { |
| 160 | + "cell_type": "code", |
| 161 | + "execution_count": 9, |
| 162 | + "id": "f16c6ddf-786f-4391-8af3-e8394b83b958", |
| 163 | + "metadata": {}, |
| 164 | + "outputs": [], |
| 165 | + "source": [ |
| 166 | + "instance_path = os.path.join(data_dir, \"isa.json\")\n", |
| 167 | + "\n", |
| 168 | + "with open(instance_path, 'r') as instance_file:\n", |
| 169 | + " instance = load(instance_file)\n", |
| 170 | + " instance_file.close()" |
| 171 | + ] |
| 172 | + }, |
| 173 | + { |
| 174 | + "cell_type": "code", |
| 175 | + "execution_count": 10, |
| 176 | + "id": "638af7a1-76ed-4310-9bac-0df34e10430d", |
| 177 | + "metadata": {}, |
| 178 | + "outputs": [ |
| 179 | + { |
| 180 | + "ename": "TypeError", |
| 181 | + "evalue": "'NoneType' object is not iterable", |
| 182 | + "output_type": "error", |
| 183 | + "traceback": [ |
| 184 | + "\u001b[31m---------------------------------------------------------------------------\u001b[39m", |
| 185 | + "\u001b[31mTypeError\u001b[39m Traceback (most recent call last)", |
| 186 | + "\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[10]\u001b[39m\u001b[32m, line 3\u001b[39m\n\u001b[32m 1\u001b[39m ontology = \u001b[33m\"\u001b[39m\u001b[33mwd\u001b[39m\u001b[33m\"\u001b[39m\n\u001b[32m----> \u001b[39m\u001b[32m3\u001b[39m serializer = \u001b[43mISALDSerializer\u001b[49m\u001b[43m(\u001b[49m\u001b[43minstance\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 4\u001b[39m serializer.set_ontology(ontology)\n\u001b[32m 5\u001b[39m serializer.set_instance(instance)\n", |
| 187 | + "\u001b[36mFile \u001b[39m\u001b[32m~/.pyenv/versions/3.13.5/envs/py313isa/lib/python3.13/site-packages/isatools/convert/json2jsonld.py:29\u001b[39m, in \u001b[36mISALDSerializer.__init__\u001b[39m\u001b[34m(self, json_instance, ontology, combined)\u001b[39m\n\u001b[32m 27\u001b[39m \u001b[38;5;28mself\u001b[39m.ontology = ontology\n\u001b[32m 28\u001b[39m \u001b[38;5;28mself\u001b[39m._resolve_network()\n\u001b[32m---> \u001b[39m\u001b[32m29\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43mset_instance\u001b[49m\u001b[43m(\u001b[49m\u001b[43mjson_instance\u001b[49m\u001b[43m)\u001b[49m\n", |
| 188 | + "\u001b[36mFile \u001b[39m\u001b[32m~/.pyenv/versions/3.13.5/envs/py313isa/lib/python3.13/site-packages/isatools/convert/json2jsonld.py:74\u001b[39m, in \u001b[36mISALDSerializer.set_instance\u001b[39m\u001b[34m(self, instance)\u001b[39m\n\u001b[32m 72\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(instance, \u001b[38;5;28mstr\u001b[39m) \u001b[38;5;129;01mand\u001b[39;00m (instance.startswith(\u001b[33m\"\u001b[39m\u001b[33mhttp://\u001b[39m\u001b[33m\"\u001b[39m) \u001b[38;5;129;01mor\u001b[39;00m instance.startswith(\u001b[33m\"\u001b[39m\u001b[33mhttps://\u001b[39m\u001b[33m\"\u001b[39m)):\n\u001b[32m 73\u001b[39m \u001b[38;5;28mself\u001b[39m.instance = json.loads(get(instance).text)\n\u001b[32m---> \u001b[39m\u001b[32m74\u001b[39m \u001b[38;5;28mself\u001b[39m.output = \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m_inject_ld\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43mmain_schema\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m{\u001b[49m\u001b[43m}\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43minstance\u001b[49m\u001b[43m)\u001b[49m\n", |
| 189 | + "\u001b[36mFile \u001b[39m\u001b[32m~/.pyenv/versions/3.13.5/envs/py313isa/lib/python3.13/site-packages/isatools/convert/json2jsonld.py:91\u001b[39m, in \u001b[36mISALDSerializer._inject_ld\u001b[39m\u001b[34m(self, schema_name, output, instance)\u001b[39m\n\u001b[32m 84\u001b[39m \u001b[38;5;250m\u001b[39m\u001b[33;03m\"\"\"\u001b[39;00m\n\u001b[32m 85\u001b[39m \u001b[33;03m:param schema_name: name of the schema\u001b[39;00m\n\u001b[32m 86\u001b[39m \u001b[33;03m:param output: the output to inject the ld attributes into\u001b[39;00m\n\u001b[32m 87\u001b[39m \u001b[33;03m:param instance: the json instance to get the fields\u001b[39;00m\n\u001b[32m 88\u001b[39m \u001b[33;03m:return:\u001b[39;00m\n\u001b[32m 89\u001b[39m \u001b[33;03m\"\"\"\u001b[39;00m\n\u001b[32m 90\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28mself\u001b[39m.combined:\n\u001b[32m---> \u001b[39m\u001b[32m91\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m_inject_ld_split\u001b[49m\u001b[43m(\u001b[49m\u001b[43mschema_name\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43moutput\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43minstance\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 92\u001b[39m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[32m 93\u001b[39m filename = \u001b[33m\"\u001b[39m\u001b[33m../resources/json-context/\u001b[39m\u001b[38;5;132;01m%s\u001b[39;00m\u001b[33m/isa_\u001b[39m\u001b[38;5;132;01m%s\u001b[39;00m\u001b[33m_allinone_context.jsonld\u001b[39m\u001b[33m\"\u001b[39m % (\u001b[38;5;28mself\u001b[39m.ontology, \u001b[38;5;28mself\u001b[39m.ontology)\n", |
| 190 | + "\u001b[36mFile \u001b[39m\u001b[32m~/.pyenv/versions/3.13.5/envs/py313isa/lib/python3.13/site-packages/isatools/convert/json2jsonld.py:117\u001b[39m, in \u001b[36mISALDSerializer._inject_ld_split\u001b[39m\u001b[34m(self, schema_name, output, instance, reference)\u001b[39m\n\u001b[32m 115\u001b[39m output[\u001b[33m\"\u001b[39m\u001b[33m@context\u001b[39m\u001b[33m\"\u001b[39m] = \u001b[38;5;28mself\u001b[39m._get_context_url(reference)\n\u001b[32m 116\u001b[39m output[\u001b[33m\"\u001b[39m\u001b[33m@type\u001b[39m\u001b[33m\"\u001b[39m] = context_key\n\u001b[32m--> \u001b[39m\u001b[32m117\u001b[39m \u001b[43m\u001b[49m\u001b[38;5;28;43;01mfor\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mfield\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;129;43;01min\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43minstance\u001b[49m\u001b[43m:\u001b[49m\n\u001b[32m 118\u001b[39m \u001b[43m \u001b[49m\u001b[38;5;28;43;01mif\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mfield\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;129;43;01min\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mprops\u001b[49m\u001b[43m:\u001b[49m\n\u001b[32m 119\u001b[39m \u001b[43m \u001b[49m\u001b[43mfield_props\u001b[49m\u001b[43m \u001b[49m\u001b[43m=\u001b[49m\u001b[43m \u001b[49m\u001b[43mprops\u001b[49m\u001b[43m[\u001b[49m\u001b[43mfield\u001b[49m\u001b[43m]\u001b[49m\n", |
| 191 | + "\u001b[31mTypeError\u001b[39m: 'NoneType' object is not iterable" |
| 192 | + ] |
| 193 | + } |
| 194 | + ], |
| 195 | + "source": [ |
| 196 | + "\n", |
| 197 | + "ontology = \"wd\"\n", |
| 198 | + "\n", |
| 199 | + "serializer = ISALDSerializer(instance)\n", |
| 200 | + "serializer.set_ontology(ontology)\n", |
| 201 | + "serializer.set_instance(instance)\n", |
| 202 | + "serializer.combined=True\n", |
| 203 | + "jsonldcontent = serializer.output\n" |
| 204 | + ] |
| 205 | + }, |
| 206 | + { |
| 207 | + "cell_type": "code", |
| 208 | + "execution_count": null, |
| 209 | + "id": "aa01a6a3-0b78-455f-8052-3acc96a375ea", |
| 210 | + "metadata": {}, |
| 211 | + "outputs": [], |
| 212 | + "source": [ |
| 213 | + "with open(os.path.join(data_dir, 'isa-rdf.json'), 'w') as outld_fp:\n", |
| 214 | + " json.dump(jsonldcontent, outld_fp)" |
| 215 | + ] |
| 216 | + }, |
| 217 | + { |
| 218 | + "cell_type": "code", |
| 219 | + "execution_count": null, |
| 220 | + "id": "93ade57e-9dc0-4305-b08b-b8e7e346cb32", |
| 221 | + "metadata": {}, |
| 222 | + "outputs": [], |
| 223 | + "source": [ |
| 224 | + "graph = Graph()\n", |
| 225 | + "graph.parse(os.path.join(data_dir, 'isa-rdf.json'))" |
| 226 | + ] |
| 227 | + }, |
| 228 | + { |
| 229 | + "cell_type": "code", |
| 230 | + "execution_count": null, |
| 231 | + "id": "df1b8017-96f9-4383-9c8f-dc503023a22f", |
| 232 | + "metadata": {}, |
| 233 | + "outputs": [], |
| 234 | + "source": [ |
| 235 | + "print(f\"Graph g has {len(graph)} statements.\")" |
| 236 | + ] |
| 237 | + }, |
| 238 | + { |
| 239 | + "cell_type": "code", |
| 240 | + "execution_count": 30, |
| 241 | + "id": "eba2eecc-c9d8-45b8-b373-e3b0428bed8b", |
| 242 | + "metadata": {}, |
| 243 | + "outputs": [], |
| 244 | + "source": [ |
| 245 | + "# Write turtle file\n", |
| 246 | + "rdf_path=os.path.join(data_dir, \"isa-rdf-\" + ontology + \"v3.ttl\")\n", |
| 247 | + "with open(rdf_path, 'w') as rdf_file:\n", |
| 248 | + " rdf_file.write(graph.serialize(format='turtle'))" |
| 249 | + ] |
| 250 | + }, |
| 251 | + { |
| 252 | + "cell_type": "code", |
| 253 | + "execution_count": null, |
| 254 | + "id": "5ed05d6f-c94f-443c-b4ec-d474b334cacd", |
| 255 | + "metadata": {}, |
| 256 | + "outputs": [], |
| 257 | + "source": [] |
| 258 | + }, |
| 259 | + { |
| 260 | + "cell_type": "code", |
| 261 | + "execution_count": null, |
| 262 | + "id": "b41de0e2-25dc-42a3-b502-c1ca835d24cd", |
| 263 | + "metadata": {}, |
| 264 | + "outputs": [], |
| 265 | + "source": [] |
| 266 | + } |
| 267 | + ], |
| 268 | + "metadata": { |
| 269 | + "kernelspec": { |
| 270 | + "display_name": "Python 3 (ipykernel)", |
| 271 | + "language": "python", |
| 272 | + "name": "python3" |
| 273 | + }, |
| 274 | + "language_info": { |
| 275 | + "codemirror_mode": { |
| 276 | + "name": "ipython", |
| 277 | + "version": 3 |
| 278 | + }, |
| 279 | + "file_extension": ".py", |
| 280 | + "mimetype": "text/x-python", |
| 281 | + "name": "python", |
| 282 | + "nbconvert_exporter": "python", |
| 283 | + "pygments_lexer": "ipython3", |
| 284 | + "version": "3.13.5" |
| 285 | + } |
| 286 | + }, |
| 287 | + "nbformat": 4, |
| 288 | + "nbformat_minor": 5 |
| 289 | +} |
0 commit comments