Skip to content

Commit 2a6e1b2

Browse files
Adding automatic model downloads to CLI (#44)
* adding functionality to auto-download models to CLI * fixing bugs in case of overlapping gene ids
1 parent 8a99bff commit 2a6e1b2

9 files changed

Lines changed: 262 additions & 11 deletions

File tree

.gitignore

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,6 @@
1+
# GEMCAT specific
2+
models/
3+
14
# Byte-compiled / optimized / DLL files
25
__pycache__/
36
*.py[cod]

README.md

Lines changed: 18 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -22,13 +22,21 @@ Or clone the repository and install GEMCAT from there using:
2222
### Standard workflow from the Command-Line Interface (CLI)
2323

2424
Use a single file containing per-gene fold-changes to calculate the resulting differential centralities:
25-
``` gemcat ./expression_file.csv ./model_file.xml -e column_name -o <result_file.csv>```
25+
``` gemcat <./expression_file.csv> <./model_file.xml> -e <column_name> -o <result_file.csv>```
2626
Make sure the .csv file is either comma- or tab-delimited.
2727
`column_name` is the name of the column in the file containing the fold-change.
2828

2929
Alternatively, use two files (or one file) with expression values for condition and baseline:
3030
``` gemcat <./condition_file.csv> <./model_file.xml> -e <condition_column_name> -b <./baseline_file> -c <baseline_column_name> -o <result_file.csv>```
3131

32+
If you do not have a model file ready, some models can be automatically accessed using their names:
33+
``` gemcat ./expression_file.csv <model_name> -e column_name -o <result_file.csv>```
34+
35+
Model names currently supported are:
36+
- ```recon3d```: [Recon3D](http://bigg.ucsd.edu/models/Recon3D)
37+
- ```ratgem```: [Rat-GEM](https://github.com/SysBioChalmers/Rat-GEM)
38+
39+
3240
Currently only models in XML/SBML format are supported in the CLI.
3341
Further models can be used from the Python library.
3442
Support will come to the CLI soon.
@@ -74,23 +82,27 @@ All classes inheriting from the abstract base classes laid out in the modules ar
7482
## Core modules
7583
### Model
7684
The core of the package is the GEMCAT model structure that contains the model data, integrates the workflow, and calculates the results.
77-
### Adjacency
85+
### adjacency_transformation
7886
Different approaches can be used to calculate adjacency in the networks.
7987
We offer alternatives and a platform to create custom algorithms for the model.
80-
### Expression
88+
### expression
8189
Module covering the mapping of gene values onto reactions in the model via gene product rules.
8290
Providing different algorithms along with a platform to create alternatives.
83-
### Pagerank
91+
### ranking
8492
Module providing ranking algorithms for the models along with a platform to include custom algorithms.
8593
### workflows
8694
The workflow module contains example workflows.
8795
To customize the workflow to your needs simply copy the provided functions and switch out the desired steps.
96+
### cli
97+
Command-line interface for GEMCAT.
8898
### io
8999
Input and output functions that create GEMCAT models from different sources.
90-
## utils
100+
### utils
91101
Contains common utility functions used throughout the package.
92-
## verification
102+
### verification
93103
Functions to verify data integrity.
104+
### model_manager
105+
Functionality for automatic downloading, storing, and retrieving of common models.
94106

95107

96108
## Development

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
44

55
[project]
66
name = "gemcat"
7-
version = "1.3.0"
7+
version = "1.4.0"
88

99
description = "A toolbox for gene expression-based prediction of metabolic alterations"
1010
keywords = ["python", "bioinformatics", "modeling", "metabolites", "omics"]

src/gemcat/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
expression,
55
io,
66
model,
7+
model_manager,
78
ranking,
89
utils,
910
verification,

src/gemcat/cli.py

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
from pandas import DataFrame, Series, read_csv
1616

1717
from .io import load_json_cobra, load_mat_cobra, load_sbml_cobra
18+
from .model_manager import ModelManager
1819
from .workflows import workflow_standard
1920

2021

@@ -36,6 +37,8 @@ def not_implemented(whatever: Any):
3637
"mat": load_mat_cobra,
3738
}
3839

40+
model_manager = ModelManager()
41+
3942

4043
def wrong_filetype(any: Any):
4144
raise NotImplementedError(f"Not implemented for {any}")
@@ -163,7 +166,8 @@ def build_parser() -> argparse.ArgumentParser:
163166
)
164167

165168
parser.add_argument(
166-
"modelfile", help="Path to model file to use (XML/SBML, JSON format)"
169+
"modelfile",
170+
help=f"Path to model file to use (XML/SBML, JSON, or MAT format), or one of: {model_manager.get_managed_models_str()}",
167171
)
168172
parser.add_argument(
169173
"expressionfile", help="Path to file containing the condition expression data"
@@ -235,7 +239,11 @@ def cli_standard(args: argparse.Namespace):
235239
except (TypeError, ValueError):
236240
logging.info("Empty or invalid gene-fill value. Defaulting to 1.0 .")
237241
gene_fill = 1.0
238-
cobra_model = parse_model(args.modelfile)
242+
if args.modelfile in model_manager.get_managed_models():
243+
model_path = model_manager.get_model(args.modelfile)
244+
cobra_model = parse_model(model_path)
245+
else:
246+
cobra_model = parse_model(args.modelfile)
239247
return workflow_standard(
240248
cobra_model, baseline, expression, gene_fill
241249
), parse_outfile(args.outfile)

src/gemcat/expression.py

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -183,8 +183,18 @@ def rewrite_single_gpr(self, rxn: str) -> str:
183183
if not isinstance(gpr, str):
184184
return ""
185185

186-
for gene in genes:
187-
gene_str = str(gene)
186+
# sort gene list according to reverse length to avoid partial substitutions
187+
# imagine a GPR: "ABC and ABCD"
188+
# typically, we would start with ABC and substitute its value, e.g. 1.0
189+
# we would then receive "1.0 and 1.0D", which is obviously false
190+
# sorting the list of genes solves that issue
191+
192+
sorted_genes = sorted(
193+
[str(gene) for gene in genes], key=len, reverse=True
194+
) # longest to shortest gene
195+
196+
for gene in sorted_genes:
197+
gene_str = gene
188198
gene_val = float(self.data.get(gene_str, self.gene_fill))
189199
gpr = gpr.replace(gene_str, f"{gene_val}")
190200

src/gemcat/model_manager.py

Lines changed: 131 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,131 @@
1+
from pathlib import Path
2+
from typing import Any
3+
4+
import requests
5+
6+
# Models that can be fetched automatically, keyed by the name used to request
# them. Each entry is (download URL, file format); the file format doubles as
# the extension of the locally stored file.
MODEL_URLS = dict(
    recon3d=("http://bigg.ucsd.edu/api/v2/models/Recon3D/download", "json"),
    ratgem=(
        "https://github.com/SysBioChalmers/Rat-GEM/raw/refs/heads/main/model/Rat-GEM.mat",
        "mat",
    ),
)
# Live view on the names of all automatically downloadable models.
SUPPORTED_MODELS = MODEL_URLS.keys()
14+
15+
16+
def ratgem_processing(response: requests.Response) -> Any:
    """
    Extract the Rat-GEM model from a download response.
    The response body is handed back unchanged as raw bytes.
    :param response: Response containing Rat-GEM model .mat binary
    :type response: requests.Response
    :return: Binary (.mat) representation of Rat-GEM
    :rtype: bytes
    """
    raw_model = response.content
    return raw_model
25+
26+
27+
def recon_processing(response: requests.Response) -> str:
    """
    Extract the Recon3D model from a download response.
    Every occurrence of "_AT" in the response text is replaced by ".".
    :param response: Response containing Recon3D JSON
    :type response: requests.Response
    :return: String representation of Recon3D JSON with "_AT" replaced by "."
    :rtype: str
    """
    # NOTE(review): the substitution is applied to the whole text, not only to
    # gene identifiers - presumably "_AT" occurs nowhere else; confirm.
    marker, separator = "_AT", "."
    pieces = response.text.split(marker)
    return separator.join(pieces)
36+
37+
38+
# Post-processing applied to the download response of each managed model,
# keyed by model name; the result is what gets written to disk.
processing = dict(
    recon3d=recon_processing,
    ratgem=ratgem_processing,
)
42+
43+
44+
# TODO: make singleton
45+
class ModelManager:
    """
    ModelManager class in charge of management of auto-downloading, storing and retrieving model files

    Model files are kept in a ``models`` folder relative to the current working
    directory. The folder is created when a ModelManager is instantiated.
    """

    # Seconds to wait for the server (to connect, and between received bytes)
    # before giving up, so that a stalled download cannot block forever.
    DOWNLOAD_TIMEOUT = 60

    def __init__(self):
        """
        Initialize ModelManager and make sure the model folder exists
        """
        # file name of every managed model: "<name>.<file format>"
        self.model_files = {
            name: f"{name}.{extension}" for name, (_, extension) in MODEL_URLS.items()
        }
        self.model_path = Path("./models")
        self.model_file_paths = {
            name: self.model_path / model_file
            for name, model_file in self.model_files.items()
        }
        self.model_path.mkdir(exist_ok=True)
        self.managed_models_str = ", ".join(SUPPORTED_MODELS)

    def get_model(self, model: str) -> Path:
        """
        Retrieve a given model by its name, downloading it first if it is not stored locally
        :param model: Name given to the model (see allowed models above)
        :type model: str
        :raises KeyError: Raised if the model name is unknown
        :return: Path to model file
        :rtype: Path
        """
        model_file = self.model_file_paths[model]
        if not model_file.exists():
            self.download_model(model)
        return model_file

    def download_model(self, model: str):
        """
        Download a given model by its name from a hardcoded URL
        :param model: Name given to the model (see allowed models above)
        :type model: str
        :raises ValueError: Raised if the model name is unknown
        :raises requests.HTTPError: Raised if the download fails
        """
        if model not in MODEL_URLS:
            raise ValueError(
                f"Illegal model: {model} . Supported models are: {self.managed_models_str}"
            )
        try:
            response = requests.get(MODEL_URLS[model][0], timeout=self.DOWNLOAD_TIMEOUT)
            response.raise_for_status()
        except requests.HTTPError as error:
            raise requests.HTTPError(f"Failed to download {model}") from error

        content = processing[model](response)
        save_file = self.model_file_paths[model]
        # The folder is created in __init__, but wipe() removes it again;
        # recreate it so that downloading still works afterwards.
        save_file.parent.mkdir(parents=True, exist_ok=True)
        # .mat models are binary, everything else is written as text
        mode = "wb" if save_file.suffix == ".mat" else "w"
        with open(save_file, mode) as f:
            f.write(content)

    def get_managed_models(self) -> list[str]:
        """
        Return a list of the names of supported models
        :return: List of supported models' names
        :rtype: list[str]
        """
        # copy into a real list so that callers get the documented type
        return list(SUPPORTED_MODELS)

    def get_managed_models_str(self) -> str:
        """
        Return all supported models in a string representation
        :return: String of supported models
        :rtype: str
        """
        return self.managed_models_str

    def wipe(self):
        """
        Wipes all previously downloaded model files and deletes the models folder.
        Does nothing for files, or a folder, that are already absent.
        :raises OSError: Raised if any file or the model folder cannot be deleted
        """
        try:
            for model_file in self.model_file_paths.values():
                model_file.unlink(missing_ok=True)
            # rmdir still fails if the folder holds files not managed here
            if self.model_path.exists():
                self.model_path.rmdir()
        except OSError as error:
            raise OSError(
                "Could not delete model files, please delete manually in 'models' folder."
            ) from error

tests/test_cli.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -126,3 +126,21 @@ def test_cli_uc_json():
126126
out_file.unlink()
127127
assert allclose(expected.values, received.values, atol=0.3)
128128
# the vast majority of metabolites is well behaved, a few show "larger" but inconsequential deviations
129+
130+
131+
@pytest.mark.slow
def test_cli_auto_rat():
    """Run the CLI with the managed ``ratgem`` model name and check that a result file is written."""
    out_file = Path("./temp_outfile.csv")
    try:
        run(
            [
                "gemcat",
                "ratgem",
                str(expression_path / "prot_uc_vs_healthy.csv"),
                "-e",
                "foldchange",
                "-o",
                "temp_outfile.csv",
            ]
        )
        assert out_file.exists()
    finally:
        # clean up even if the run or the assertion failed
        out_file.unlink(missing_ok=True)

tests/test_model_manager.py

Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,68 @@
1+
from pathlib import Path
2+
3+
from pytest import fixture
4+
5+
from gemcat.model_manager import ModelManager
6+
7+
model_path = Path("./models")
8+
9+
10+
def test_model_manager_setup():
    """A ModelManager can be instantiated without raising."""
    # Any exception raised here fails the test with its real traceback;
    # re-wrapping it in a bare AssertionError would only hide the cause.
    mm = ModelManager()
    assert isinstance(mm, ModelManager)
16+
17+
18+
def test_wipe():
    """wipe() removes a stored model file from the model folder."""
    # Set up outside the try block so that the cleanup below never touches
    # a name that was not assigned.
    mm = ModelManager()
    mm.model_path.mkdir(exist_ok=True)
    fake_model = mm.model_path / "recon3d.json"
    try:
        with open(fake_model, "w") as f:
            f.write("delete me")
        assert fake_model.is_file()
        mm.wipe()
        assert not fake_model.is_file()
    finally:
        if fake_model.exists():
            fake_model.unlink()
31+
32+
33+
@fixture
def ensure_empty_models():
    """Remove all stored model files (and the model folder) before a test runs."""
    ModelManager().wipe()
37+
38+
39+
def test_model_manager_recon(ensure_empty_models):
    """Downloading ``recon3d`` stores ``recon3d.json`` in the model folder."""
    manager = ModelManager()
    manager.download_model("recon3d")
    stored_file = manager.model_path / "recon3d.json"
    assert stored_file.exists()
43+
44+
45+
def test_model_manager_ratgem(ensure_empty_models):
    """Downloading ``ratgem`` stores ``ratgem.mat`` in the model folder."""
    manager = ModelManager()
    manager.download_model("ratgem")
    stored_file = manager.model_path / "ratgem.mat"
    assert stored_file.exists()
49+
50+
51+
def test_model_manager_get_download(ensure_empty_models):
    """get_model() on a model that is not stored yet downloads it and returns its path."""
    manager = ModelManager()
    expected_file = manager.model_path / "ratgem.mat"
    output = manager.get_model("ratgem")
    assert output == expected_file
    assert output.exists()
56+
57+
58+
def test_model_manager_get_existing(ensure_empty_models):
    """get_model() returns the path of a model file that is already stored."""
    # Construct outside the try block so that the cleanup below never touches
    # a name that was not assigned.
    mm = ModelManager()
    try:
        output = mm.model_path / "recon3d.json"
        with open(output, "w") as f:
            f.write("I am a model.")
        result = mm.get_model("recon3d")
        assert result == output
        assert result.exists()
    finally:
        mm.wipe()

0 commit comments

Comments
 (0)