Skip to content

Commit 85cbc75

Browse files
authored
Merge pull request LREN-CHUV#42 from LREN-CHUV/anova-fix
limit covariables in anova & fix performance
2 parents 5d24618 + 1c3fcf9 commit 85cbc75

5 files changed

Lines changed: 71 additions & 23 deletions

File tree

python-anova/Dockerfile

Lines changed: 21 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,22 @@
1+
## Run unit tests
2+
3+
FROM hbpmip/python-base-build:0.4.5
4+
5+
COPY requirements-dev.txt /requirements-dev.txt
6+
RUN pip install -r /requirements-dev.txt
7+
8+
COPY requirements.txt /requirements.txt
9+
RUN pip install -r /requirements.txt
10+
11+
COPY anova.py /src/anova.py
12+
COPY tests/unit/ /src/tests/
13+
14+
WORKDIR /src
15+
RUN python -m pytest tests/ -x --ff --capture=no
16+
17+
18+
## Build target image
19+
120
FROM hbpmip/python-mip:0.5.6
221

322
MAINTAINER mirco.nasuti@chuv.ch
@@ -6,10 +25,10 @@ ENV DOCKER_IMAGE=hbpmip/python-anova:0.4.1 \
625
FUNCTION=python-anova
726

827
COPY requirements.txt /requirements.txt
9-
COPY anova.py /anova.py
10-
1128
RUN pip install -r /requirements.txt
1229

30+
COPY anova.py /anova.py
31+
1332
ENTRYPOINT ["python", "/anova.py"]
1433

1534
LABEL org.label-schema.build-date=$BUILD_DATE \

python-anova/anova.py

Lines changed: 33 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,20 @@
11
#!/usr/bin/env python3
2-
3-
from mip_helper import io_helper
2+
# Copyright (C) 2017 LREN CHUV for Human Brain Project
3+
#
4+
# This program is free software: you can redistribute it and/or modify
5+
# it under the terms of the GNU Affero General Public License as
6+
# published by the Free Software Foundation, either version 3 of the
7+
# License, or (at your option) any later version.
8+
#
9+
# This program is distributed in the hope that it will be useful,
10+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
11+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12+
# GNU Affero General Public License for more details.
13+
#
14+
# You should have received a copy of the GNU Affero General Public License
15+
# along with this program. If not, see <http://www.gnu.org/licenses/>.
16+
17+
from mip_helper import io_helper, errors, utils, parameters
418
from mip_helper.shapes import Shapes
519

620
import logging
@@ -12,10 +26,12 @@
1226

1327
DESIGN_PARAM = "design"
1428
DEFAULT_DESIGN = "factorial"
29+
MAX_FACTORIAL_COVARIABLES = 8
1530

1631
DEFAULT_DOCKER_IMAGE = "python-anova"
1732

1833

34+
@utils.catch_user_error
1935
def main():
2036
# Configure logging
2137
logging.basicConfig(level=logging.INFO)
@@ -24,12 +40,11 @@ def main():
2440
inputs = io_helper.fetch_data()
2541
dep_var = inputs["data"]["dependent"][0]
2642
inped_vars = inputs["data"]["independent"]
27-
design = get_parameter(inputs["parameters"], DESIGN_PARAM)
43+
design = parameters.get_parameter(DESIGN_PARAM, str, DEFAULT_DESIGN)
2844

2945
# Check dependent variable type (should be continuous)
3046
if dep_var["type"]["name"] not in ["integer", "real"]:
31-
logging.warning("Dependent variable should be continuous !")
32-
return None
47+
raise errors.UserError('Dependent variable should be continuous!')
3348

3449
# Extract data and parameters from inputs
3550
data = format_data(inputs["data"])
@@ -51,17 +66,10 @@ def format_output(statsmodels_dict):
5166
return json.dumps(DataFrame.from_dict(statsmodels_dict).transpose().fillna("NaN").to_dict())
5267

5368

54-
def get_parameter(params_list, param_name):
55-
for p in params_list:
56-
if p["name"] == param_name:
57-
return p["value"]
58-
return DEFAULT_DESIGN
59-
60-
6169
def compute_anova(dep_var, indep_vars, data, design='factorial'):
6270
formula = generate_formula(dep_var, indep_vars, design)
6371
logging.info("Formula: %s" % formula)
64-
lm = ols(data=data, formula=formula).fit()
72+
lm = ols(data=DataFrame(data), formula=formula).fit()
6573
logging.info(lm.summary())
6674
return anova_lm(lm)
6775

@@ -72,11 +80,19 @@ def generate_formula(dep_var, indep_vars, design):
7280
elif design == 'factorial':
7381
op = " * "
7482
else:
75-
logging.error("Invalid design parameter : %s" % design)
76-
return None
83+
raise errors.UserError("Invalid design parameter : %s" % design)
84+
85+
if design == 'factorial' and len(indep_vars) >= MAX_FACTORIAL_COVARIABLES:
86+
raise errors.UserError(
87+
'You can use at most {} covariables with factorial design ({} was used)'.format(
88+
MAX_FACTORIAL_COVARIABLES, len(indep_vars)
89+
)
90+
)
91+
7792
dep_var = dep_var["name"]
78-
indep_vars = [v["name"] if v["type"]["name"] in ["integer", "real"]
79-
else str.format("C(%s)" % v["name"]) for v in indep_vars]
93+
indep_vars = [
94+
v["name"] if v["type"]["name"] in ["integer", "real"] else str.format("C(%s)" % v["name"]) for v in indep_vars
95+
]
8096
indep_vars = op.join(indep_vars)
8197
indep_vars = indep_vars.strip(op)
8298
return str.format("%s ~ %s" % (dep_var, indep_vars))

python-anova/requirements-dev.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
mock
2+
pytest

python-anova/tests/docker-compose.yml

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -80,11 +80,11 @@ services:
8080
OUT_HOST: db
8181
OUT_PORT: 5432
8282
OUT_DATABASE: woken
83-
PARAM_variables: "score_test1"
84-
PARAM_covariables: "stress_before_test1,iq"
83+
PARAM_variables: "subjectageyears"
84+
PARAM_covariables: "lefthippocampus,leftthalamusproper,leftacgganteriorcingulategyrus,leftententorhinalarea"
8585
PARAM_grouping: ""
86-
PARAM_query: "SELECT stress_before_test1,score_test1,iq,cognitive_task2,practice_task2,response_time_task2,college_math,score_math_course1,score_math_course2 FROM SAMPLE_DATA"
87-
PARAM_meta: "{\"score_test1\":{\"code\":\"score_test1\",\"type\":\"real\"},\"stress_before_test1\":{\"code\":\"stress_before_test1\",\"type\":\"real\"},\"iq\":{\"code\":\"iq\",\"type\":\"real\"},\"cognitive_task2\":{\"code\":\"cognitive_task2\",\"type\":\"real\"},\"practice_task2\":{\"code\":\"practice_task2\",\"type\":\"real\"},\"response_time_task2\":{\"code\":\"response_time_task2\",\"type\":\"real\"},\"college_math\":{\"code\":\"college_math\",\"type\":\"real\"},\"score_math_course1\":{\"code\":\"score_math_course1\",\"type\":\"real\"},\"score_math_course2\":{\"code\":\"score_math_course2\",\"type\":\"real\"}}"
86+
PARAM_query: "SELECT subjectageyears,lefthippocampus,leftthalamusproper,leftacgganteriorcingulategyrus,leftententorhinalarea FROM cde_features_a"
87+
PARAM_meta: "{\"rightphgparahippocampalgyrus\":{\"description\":\"\",\"methodology\":\"lren-nmm-volumes\",\"label\":\"Right parahippocampal gyrus\",\"code\":\"rightphgparahippocampalgyrus\",\"units\":\"cm3\",\"type\":\"real\"},\"lefthippocampus\":{\"description\":\"\",\"methodology\":\"lren-nmm-volumes\",\"label\":\"Left Hippocampus\",\"code\":\"lefthippocampus\",\"units\":\"cm3\",\"type\":\"real\"},\"rightthalamusproper\":{\"description\":\"\",\"methodology\":\"lren-nmm-volumes\",\"label\":\"Right Thalamus\",\"code\":\"rightthalamusproper\",\"units\":\"cm3\",\"type\":\"real\"},\"rightacgganteriorcingulategyrus\":{\"description\":\"\",\"methodology\":\"lren-nmm-volumes\",\"label\":\"Right anterior cingulate gyrus\",\"code\":\"rightacgganteriorcingulategyrus\",\"units\":\"cm3\",\"type\":\"real\"},\"leftthalamusproper\":{\"description\":\"\",\"methodology\":\"lren-nmm-volumes\",\"label\":\"Left Thalamus\",\"code\":\"leftthalamusproper\",\"units\":\"cm3\",\"type\":\"real\"},\"leftphgparahippocampalgyrus\":{\"description\":\"\",\"methodology\":\"lren-nmm-volumes\",\"label\":\"Left parahippocampal gyrus\",\"code\":\"leftphgparahippocampalgyrus\",\"units\":\"cm3\",\"type\":\"real\"},\"rightmcggmiddlecingulategyrus\":{\"description\":\"\",\"methodology\":\"lren-nmm-volumes\",\"label\":\"Right middle cingulate gyrus\",\"code\":\"rightmcggmiddlecingulategyrus\",\"units\":\"cm3\",\"type\":\"real\"},\"leftacgganteriorcingulategyrus\":{\"description\":\"\",\"methodology\":\"lren-nmm-volumes\",\"label\":\"Left anterior cingulate gyrus\",\"code\":\"leftacgganteriorcingulategyrus\",\"units\":\"cm3\",\"type\":\"real\"},\"leftmcggmiddlecingulategyrus\":{\"description\":\"\",\"methodology\":\"lren-nmm-volumes\",\"label\":\"Left middle cingulate gyrus\",\"code\":\"leftmcggmiddlecingulategyrus\",\"units\":\"cm3\",\"type\":\"real\"},\"leftententorhinalarea\":{\"description\":\"\",\"methodology\":\"lren-nmm-volumes\",\"label\":\"Left entorhinal 
area\",\"code\":\"leftententorhinalarea\",\"units\":\"cm3\",\"type\":\"real\"},\"rightpcggposteriorcingulategyrus\":{\"description\":\"\",\"methodology\":\"lren-nmm-volumes\",\"label\":\"Right posterior cingulate gyrus\",\"code\":\"rightpcggposteriorcingulategyrus\",\"units\":\"cm3\",\"type\":\"real\"},\"leftpcggposteriorcingulategyrus\":{\"description\":\"\",\"methodology\":\"lren-nmm-volumes\",\"label\":\"Left posterior cingulate gyrus\",\"code\":\"leftpcggposteriorcingulategyrus\",\"units\":\"cm3\",\"type\":\"real\"},\"subjectageyears\":{\"description\":\"Subject age in years.\",\"methodology\":\"mip-cde\",\"label\":\"Age Years\",\"minValue\":0,\"code\":\"subjectageyears\",\"units\":\"years\",\"length\":3,\"maxValue\":130.0,\"type\":\"integer\"},\"righthippocampus\":{\"description\":\"\",\"methodology\":\"lren-nmm-volumes\",\"label\":\"Right Hippocampus\",\"code\":\"righthippocampus\",\"units\":\"cm3\",\"type\":\"real\"},\"rightententorhinalarea\":{\"description\":\"\",\"methodology\":\"lren-nmm-volumes\",\"label\":\"Right entorhinal area\",\"code\":\"rightententorhinalarea\",\"units\":\"cm3\",\"type\":\"real\"}}"
8888
MODEL_PARAM_design: "factorial"
8989
links:
9090
- "db:db"
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
import pytest
2+
from anova import generate_formula
3+
from mip_helper import errors
4+
5+
6+
def test_generate_formula():
    """Raise error when factorial design and too many covariables are used."""
    dep_var = {'name': 'dep'}
    # Only 'name' is supplied for each covariable: the covariable-count check
    # in generate_formula raises before any variable's 'type' entry is read.
    indep_vars = [{'name': str(i)} for i in range(10)]
    # Match on the message text so that a UserError raised for a different
    # reason (e.g. an invalid design parameter) cannot make this test pass.
    with pytest.raises(errors.UserError, match='at most'):
        generate_formula(dep_var, indep_vars, 'factorial')

0 commit comments

Comments
 (0)