Improve usage of the metric argument in model evaluation

sborms · sborms · commit eccb3447bf16 · 2021-10-01T17:36:34.000+02:00
diff --git a/cobra/evaluation/evaluator.py b/cobra/evaluation/evaluator.py
@@ -88,13 +88,13 @@ def fit(self, y_true: np.ndarray, y_pred: np.ndarray):
         """
         fpr, tpr, thresholds = roc_curve(y_true=y_true, y_score=y_pred)
 
-        # if probability_cutoff is not set, take the optimal cut off
+        # if probability_cutoff is not set, take the optimal cut-off
         if not self.probability_cutoff:
             self.probability_cutoff = (ClassificationEvaluator.
                                        _compute_optimal_cutoff(fpr, tpr,
                                                                thresholds))
 
-        # Transform probabilities to binary array using cut off:
+        # Transform probabilities to binary array using cut-off
         y_pred_b = np.array([0 if pred <= self.probability_cutoff else 1
                              for pred in y_pred])
 
diff --git a/cobra/model_building/forward_selection.py b/cobra/model_building/forward_selection.py
@@ -77,8 +77,8 @@ def get_model_from_step(self, step: int):
 
     def compute_model_performances(self, data: pd.DataFrame,
                                    target_column_name: str,
-                                   splits: list = ["train", "selection", "validation"],
-                                   metric: Optional[Callable] = None,
+                                   splits: list=["train", "selection", "validation"],
+                                   metric: Optional[Callable]=None,
                                    ) -> pd.DataFrame:
         """Compute for each model the performance for different sets (e.g.
         train-selection-validation) and return them along with a list of
@@ -170,7 +170,6 @@ def fit(self, train_data: pd.DataFrame, target_column_name: str,
         """
 
         assert "split" in train_data.columns, "The train_data input df does not include a split column."
-        print(train_data["split"].unique())
         assert len(set(["train", "selection"]).difference(set(train_data["split"].unique()))) == 0, \
             "The train_data input df does not include a 'train' and 'selection' split."
 
diff --git a/cobra/model_building/models.py b/cobra/model_building/models.py
@@ -1,16 +1,18 @@
 
-# third party imports
 from typing import Callable, Optional
 
+# third party imports
 import numpy as np
 import pandas as pd
 from scipy import stats
 from sklearn.metrics import roc_auc_score, mean_squared_error
 from numpy import sqrt
 from sklearn.linear_model import LogisticRegression, LinearRegression
+from sklearn.metrics import roc_curve
 
 # custom imports
 import cobra.utils as utils
+from cobra.evaluation import ClassificationEvaluator
 
 class LogisticRegressionModel:
     """Wrapper around the LogisticRegression class, with additional methods
@@ -148,8 +150,8 @@ def score_model(self, X: pd.DataFrame) -> np.ndarray:
     def evaluate(self, X: pd.DataFrame, y: pd.Series,
                  split: str=None,
                  metric: Optional[Callable]=None) -> float:
-        """Evaluate the model on a given data set (X, y). The optional split
-        parameter is to indicate that the data set belongs to
+        """Evaluate the model on a given dataset (X, y). The optional split
+        parameter is to indicate that the dataset belongs to
         (train, selection, validation), so that the computation on these sets
         can be cached!
 
@@ -164,7 +166,7 @@ def evaluate(self, X: pd.DataFrame, y: pd.Series,
         metric: Callable (function), optional
             Function that computes an evaluation metric to evaluate the model's
             performances, instead of the default metric (AUC).
-            The function should require y_true and y_pred arguments.
+            The function should accept y_true and y_pred keyword arguments; y_pred is passed as binary (0/1) predictions.
             Metric functions from sklearn can be used, for example, see
             https://scikit-learn.org/stable/modules/classes.html#module-sklearn.metrics.
 
@@ -173,20 +175,25 @@ def evaluate(self, X: pd.DataFrame, y: pd.Series,
         float
             The performance score of the model (AUC by default).
         """
+        if metric is not None:  # decouple from _eval_metrics_by_split attribute
+            y_pred = self.score_model(X)
 
-        if (split is None) or (split not in self._eval_metrics_by_split):
+            fpr, tpr, thresholds = roc_curve(y_true=y, y_score=y_pred)
+            cutoff = (ClassificationEvaluator._compute_optimal_cutoff(fpr, tpr, thresholds))
+            y_pred_b = np.array([0 if pred <= cutoff else 1 for pred in y_pred])
 
-            y_pred = self.score_model(X)
+            performance = metric(y_true=y, y_pred=y_pred_b)
 
-            if metric is None:
+            return performance
+        else:
+            if (split is None) or (split not in self._eval_metrics_by_split):
+                y_pred = self.score_model(X)
                 performance = roc_auc_score(y_true=y, y_score=y_pred)
-            else:
-                performance = metric(y_true=y, y_pred=y_pred)
 
-            if split is None:
-                return performance
-            else:
-                self._eval_metrics_by_split[split] = performance
+                if split is None:
+                    return performance
+                else:
+                    self._eval_metrics_by_split[split] = performance
 
         return self._eval_metrics_by_split[split]
 
@@ -371,8 +378,8 @@ def score_model(self, X: pd.DataFrame) -> np.ndarray:
     def evaluate(self, X: pd.DataFrame, y: pd.Series,
                  split: str=None,
                  metric: Optional[Callable]=None) -> float:
-        """Evaluate the model on a given data set (X, y). The optional split
-        parameter is to indicate that the data set belongs to
+        """Evaluate the model on a given dataset (X, y). The optional split
+        parameter is to indicate that the dataset belongs to
         (train, selection, validation), so that the computation on these sets
         can be cached!
 
@@ -396,19 +403,20 @@ def evaluate(self, X: pd.DataFrame, y: pd.Series,
         float
             The performance score of the model (RMSE by default).
         """
-
-        if (split is None) or (split not in self._eval_metrics_by_split):
-
+        if metric is not None:  # decouple from _eval_metrics_by_split attribute
             y_pred = self.score_model(X)
-            if metric is None:
+            performance = metric(y_true=y, y_pred=y_pred)
+
+            return performance
+        else:
+            if (split is None) or (split not in self._eval_metrics_by_split):
+                y_pred = self.score_model(X)
                 performance = sqrt(mean_squared_error(y_true=y, y_pred=y_pred))
-            else:
-                performance = metric(y_true=y, y_pred=y_pred)
 
-            if split is None:
-                return performance
-            else:
-                self._eval_metrics_by_split[split] = performance
+                if split is None:
+                    return performance
+                else:
+                    self._eval_metrics_by_split[split] = performance
 
         return self._eval_metrics_by_split[split]
 
diff --git a/cobra/preprocessing/preprocessor.py b/cobra/preprocessing/preprocessor.py
@@ -1,5 +1,5 @@
 
-# std lib imports
+# standard lib imports
 import inspect
 import time
 import math
diff --git a/tests/model_building/test_forward_selection.py b/tests/model_building/test_forward_selection.py
@@ -61,7 +61,7 @@ def test_compute_model_performances(self, mocker, model_type):
             mock_model_num_pred(3, model_type=model_type)
         ]
 
-        def mock_evaluate(self, X, y, split):  # on AUC scale, but gives the same for RMSE as it is a mock
+        def mock_evaluate(self, X, y, split, metric):  # on AUC scale, but gives the same for RMSE as it is a mock
             if split == "train":
                 return 0.612
             else:
diff --git a/tutorials/tutorial_Cobra_linear_regression.ipynb b/tutorials/tutorial_Cobra_linear_regression.ipynb
diff --git a/tutorials/tutorial_Cobra_logistic_regression.ipynb b/tutorials/tutorial_Cobra_logistic_regression.ipynb

Original file line number	Diff line number	Diff line change
`@@ -61,7 +61,7 @@ def test_compute_model_performances(self, mocker, model_type):`
`61`	`61`	`mock_model_num_pred(3, model_type=model_type)`
`62`	`62`	`]`
`63`	`63`
`64`		`- def mock_evaluate(self, X, y, split): # on AUC scale, but gives the same for RMSE as it is a mock`
	`64`	`+ def mock_evaluate(self, X, y, split, metric): # on AUC scale, but gives the same for RMSE as it is a mock`
`65`	`65`	`if split == "train":`
`66`	`66`	`return 0.612`
`67`	`67`	`else:`