Merge pull request #111 from PythonPredictions/selectable_evaluation_metric

Sam Borms · web-flow · commit 390fc1c20a33 · 2021-10-01T15:51:17.000+02:00
Selectable evaluation metric
diff --git a/cobra/model_building/forward_selection.py b/cobra/model_building/forward_selection.py
@@ -1,5 +1,6 @@
 
 import logging
+from typing import Callable, Optional
 
 import pandas as pd
 from tqdm.auto import tqdm
@@ -76,7 +77,8 @@ def get_model_from_step(self, step: int):
 
     def compute_model_performances(self, data: pd.DataFrame,
                                    target_column_name: str,
-                                   splits: list=["train", "selection", "validation"]
+                                   splits: list = ["train", "selection", "validation"],
+                                   metric: Optional[Callable] = None,
                                    ) -> pd.DataFrame:
         """Compute for each model the performance for different sets (e.g.
         train-selection-validation) and return them along with a list of
@@ -92,6 +94,13 @@ def compute_model_performances(self, data: pd.DataFrame,
             Name of the target column.
         splits : list, optional
             List of splits to compute performance on.
+        metric : Callable (function), optional
+            Function that computes an evaluation metric to evaluate the model's
+            performance, instead of the default metric (AUC for
+            classification, RMSE for regression).
+            The function must accept y_true and y_pred as keyword arguments.
+            Metric functions from sklearn can be used, for example; see
+            https://scikit-learn.org/stable/modules/classes.html#module-sklearn.metrics.
 
         Returns
         -------
@@ -116,7 +125,8 @@ def compute_model_performances(self, data: pd.DataFrame,
                 f"{split}_performance": model.evaluate(
                     data[data["split"] == split],
                     data[data["split"] == split][target_column_name],
-                    split=split  # parameter used for caching
+                    split=split,  # parameter used for caching
+                    metric=metric
                 )
                 for split in splits
             })
diff --git a/cobra/model_building/models.py b/cobra/model_building/models.py
@@ -1,5 +1,7 @@
 
 # third party imports
+from typing import Callable, Optional
+
 import numpy as np
 import pandas as pd
 from scipy import stats
@@ -144,7 +146,8 @@ def score_model(self, X: pd.DataFrame) -> np.ndarray:
         return self.logit.predict_proba(X[self.predictors])[:, 1]
 
     def evaluate(self, X: pd.DataFrame, y: pd.Series,
-                 split: str=None) -> float:
+                 split: str=None,
+                 metric: Optional[Callable]=None) -> float:
         """Evaluate the model on a given data set (X, y). The optional split
         parameter is to indicate that the data set belongs to
         (train, selection, validation), so that the computation on these sets
@@ -158,18 +161,27 @@ def evaluate(self, X: pd.DataFrame, y: pd.Series,
             Dataset containing the target of each observation.
         split : str, optional
             Split name of the dataset (e.g. "train", "selection", or "validation").
+        metric : Callable (function), optional
+            Function that computes an evaluation metric to evaluate the model's
+            performance, instead of the default metric (AUC). It is called as
+            metric(y_true=y, y_pred=y_pred), with y_pred from score_model(X).
+            Metric functions from sklearn can be used, for example; see
+            https://scikit-learn.org/stable/modules/classes.html#module-sklearn.metrics.
 
         Returns
         -------
         float
-            The performance score of the model (AUC).
+            The performance score of the model (AUC by default).
         """
 
         if (split is None) or (split not in self._eval_metrics_by_split):
 
             y_pred = self.score_model(X)
 
-            performance = roc_auc_score(y_true=y, y_score=y_pred)
+            if metric is None:
+                performance = roc_auc_score(y_true=y, y_score=y_pred)
+            else:
+                performance = metric(y_true=y, y_pred=y_pred)
 
             if split is None:
                 return performance
@@ -357,7 +369,8 @@ def score_model(self, X: pd.DataFrame) -> np.ndarray:
         return self.linear.predict(X[self.predictors])
 
     def evaluate(self, X: pd.DataFrame, y: pd.Series,
-                 split: str=None) -> float:
+                 split: str=None,
+                 metric: Optional[Callable]=None) -> float:
         """Evaluate the model on a given data set (X, y). The optional split
         parameter is to indicate that the data set belongs to
         (train, selection, validation), so that the computation on these sets
@@ -371,18 +384,26 @@ def evaluate(self, X: pd.DataFrame, y: pd.Series,
             Dataset containing the target of each observation.
         split : str, optional
             Split name of the dataset (e.g. "train", "selection", or "validation").
+        metric : Callable (function), optional
+            Function that computes an evaluation metric to evaluate the model's
+            performance, instead of the default metric (RMSE). It is called as
+            metric(y_true=y, y_pred=y_pred), with y_pred from score_model(X).
+            Metric functions from sklearn can be used, for example; see
+            https://scikit-learn.org/stable/modules/classes.html#module-sklearn.metrics.
 
         Returns
         -------
         float
-            The performance score of the model (RMSE).
+            The performance score of the model (RMSE by default).
         """
 
         if (split is None) or (split not in self._eval_metrics_by_split):
 
             y_pred = self.score_model(X)
-
-            performance = sqrt(mean_squared_error(y_true=y, y_pred=y_pred))
+            if metric is None:
+                performance = sqrt(mean_squared_error(y_true=y, y_pred=y_pred))
+            else:
+                performance = metric(y_true=y, y_pred=y_pred)
 
             if split is None:
                 return performance
diff --git a/tests/model_building/test_forward_selection.py b/tests/model_building/test_forward_selection.py
@@ -76,7 +76,8 @@ def mock_evaluate(self, X, y, split):  # on AUC scale, but gives the same for RM
 
         actual = (fw_selection
                   .compute_model_performances(data, "target",
-                                              splits=["train", "selection"]))
+                                              splits=["train", "selection"],
+                                              metric=None))
 
         expected = pd.DataFrame([
             {"predictors": ["var1_enc"],