Skip to content

Commit 3f06750

Browse files
author
sborms
committed
integrating comments Sander & finetuning
1 parent 5964f5b commit 3f06750

6 files changed

Lines changed: 204 additions & 177 deletions

File tree

cobra/evaluation/evaluator.py

Lines changed: 7 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -40,7 +40,7 @@ class ClassificationEvaluator():
4040
cumulative_gains : tuple
4141
Data for plotting cumulative gains curve.
4242
evaluation_metrics : dict
43-
Map containing various scalar evaluation metrics (precision, recall, accuracy, AUC, F1, etc.)
43+
Map containing various scalar evaluation metrics (precision, recall, accuracy, AUC, F1, etc.).
4444
lift_at : float
4545
Parameter to determine at which top level percentage the lift of the
4646
model should be computed.
@@ -191,7 +191,7 @@ def plot_roc_curve(self, path: str=None, dim: tuple=(12, 8)):
191191
ax.set_xlabel("False Positive Rate", fontsize=15)
192192
ax.set_ylabel("True Positive Rate", fontsize=15)
193193
ax.legend(loc="lower right")
194-
ax.set_title("ROC Curve", fontsize=20)
194+
ax.set_title("ROC curve", fontsize=20)
195195

196196
if path:
197197
plt.savefig(path, format="png", dpi=300, bbox_inches="tight")
@@ -274,7 +274,7 @@ def plot_cumulative_response_curve(self, path: str=None, dim: tuple=(12, 8)):
274274
ax.grid(False)
275275

276276
# Description
277-
ax.set_title("Cumulative response", fontsize=20)
277+
ax.set_title("Cumulative Response curve", fontsize=20)
278278

279279
if path is not None:
280280
plt.savefig(path, format="png", dpi=300, bbox_inches="tight")
@@ -323,7 +323,7 @@ def plot_lift_curve(self, path: str=None, dim: tuple=(12, 8)):
323323
ax.grid(False)
324324

325325
# Description
326-
ax.set_title("Cumulative Lift", fontsize=20)
326+
ax.set_title("Cumulative Lift curve", fontsize=20)
327327

328328
if path is not None:
329329
plt.savefig(path, format="png", dpi=300, bbox_inches="tight")
@@ -350,7 +350,7 @@ def plot_cumulative_gains(self, path: str=None, dim: tuple=(12, 8)):
350350
ax.plot(ax.get_xlim(), ax.get_ylim(), linewidth=3,
351351
ls="--", color="darkorange", label="random selection")
352352

353-
ax.set_title("Cumulative Gains", fontsize=20)
353+
ax.set_title("Cumulative Gains curve", fontsize=20)
354354

355355
# Format axes
356356
ax.set_xlim([0, 100])
@@ -681,7 +681,7 @@ def plot_predictions(self, path: str=None, dim: tuple=(12, 8)):
681681
ax.set_xlabel("Index", fontsize=15)
682682
ax.set_ylabel("Value", fontsize=15)
683683
ax.legend(loc="best")
684-
ax.set_title("Prediction Plot", fontsize=20)
684+
ax.set_title("Predictions vs. Actuals", fontsize=20)
685685

686686
if path:
687687
plt.savefig(path, format="png", dpi=300, bbox_inches="tight")
@@ -722,7 +722,7 @@ def plot_qq(self, path: str=None, dim: tuple=(12, 8)):
722722
ax.set_yticks(range(int(np.floor(min(y))), int(np.ceil(max(y[x < float("inf")])))+1, 1))
723723

724724
ax.legend(loc="best")
725-
ax.set_title("Q-Q Plot", fontsize=20)
725+
ax.set_title("Q-Q plot", fontsize=20)
726726

727727
if path:
728728
plt.savefig(path, format="png", dpi=300, bbox_inches="tight")

cobra/model_building/forward_selection.py

Lines changed: 11 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -110,7 +110,7 @@ def compute_model_performances(self, data: pd.DataFrame,
110110
"last_added_predictor": list(last_added_predictor)[0]
111111
}
112112

113-
# Evaluate model on each data set split,
113+
# Evaluate model on each dataset split,
114114
# e.g. train-selection-validation
115115
tmp.update({
116116
f"{split}_performance": model.evaluate(
@@ -138,9 +138,11 @@ def fit(self, train_data: pd.DataFrame, target_column_name: str,
138138
Parameters
139139
----------
140140
train_data : pd.DataFrame
141-
Data on which to fit the model. The "train" split is used to
142-
train a model, the "selection" split is used to evaluate
143-
the actual forward feature selection.
141+
Data on which to fit the model. Should include a "train"
142+
and "selection" split for correct model selection! The
143+
"train" split is used to train a model, the "selection"
144+
split is used to evaluate which model to include in the
145+
actual forward feature selection.
144146
target_column_name : str
145147
Name of the target column.
146148
predictors : list
@@ -156,6 +158,9 @@ def fit(self, train_data: pd.DataFrame, target_column_name: str,
156158
In case the number of forced predictors is larger than the maximum
157159
number of allowed predictors in the model.
158160
"""
161+
assert all(s in ["train", "selection"] for s in train_data["split"].unique()), \
162+
"The train_data input df does not include a 'train' and 'selection' split."
163+
159164
# remove excluded predictors from predictor lists
160165
filtered_predictors = [var for var in predictors
161166
if (var not in excluded_predictors and
@@ -164,13 +169,13 @@ def fit(self, train_data: pd.DataFrame, target_column_name: str,
164169
# checks on predictor lists and self.max_predictors attr
165170
if len(forced_predictors) > self.max_predictors:
166171
raise ValueError("Size of forced_predictors cannot be bigger than "
167-
"max_predictors")
172+
"max_predictors.")
168173
elif len(forced_predictors) == self.max_predictors:
169174
log.info("Size of forced_predictors equals max_predictors "
170175
"only one model will be trained...")
171176
# train model with all forced_predictors (only)
172177
(self._fitted_models
173-
.append(self._train_model(train_data,
178+
.append(self._train_model(train_data[train_data["split"] == "train"],
174179
target_column_name,
175180
forced_predictors)))
176181
else:

cobra/preprocessing/preprocessor.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -37,7 +37,7 @@ class PreProcessor(BaseEstimator):
3737
Instance of CategoricalDataProcessor to do the preprocessing of
3838
categorical variables.
3939
discretizer : KBinsDiscretizer
40-
Instance of KBinsDiscretizer to do the prepocessing of continuous
40+
Instance of KBinsDiscretizer to do the preprocessing of continuous
4141
variables by means of discretization.
4242
target_encoder : TargetEncoder
4343
Instance of TargetEncoder to do the incidence replacement.

cobra/preprocessing/target_encoder.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -66,7 +66,7 @@ def __init__(self, weight: float=0.0,
6666
imputation_strategy: str="mean"):
6767

6868
if weight < 0:
69-
raise ValueError("The value of weight cannot be smaller than zero")
69+
raise ValueError("The value of weight cannot be smaller than zero.")
7070
elif imputation_strategy not in self.valid_imputation_strategies:
7171
raise ValueError("Valid options for 'imputation_strategy' are {}."
7272
" Got imputation_strategy={!r} instead."

0 commit comments

Comments
 (0)