Skip to content

Commit f2c71ed

Browse files
Add additional unittest for TargetEncoder
1 parent 0a5a7ff commit f2c71ed

2 files changed

Lines changed: 42 additions & 2 deletions

File tree

cobra/evaluation/plotting_utils.py

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,8 @@
77

88

99
def plot_univariate_predictor_quality(df_auc: pd.DataFrame,
10-
dim: tuple=(12, 8)):
10+
dim: tuple=(12, 8),
11+
path: str=None):
1112
"""Plot univariate quality of the predictors
1213
1314
Parameters
@@ -18,6 +19,8 @@ def plot_univariate_predictor_quality(df_auc: pd.DataFrame,
1819
criteria
1920
dim : tuple, optional
2021
tuple with width and length of the plot
22+
path : str, optional
23+
path to store the figure
2124
"""
2225

2326
df = (df_auc[df_auc["preselection"]]
@@ -41,6 +44,9 @@ def plot_univariate_predictor_quality(df_auc: pd.DataFrame,
4144
# Remove white lines from the second axis
4245
ax.grid(False)
4346

47+
if path is not None:
48+
plt.savefig(path, format="png", dpi=300, bbox_inches="tight")
49+
4450
plt.show()
4551

4652

@@ -70,6 +76,7 @@ def plot_correlation_matrix(df_corr: pd.DataFrame,
7076

7177
def plot_performance_curves(model_performance: pd.DataFrame,
7278
dim: tuple=(12, 8),
79+
path: str=None,
7380
colors: dict={"train": "#0099bf",
7481
"selection": "#ff9500",
7582
"validation": "#8064a2"}):
@@ -83,6 +90,8 @@ def plot_performance_curves(model_performance: pd.DataFrame,
8390
in the forward feature selection
8491
dim : tuple, optional
8592
tuple with width and length of the plot
93+
path : str, optional
94+
path to store the figure
8695
"""
8796
highest_auc = np.round(max(max(model_performance['train_performance']),
8897
max(model_performance['selection_performance']),
@@ -113,6 +122,10 @@ def plot_performance_curves(model_performance: pd.DataFrame,
113122
fig.suptitle('Performance curves - forward feature selection',
114123
fontsize=20)
115124
plt.ylabel('Model performance')
125+
126+
if path is not None:
127+
plt.savefig(path, format="png", dpi=300, bbox_inches="tight")
128+
116129
plt.show()
117130

118131

tests/preprocessing/test_target_encoder.py

Lines changed: 28 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -90,7 +90,7 @@ def test_target_encoder_fit_column(self):
9090
'target': [1, 1, 0, 0, 1, 0, 0, 0, 1, 1]})
9191

9292
encoder = TargetEncoder()
93-
encoder._global_mean = 0.0
93+
encoder._global_mean = 0.5
9494
actual = encoder._fit_column(X=df.variable, y=df.target)
9595

9696
expected = pd.Series(data=[0.333333, 0.50000, 0.666667],
@@ -164,6 +164,33 @@ def test_target_encoder_transform(self):
164164
pd.testing.assert_frame_equal(actual, expected,
165165
check_less_precise=5)
166166

167+
def test_target_encoder_transform_new_category(self):
    """Check ``transform`` on data holding a category unseen during ``fit``.

    The encoder is fitted on ten rows with the categories "positive",
    "negative" and "neutral", then applied to the same rows plus one extra
    row whose category ("new") was not present when fitting. With
    ``imputation_strategy="min"`` the test expects the extra row to be
    encoded as 0.333333, which equals the lowest of the three expected
    encodings of the fitted categories.
    """
    df = pd.DataFrame({'variable': ['positive', 'positive', 'negative',
                                    'neutral', 'negative', 'positive',
                                    'negative', 'neutral', 'neutral',
                                    'neutral'],
                       'target': [1, 1, 0, 0, 1, 0, 0, 0, 1, 1]})

    # DataFrame.append was removed in pandas 2.0; pd.concat builds the same
    # eleven-row frame and is available in every pandas version.
    new_row = pd.DataFrame({"variable": ["new"], "target": [1]})
    df_appended = pd.concat([df, new_row], ignore_index=True)

    # inputs of TargetEncoder will be of dtype category
    df["variable"] = df["variable"].astype("category")
    df_appended["variable"] = df_appended["variable"].astype("category")

    expected = df_appended.copy()
    expected["variable_enc"] = [0.666667, 0.666667, 0.333333, 0.50000,
                                0.333333, 0.666667, 0.333333, 0.50000,
                                0.50000, 0.50000, 0.333333]

    encoder = TargetEncoder(imputation_strategy="min")
    encoder.fit(data=df, column_names=["variable"], target_column="target")
    actual = encoder.transform(data=df_appended, column_names=["variable"])

    # check_less_precise was deprecated in pandas 1.1 and removed in 2.0.
    # pandas mapped check_less_precise=5 to rtol = atol = 0.5e-5, so the
    # tolerance of the comparison is unchanged.
    pd.testing.assert_frame_equal(actual, expected,
                                  rtol=0.5e-5, atol=0.5e-5)
193+
167194
# Tests for _clean_column_name
168195
def test_target_encoder_clean_column_name(self):
169196

0 commit comments

Comments
 (0)