Skip to content

Commit 3b71e4b

Browse files
author
Sam Borms
authored
Merge pull request #108 from PythonPredictions/issue-#32-improve-PIGs
issue-#32-improve-PIGs
2 parents eb3a400 + 53a9858 commit 3b71e4b

1 file changed

Lines changed: 25 additions & 5 deletions

File tree

cobra/evaluation/pigs_tables.py

Lines changed: 25 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -147,9 +147,9 @@ def plot_incidence(pig_tables: pd.DataFrame,
147147
with plt.style.context("seaborn-ticks"):
148148
fig, ax = plt.subplots(figsize=dim)
149149

150-
# -----------------
150+
# --------------------------
151151
# Left axis - average target
152-
# -----------------
152+
# --------------------------
153153
ax.plot(df_plot['label'], df_plot['avg_target'],
154154
color="#00ccff", marker=".",
155155
markersize=20, linewidth=3,
@@ -168,7 +168,9 @@ def plot_incidence(pig_tables: pd.DataFrame,
168168
ax.set_ylabel('incidence' if model_type == "classification" else "mean target value",
169169
fontsize=16)
170170
ax.set_xlabel('{} bins' ''.format(variable), fontsize=16)
171-
ax.xaxis.set_tick_params(rotation=45, labelsize=14)
171+
ax.xaxis.set_tick_params(labelsize=14)
172+
plt.setp(ax.get_xticklabels(),
173+
rotation=45, ha="right", rotation_mode="anchor")
172174
ax.yaxis.set_tick_params(labelsize=14)
173175

174176
if model_type == "classification":
@@ -177,6 +179,24 @@ def plot_incidence(pig_tables: pd.DataFrame,
177179
ax.set_yticks(np.arange(0, max(df_plot['avg_target'])+0.05, 0.05))
178180
ax.yaxis.set_major_formatter(
179181
FuncFormatter(lambda y, _: '{:.1%}'.format(y)))
182+
elif model_type == "regression":
183+
# If both the difference between the highest avg target of all bins
184+
# versus the global avg target AND the difference between the
185+
# lowest avg target versus the global avg target are smaller
186+
# than 25% of the global avg target itself, we increase the y
187+
# axis range, to avoid that the minor avg target differences are
188+
# spread out over the configured figure height, suggesting
189+
# incorrectly that there are big differences in avg target across
190+
# the bins and versus the global avg target.
191+
# (Motivation for the AND above: if on one end there IS enough
192+
# difference, the effect that we discuss here does not occur.)
193+
global_avg_target = max(df_plot['global_avg_target']) # series of same number, for every bin.
194+
if (np.abs((max(df_plot['avg_target']) - global_avg_target))
195+
/ global_avg_target < 0.25) \
196+
and (np.abs((min(df_plot['avg_target']) - global_avg_target))
197+
/ global_avg_target < 0.25):
198+
ax.set_ylim(global_avg_target * 0.75,
199+
global_avg_target * 1.25)
180200

181201
# Remove ticks but keep the labels
182202
ax.tick_params(axis='both', which='both', length=0)
@@ -192,13 +212,13 @@ def plot_incidence(pig_tables: pd.DataFrame,
192212
align='center', color="#939598", zorder=1)
193213

194214
# Set labels & ticks
195-
ax2.set_ylabel('population size', fontsize=16)
196215
ax2.set_xlabel('{} bins' ''.format(variable), fontsize=16)
197216
ax2.xaxis.set_tick_params(rotation=45, labelsize=14)
217+
198218
ax2.yaxis.set_tick_params(labelsize=14)
199219
ax2.yaxis.set_major_formatter(
200220
FuncFormatter(lambda y, _: '{:.1%}'.format(y)))
201-
221+
ax2.set_ylabel('population size', fontsize=16)
202222
ax2.tick_params(axis='y', colors="#939598")
203223
ax2.yaxis.label.set_color('#939598')
204224

0 commit comments

Comments
 (0)