@@ -147,9 +147,9 @@ def plot_incidence(pig_tables: pd.DataFrame,
147147 with plt .style .context ("seaborn-ticks" ):
148148 fig , ax = plt .subplots (figsize = dim )
149149
150- # -----------------
150+ # --------------------------
151151 # Left axis - average target
152- # -----------------
152+ # --------------------------
153153 ax .plot (df_plot ['label' ], df_plot ['avg_target' ],
154154 color = "#00ccff" , marker = "." ,
155155 markersize = 20 , linewidth = 3 ,
@@ -168,7 +168,9 @@ def plot_incidence(pig_tables: pd.DataFrame,
168168 ax .set_ylabel ('incidence' if model_type == "classification" else "mean target value" ,
169169 fontsize = 16 )
170170 ax .set_xlabel ('{} bins' '' .format (variable ), fontsize = 16 )
171- ax .xaxis .set_tick_params (rotation = 45 , labelsize = 14 )
171+ ax .xaxis .set_tick_params (labelsize = 14 )
172+ plt .setp (ax .get_xticklabels (),
173+ rotation = 45 , ha = "right" , rotation_mode = "anchor" )
172174 ax .yaxis .set_tick_params (labelsize = 14 )
173175
174176 if model_type == "classification" :
@@ -177,6 +179,24 @@ def plot_incidence(pig_tables: pd.DataFrame,
177179 ax .set_yticks (np .arange (0 , max (df_plot ['avg_target' ])+ 0.05 , 0.05 ))
178180 ax .yaxis .set_major_formatter (
179181 FuncFormatter (lambda y , _ : '{:.1%}' .format (y )))
182+ elif model_type == "regression" :
183+ # If both the difference between the highest avg target of all bins
184+ # versus the global avg target AND the difference between the
185+ # lowest avg target versus the global avg target are smaller
186+ # than 25% of the global avg target itself, we increase the y
187+ # axis range, to avoid that the minor avg target differences are
188+ # spread out over the configured figure height, suggesting
189+ # incorrectly that there are big differences in avg target across
190+ # the bins and versus the global avg target.
191+ # (Motivation for the AND above: if on one end there IS enough
192+ # difference, the effect that we discuss here does not occur.)
193+ global_avg_target = max (df_plot ['global_avg_target' ]) # series of same number, for every bin.
194+ if (np .abs ((max (df_plot ['avg_target' ]) - global_avg_target ))
195+ / global_avg_target < 0.25 ) \
196+ and (np .abs ((min (df_plot ['avg_target' ]) - global_avg_target ))
197+ / global_avg_target < 0.25 ):
198+ ax .set_ylim (global_avg_target * 0.75 ,
199+ global_avg_target * 1.25 )
180200
181201 # Remove ticks but keep the labels
182202 ax .tick_params (axis = 'both' , which = 'both' , length = 0 )
@@ -192,13 +212,13 @@ def plot_incidence(pig_tables: pd.DataFrame,
192212 align = 'center' , color = "#939598" , zorder = 1 )
193213
194214 # Set labels & ticks
195- ax2 .set_ylabel ('population size' , fontsize = 16 )
196215 ax2 .set_xlabel ('{} bins' '' .format (variable ), fontsize = 16 )
197216 ax2 .xaxis .set_tick_params (rotation = 45 , labelsize = 14 )
217+
198218 ax2 .yaxis .set_tick_params (labelsize = 14 )
199219 ax2 .yaxis .set_major_formatter (
200220 FuncFormatter (lambda y , _ : '{:.1%}' .format (y )))
201-
221+ ax2 . set_ylabel ( 'population size' , fontsize = 16 )
202222 ax2 .tick_params (axis = 'y' , colors = "#939598" )
203223 ax2 .yaxis .label .set_color ('#939598' )
204224
0 commit comments