|
| 1 | +""" pyplots.ai |
| 2 | +violin-box: Violin Plot with Embedded Box Plot |
| 3 | +Library: bokeh 3.8.1 | Python 3.13.11 |
| 4 | +Quality: 91/100 | Created: 2025-12-30 |
| 5 | +""" |
| 6 | + |
| 7 | +import numpy as np |
| 8 | +from bokeh.io import export_png, output_file, save |
| 9 | +from bokeh.models import ColumnDataSource, Legend, LegendItem |
| 10 | +from bokeh.plotting import figure |
| 11 | + |
| 12 | + |
# Data - simulated test scores for four study methods (educational context)
np.random.seed(42)
categories = ["Method A", "Method B", "Method C", "Method D"]

# Each method gets a differently shaped distribution so the violins look distinct.
# Draw order matters: with a global seed, every sample depends on how many
# values were drawn before it.
data = {}

# Method A: plain normal distribution
data["Method A"] = np.random.normal(72, 12, 120)

# Method B: tight main group plus hand-placed outliers at both ends
data["Method B"] = np.concatenate((np.random.normal(78, 8, 100), np.array([48, 52, 95, 98])))

# Method C: lower centre with a wider spread
data["Method C"] = np.random.normal(65, 15, 120)

# Method D: bimodal (tight upper group + small lower group) plus low outliers
data["Method D"] = np.concatenate(
    (
        np.random.normal(80, 6, 90),
        np.random.normal(55, 3, 20),
        np.array([38, 40, 42]),
    )
)
| 35 | + |
# Palette: Python blue fills the violins, golden yellow fills the embedded boxes
violin_color, box_color = "#306998", "#FFD43B"

# Figure on a categorical x-axis (one slot per entry of `categories`), toolbar hidden
p = figure(
    x_range=categories,
    width=4800,
    height=2700,
    title="violin-box \u00b7 bokeh \u00b7 pyplots.ai",
    x_axis_label="Study Method",
    y_axis_label="Test Score",
    toolbar_location=None,
)

# Text sizes chosen for the 4800x2700 px canvas; `p.axis` addresses both axes at once
p.title.text_font_size = "36pt"
p.axis.axis_label_text_font_size = "28pt"
p.axis.major_label_text_font_size = "22pt"

# Grid: no vertical lines, faint dashed horizontal lines
p.xgrid.grid_line_color = None
p.ygrid.grid_line_dash = "dashed"
p.ygrid.grid_line_alpha = 0.3

# No fill behind the plot area or the surrounding border
p.border_fill_color = None
p.background_fill_color = None
| 66 | + |
# Maximum half-width of each violin, expressed as a categorical offset from the
# category centre (each density curve is normalised so its peak reaches this value)
violin_width = 0.38

# Half-widths of the embedded box and of the whisker caps, in the same offset units
box_width = 0.08
cap_width = 0.05

# First renderer of each glyph type, kept so a legend can reference one representative glyph
violin_renderer = None
box_renderer = None
median_renderer = None
outlier_renderer = None

# Outlier coordinates accumulated across all categories
all_outliers_x = []
all_outliers_y = []


def _silverman_bandwidth(values):
    """Return Silverman's rule-of-thumb KDE bandwidth for *values*, floored at 0.1."""
    q1, q3 = np.percentile(values, [25, 75])
    spread = min(np.std(values), (q3 - q1) / 1.34)
    # The floor keeps the kernel from collapsing when the sample has (almost) no spread
    return max(0.9 * spread * len(values) ** (-0.2), 0.1)


def _gaussian_kde(values, grid, bandwidth):
    """Evaluate a Gaussian kernel density estimate of *values* at every point of *grid*."""
    # Broadcasting builds a (len(grid), len(values)) matrix of standardised distances
    z = (grid[:, np.newaxis] - values[np.newaxis, :]) / bandwidth
    kernel_sum = np.exp(-0.5 * z**2).sum(axis=1)
    return kernel_sum / (len(values) * bandwidth * np.sqrt(2 * np.pi))


def _box_stats(values):
    """Return (q1, median, q3, whisker_low, whisker_high, outliers) for *values*.

    Whiskers end at the most extreme data points that lie within 1.5 * IQR of
    the box (Tukey convention); points beyond those fences are the outliers.
    """
    q1, median, q3 = np.percentile(values, [25, 50, 75])
    iqr = q3 - q1
    in_fence = (values >= q1 - 1.5 * iqr) & (values <= q3 + 1.5 * iqr)
    inside = values[in_fence]
    # Quartiles are interpolated, so the nearest in-fence datum can fall inside the
    # box; clamp to the box edge so a whisker never points inward
    whisker_low = min(inside.min(), q1) if inside.size else q1
    whisker_high = max(inside.max(), q3) if inside.size else q3
    return q1, median, q3, whisker_low, whisker_high, values[~in_fence]


# Draw one violin with an embedded box plot per category
for cat in categories:
    values = np.asarray(data[cat], dtype=float)

    # Density profile, evaluated one standard deviation past the data range on both sides
    spread = np.std(values)
    y_grid = np.linspace(values.min() - spread, values.max() + spread, 100)
    density = _gaussian_kde(values, y_grid, _silverman_bandwidth(values))
    half_widths = density / density.max() * violin_width

    # Closed outline: up the left side, then back down the mirrored right side.
    # (category, offset) tuples position points relative to the category centre.
    outline_x = [(cat, -float(w)) for w in half_widths]
    outline_x += [(cat, float(w)) for w in half_widths[::-1]]
    outline_y = y_grid.tolist() + y_grid[::-1].tolist()

    vr = p.patch(
        outline_x,
        outline_y,
        fill_color=violin_color,
        fill_alpha=0.6,
        line_color=violin_color,
        line_width=3,
    )
    if violin_renderer is None:
        violin_renderer = vr

    q1, median, q3, whisker_low, whisker_high, outliers = _box_stats(values)

    # Box spanning the interquartile range (Q1 to Q3)
    br = p.quad(
        left=[(cat, -box_width)],
        right=[(cat, box_width)],
        top=[q3],
        bottom=[q1],
        fill_color=box_color,
        fill_alpha=0.9,
        line_color="black",
        line_width=3,
    )
    if box_renderer is None:
        box_renderer = br

    # Median line, drawn slightly wider than the box so it stays visible
    mr = p.segment(
        x0=[(cat, -box_width * 1.3)],
        y0=[median],
        x1=[(cat, box_width * 1.3)],
        y1=[median],
        line_color="black",
        line_width=5,
    )
    if median_renderer is None:
        median_renderer = mr

    # Whisker stems: from each box edge to the corresponding whisker end
    p.segment(
        x0=[cat, cat],
        y0=[q1, q3],
        x1=[cat, cat],
        y1=[whisker_low, whisker_high],
        line_color="black",
        line_width=3,
    )

    # Whisker caps: short horizontal ticks at both whisker ends
    p.segment(
        x0=[(cat, -cap_width), (cat, -cap_width)],
        y0=[whisker_low, whisker_high],
        x1=[(cat, cap_width), (cat, cap_width)],
        y1=[whisker_low, whisker_high],
        line_color="black",
        line_width=3,
    )

    # Remember this category's outliers; they are plotted together after the loop
    all_outliers_x.extend([cat] * len(outliers))
    all_outliers_y.extend(outliers.tolist())
| 183 | + |
# Plot every collected outlier as a hollow black-edged circle, in a single scatter call
if all_outliers_x:
    outlier_source = ColumnDataSource(data=dict(x=all_outliers_x, y=all_outliers_y))
    outlier_renderer = p.scatter(
        "x",
        "y",
        source=outlier_source,
        marker="circle",
        size=18,
        line_width=3,
        line_color="black",
        fill_color="white",
    )
| 197 | + |
# Legend: one entry per glyph type, each pointing at one representative renderer
legend_entries = [
    ("Distribution (KDE)", violin_renderer),
    ("IQR (Q1-Q3)", box_renderer),
    ("Median", median_renderer),
]
# The outlier entry is only added when an outlier scatter was actually drawn
if outlier_renderer is not None:
    legend_entries.append(("Outliers", outlier_renderer))

legend_items = [LegendItem(label=text, renderers=[glyph]) for text, glyph in legend_entries]

# Styling is passed at construction time; the legend is placed in the panel right of the plot
legend = Legend(
    items=legend_items,
    location="top_right",
    label_text_font_size="20pt",
    glyph_height=30,
    glyph_width=30,
    spacing=15,
    padding=20,
    background_fill_alpha=0.8,
)
p.add_layout(legend, "right")
| 215 | + |
# Write both output formats: a static PNG and a standalone HTML page
png_path = "plot.png"
html_path = "plot.html"

export_png(p, filename=png_path)

# output_file() registers the HTML target that the following save() call writes to
output_file(html_path)
save(p)
0 commit comments