Skip to content

Commit 26427dd

Browse files
feat(bokeh): implement violin-box (#2683)
## Implementation: `violin-box` - bokeh

Implements the **bokeh** version of `violin-box`.

**File:** `plots/violin-box/implementations/bokeh.py`

---

:robot: *[impl-generate workflow](https://github.com/MarkusNeusinger/pyplots/actions/runs/20595338446)*

---------

Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
Co-authored-by: Claude Opus 4.5 <noreply@anthropic.com>
1 parent 4fc86f1 commit 26427dd

2 files changed

Lines changed: 248 additions & 0 deletions

File tree

Lines changed: 219 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,219 @@
""" pyplots.ai
violin-box: Violin Plot with Embedded Box Plot
Library: bokeh 3.8.1 | Python 3.13.11
Quality: 91/100 | Created: 2025-12-30
"""

import numpy as np
from bokeh.io import export_png, output_file, save
from bokeh.models import ColumnDataSource, Legend, LegendItem
from bokeh.plotting import figure


# Data - Test scores by study method (educational context)
np.random.seed(42)
categories = ["Method A", "Method B", "Method C", "Method D"]

# Create distributions with different characteristics to showcase features.
# NOTE: the order of the np.random calls below determines the sampled values
# for the fixed seed, so do not reorder the entries.
data = {
    "Method A": np.random.normal(72, 12, 120),  # Standard distribution
    "Method B": np.concatenate(
        [
            np.random.normal(78, 8, 100),  # Main group
            np.array([48, 52, 95, 98]),  # Outliers both ends
        ]
    ),
    "Method C": np.random.normal(65, 15, 120),  # Wider spread
    "Method D": np.concatenate(
        [
            np.random.normal(80, 6, 90),  # Tight main group
            np.random.normal(55, 3, 20),  # Bimodal lower group
            np.array([38, 40, 42]),  # Low outliers
        ]
    ),
}

# Colors - Python Blue for violin, golden yellow for box
violin_color = "#306998"
box_color = "#FFD43B"

# Create figure with categorical x-axis
p = figure(
    width=4800,
    height=2700,
    title="violin-box \u00b7 bokeh \u00b7 pyplots.ai",
    x_axis_label="Study Method",
    y_axis_label="Test Score",
    x_range=categories,
    toolbar_location=None,
)

# Styling for 4800x2700 px
p.title.text_font_size = "36pt"
p.xaxis.axis_label_text_font_size = "28pt"
p.yaxis.axis_label_text_font_size = "28pt"
p.xaxis.major_label_text_font_size = "22pt"
p.yaxis.major_label_text_font_size = "22pt"

# Grid styling - visible but subtle
p.xgrid.grid_line_color = None
p.ygrid.grid_line_alpha = 0.3
p.ygrid.grid_line_dash = "dashed"

# Background styling
p.background_fill_color = None
p.border_fill_color = None

# Glyph half-widths, expressed as categorical offsets (fractions of the
# spacing between adjacent categories). The violin is mirrored, so its widest
# point spans 2 * violin_width = 0.76 of the category spacing.
violin_width = 0.38
box_width = 0.08
cap_width = 0.05

# Track the first renderer of each kind so the legend gets one entry per glyph
# type instead of one per category.
violin_renderer = None
box_renderer = None
median_renderer = None
outlier_renderer = None

# Collect outlier data for all categories
all_outliers_x = []
all_outliers_y = []

# Draw violins with embedded box plots for each category
for cat in categories:
    values = np.asarray(data[cat], dtype=float)
    n = len(values)

    # Quartiles are shared by the KDE bandwidth rule and the box plot.
    q1, median, q3 = np.percentile(values, [25, 50, 75])
    iqr = q3 - q1

    # Compute KDE using Gaussian kernel (Silverman's rule for bandwidth).
    # The 0.1 floor keeps the kernel from collapsing when the spread is ~0.
    std = np.std(values)
    bandwidth = max(0.9 * min(std, iqr / 1.34) * n ** (-0.2), 0.1)

    # Evaluate the density on a grid that extends one std beyond the data so
    # the violin tails taper off instead of being cut flat.
    y_grid = np.linspace(values.min() - std, values.max() + std, 100)
    # Broadcast grid (rows) against samples (columns) and sum the kernels.
    z = (y_grid[:, np.newaxis] - values[np.newaxis, :]) / bandwidth
    density = np.exp(-0.5 * z**2).sum(axis=1)
    density /= n * bandwidth * np.sqrt(2 * np.pi)

    # Scale density so the widest point of the violin equals violin_width
    density_scaled = density / density.max() * violin_width

    # Create violin outline: up the left side, then back down the right side.
    # Bokeh addresses positions inside a category as (category, offset) tuples.
    xs_left = [(cat, float(-d)) for d in density_scaled]
    xs_right = [(cat, float(d)) for d in density_scaled[::-1]]

    # Draw violin patch
    vr = p.patch(
        xs_left + xs_right,
        list(y_grid) + list(y_grid[::-1]),
        fill_color=violin_color,
        fill_alpha=0.6,
        line_color=violin_color,
        line_width=3,
    )
    if violin_renderer is None:
        violin_renderer = vr

    # Box plot statistics (Tukey convention): points beyond 1.5 * IQR from
    # the box are outliers, and each whisker ends at the most extreme
    # observation that is NOT an outlier, so a whisker never ends at a value
    # where there is no data. Clamping against q1/q3 keeps the whisker from
    # starting inside the box in the rare case of no inlier beyond a quartile.
    lower_fence = q1 - 1.5 * iqr
    upper_fence = q3 + 1.5 * iqr
    is_outlier = (values < lower_fence) | (values > upper_fence)
    inliers = values[~is_outlier]
    whisker_low = min(inliers.min(), q1)
    whisker_high = max(inliers.max(), q3)

    # Draw box inside violin (IQR from Q1 to Q3)
    br = p.quad(
        left=[(cat, -box_width)],
        right=[(cat, box_width)],
        top=[q3],
        bottom=[q1],
        fill_color=box_color,
        fill_alpha=0.9,
        line_color="black",
        line_width=3,
    )
    if box_renderer is None:
        box_renderer = br

    # Draw median line (slightly wider than the box so it stays visible)
    mr = p.segment(
        x0=[(cat, -box_width * 1.3)],
        y0=[median],
        x1=[(cat, box_width * 1.3)],
        y1=[median],
        line_color="black",
        line_width=5,
    )
    if median_renderer is None:
        median_renderer = mr

    # Whiskers (vertical lines from box to whisker limits)
    p.segment(x0=[cat], y0=[q1], x1=[cat], y1=[whisker_low], line_color="black", line_width=3)
    p.segment(x0=[cat], y0=[q3], x1=[cat], y1=[whisker_high], line_color="black", line_width=3)

    # Whisker caps
    p.segment(
        x0=[(cat, -cap_width)],
        y0=[whisker_low],
        x1=[(cat, cap_width)],
        y1=[whisker_low],
        line_color="black",
        line_width=3,
    )
    p.segment(
        x0=[(cat, -cap_width)],
        y0=[whisker_high],
        x1=[(cat, cap_width)],
        y1=[whisker_high],
        line_color="black",
        line_width=3,
    )

    # Collect outliers; they are drawn in one scatter call after the loop
    outliers = values[is_outlier]
    all_outliers_x.extend([cat] * len(outliers))
    all_outliers_y.extend(outliers.tolist())

# Draw all outliers
if all_outliers_x:
    outlier_source = ColumnDataSource(data={"x": all_outliers_x, "y": all_outliers_y})
    outlier_renderer = p.scatter(
        x="x",
        y="y",
        source=outlier_source,
        size=18,
        fill_color="white",
        line_color="black",
        line_width=3,
        marker="circle",
    )

# Create legend
legend_items = [
    LegendItem(label="Distribution (KDE)", renderers=[violin_renderer]),
    LegendItem(label="IQR (Q1-Q3)", renderers=[box_renderer]),
    LegendItem(label="Median", renderers=[median_renderer]),
]
# Only advertise outliers when at least one was actually drawn
if outlier_renderer is not None:
    legend_items.append(LegendItem(label="Outliers", renderers=[outlier_renderer]))

legend = Legend(items=legend_items, location="top_right")
legend.label_text_font_size = "20pt"
legend.glyph_height = 30
legend.glyph_width = 30
legend.spacing = 15
legend.padding = 20
legend.background_fill_alpha = 0.8
p.add_layout(legend, "right")

# Save outputs
export_png(p, filename="plot.png")
output_file("plot.html")
save(p)
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
library: bokeh
specification_id: violin-box
created: '2025-12-30T11:26:52Z'
updated: '2025-12-30T11:55:25Z'
generated_by: claude-opus-4-5-20251101
workflow_run: 20595338446
issue: 0
python_version: 3.13.11
library_version: 3.8.1
preview_url: https://storage.googleapis.com/pyplots-images/plots/violin-box/bokeh/plot.png
preview_thumb: https://storage.googleapis.com/pyplots-images/plots/violin-box/bokeh/plot_thumb.png
preview_html: https://storage.googleapis.com/pyplots-images/plots/violin-box/bokeh/plot.html
quality_score: 91
review:
  strengths:
  - Excellent manual implementation of violin plot using KDE calculation and patch
    rendering
  - Clean color scheme with Python Blue violins and golden yellow boxes provides excellent
    visual distinction
  - Proper legend with all four components clearly identified
  - Good data variety showing different distribution shapes (normal, bimodal, with
    outliers)
  - Correct title format following pyplots.ai conventions
  - Appropriate font sizing for 4800x2700 canvas
  weaknesses:
  - Legend is placed outside the plot area creating slight visual separation from
    the main plot
  - Outlier markers could be slightly larger for better visibility at full resolution
  - Grid/legend positioning could be improved by placing legend inside the plot area

0 commit comments

Comments
 (0)