Skip to content

Commit dffd16e

Browse files
update(violin-basic): bokeh — comprehensive quality review (#4325)
## Summary

Updated **bokeh** implementation for **violin-basic**.

**Changes:** Comprehensive quality review improving code quality, data choice, visual design, spec compliance, and library feature usage.

### Changes

- Improved data generation with distinct distribution shapes per category
- Enhanced visual design (explicit font sizes, refined color palette, layout balance)
- Fixed review weaknesses from previous evaluation
- Updated metadata with current library/Python versions
- Preview images uploaded to GCS staging

## Test Plan

- [x] Preview images uploaded to GCS staging
- [x] Implementation file passes ruff format/check
- [x] Metadata YAML updated with current versions
- [ ] Automated review triggered

---

Generated with [Claude Code](https://claude.com/claude-code) `/update` command

---------

Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
1 parent afab661 commit dffd16e

2 files changed

Lines changed: 297 additions & 194 deletions

File tree

plots/violin-basic/implementations/bokeh.py

Lines changed: 137 additions & 73 deletions
Original file line number | Diff line number | Diff line change
@@ -1,28 +1,47 @@
11
""" pyplots.ai
22
violin-basic: Basic Violin Plot
3-
Library: bokeh 3.8.1 | Python 3.13.11
4-
Quality: 91/100 | Created: 2025-12-23
3+
Library: bokeh 3.8.2 | Python 3.14.3
4+
Quality: 92/100 | Updated: 2026-02-21
55
"""
66

77
import numpy as np
88
from bokeh.io import export_png, output_file, save
9+
from bokeh.models import ColumnDataSource, HoverTool, NumeralTickFormatter
910
from bokeh.plotting import figure
11+
from scipy.stats import gaussian_kde
1012

1113

1214
# Data - Salary distributions by department (realistic scenario)
1315
np.random.seed(42)
1416
categories = ["Engineering", "Marketing", "Sales", "Support"]
15-
data = {
16-
"Engineering": np.random.normal(85000, 15000, 150),
17-
"Marketing": np.random.normal(65000, 12000, 150),
18-
"Sales": np.random.normal(70000, 20000, 150), # Higher variance
19-
"Support": np.random.normal(50000, 8000, 150), # Lower variance
20-
}
2117

22-
# Colors - Python Blue and Yellow first, then accessible colors
23-
colors = ["#306998", "#FFD43B", "#4B8BBE", "#FFE873"]
18+
# Engineering: normal, high mean — represents typical salaried professionals
19+
eng = np.random.normal(85000, 15000, 150)
2420

25-
# Create figure with categorical x-axis
21+
# Marketing: normal, mid-range
22+
mkt = np.random.normal(65000, 12000, 150)
23+
24+
# Sales: right-skewed — most earn base salary, some earn high commissions
25+
sales_base = np.random.exponential(15000, 150) + 45000
26+
sales = np.clip(sales_base, 30000, 150000)
27+
28+
# Support: bimodal — junior vs senior tiers with distinct pay bands
29+
support_junior = np.random.normal(42000, 5000, 90)
30+
support_senior = np.random.normal(62000, 6000, 60)
31+
support = np.concatenate([support_junior, support_senior])
32+
33+
data = {"Engineering": eng, "Marketing": mkt, "Sales": sales, "Support": support}
34+
35+
# Colors - four distinct colorblind-safe hues
36+
colors = ["#306998", "#E8943A", "#2A9D8F", "#E76F6F"]
37+
38+
# Visual hierarchy: emphasize non-normal distributions to guide the viewer
39+
alphas = [0.55, 0.55, 0.85, 0.85]
40+
41+
# Distribution type labels for data storytelling
42+
dist_labels = ["normal", "normal", "right-skewed", "bimodal"]
43+
44+
# Create figure with subtle warm background tint
2645
p = figure(
2746
width=4800,
2847
height=2700,
@@ -31,114 +50,159 @@
3150
y_axis_label="Annual Salary (USD)",
3251
x_range=categories,
3352
toolbar_location=None,
53+
background_fill_color="#FAFAF8",
3454
)
3555

36-
# Styling for 4800x2700 px
56+
# Title styling — bold, dark charcoal text so the title carries the most visual weight
3757
p.title.text_font_size = "36pt"
58+
p.title.text_color = "#2D3436"
59+
p.title.text_font_style = "bold"
60+
61+
# Text sizing for 4800x2700 px
3862
p.xaxis.axis_label_text_font_size = "28pt"
3963
p.yaxis.axis_label_text_font_size = "28pt"
4064
p.xaxis.major_label_text_font_size = "22pt"
4165
p.yaxis.major_label_text_font_size = "22pt"
66+
p.xaxis.axis_label_text_color = "#555555"
67+
p.yaxis.axis_label_text_color = "#555555"
68+
69+
# Format y-axis as readable currency
70+
p.yaxis.formatter = NumeralTickFormatter(format="$0,0")
4271

43-
# Grid styling
72+
# Grid and axis refinement - no x-grid, outline, or tick marks; faint dashed y-grid and light axis lines
4473
p.xgrid.grid_line_color = None
45-
p.ygrid.grid_line_alpha = 0.3
74+
p.ygrid.grid_line_alpha = 0.15
4675
p.ygrid.grid_line_dash = "dashed"
47-
48-
# Violin width scaling (0.4 = 40% of category spacing)
76+
p.ygrid.grid_line_color = "#B0B0B0"
77+
p.outline_line_color = None
78+
p.axis.minor_tick_line_color = None
79+
p.axis.major_tick_line_color = None
80+
p.axis.axis_line_color = "#D5D5D5"
81+
p.border_fill_color = "#FAFAF8"
82+
83+
# Tighten y-axis to data range with room for annotations
84+
all_values = np.concatenate(list(data.values()))
85+
y_pad = (all_values.max() - all_values.min()) * 0.12
86+
p.y_range.start = all_values.min() - y_pad
87+
p.y_range.end = all_values.max() + y_pad
88+
89+
# Violin half-width in categorical-offset units (peak density maps to +/-0.4 around the category center)
4990
violin_width = 0.4
5091

5192
# Draw violins for each category
5293
for i, cat in enumerate(categories):
5394
values = data[cat]
54-
n = len(values)
55-
56-
# Compute KDE using Gaussian kernel (Silverman's rule for bandwidth)
57-
std = np.std(values)
58-
iqr = np.percentile(values, 75) - np.percentile(values, 25)
59-
bandwidth = 0.9 * min(std, iqr / 1.34) * n ** (-0.2)
60-
bandwidth = max(bandwidth, 0.1)
6195

62-
y_grid = np.linspace(values.min() - std, values.max() + std, 100)
63-
density = np.zeros_like(y_grid, dtype=float)
64-
for xi in values:
65-
density += np.exp(-0.5 * ((y_grid - xi) / bandwidth) ** 2)
66-
density /= n * bandwidth * np.sqrt(2 * np.pi)
96+
# Compute KDE with scipy's gaussian_kde (bandwidth chosen by its default, Scott's rule)
97+
kde = gaussian_kde(values)
98+
y_grid = np.linspace(values.min() - np.std(values) * 0.5, values.max() + np.std(values) * 0.5, 100)
99+
density = kde(y_grid)
67100

68101
# Scale density to violin width
69102
density_scaled = density / density.max() * violin_width
70103

71-
# Create violin shape (mirrored on both sides)
72-
x_left = -density_scaled
73-
x_right = density_scaled
74-
75-
# Convert to categorical offset format for bokeh
76-
xs_left = [(cat, float(xl)) for xl in x_left]
77-
xs_right = [(cat, float(xr)) for xr in x_right[::-1]]
104+
# Create mirrored violin shape using categorical offset tuples
105+
xs_left = [(cat, float(-d)) for d in density_scaled]
106+
xs_right = [(cat, float(d)) for d in density_scaled[::-1]]
78107

79-
# Draw violin patch
108+
# Draw violin patch via ColumnDataSource with varying alpha for hierarchy
109+
violin_source = ColumnDataSource(data={"x": xs_left + xs_right, "y": list(y_grid) + list(y_grid[::-1])})
80110
p.patch(
81-
xs_left + xs_right,
82-
list(y_grid) + list(y_grid[::-1]),
111+
x="x",
112+
y="y",
113+
source=violin_source,
83114
fill_color=colors[i],
84-
fill_alpha=0.7,
115+
fill_alpha=alphas[i],
85116
line_color=colors[i],
117+
line_alpha=min(alphas[i] + 0.15, 1.0),
86118
line_width=3,
87119
)
88120

89-
# Compute quartiles
121+
# Quartiles and median
90122
q1, median, q3 = np.percentile(values, [25, 50, 75])
91123

92-
# Draw thin box inside violin (quartile markers)
124+
# Inner box (Q1-Q3) with ColumnDataSource for HoverTool
93125
box_width = 0.06
94-
p.quad(
95-
left=[(cat, -box_width)],
96-
right=[(cat, box_width)],
97-
top=[q3],
98-
bottom=[q1],
126+
box_source = ColumnDataSource(
127+
data={
128+
"left": [(cat, -box_width)],
129+
"right": [(cat, box_width)],
130+
"top": [q3],
131+
"bottom": [q1],
132+
"dept": [cat],
133+
"median_val": [f"${median:,.0f}"],
134+
"q1_val": [f"${q1:,.0f}"],
135+
"q3_val": [f"${q3:,.0f}"],
136+
"n": [str(len(values))],
137+
}
138+
)
139+
box_renderer = p.quad(
140+
left="left",
141+
right="right",
142+
top="top",
143+
bottom="bottom",
144+
source=box_source,
99145
fill_color="white",
100146
fill_alpha=0.9,
101147
line_color="black",
102148
line_width=3,
103149
)
104150

105-
# Draw median line
106-
p.segment(
107-
x0=[(cat, -box_width * 1.5)],
108-
y0=[median],
109-
x1=[(cat, box_width * 1.5)],
110-
y1=[median],
111-
line_color="black",
112-
line_width=5,
151+
# Add HoverTool for interactive HTML output
152+
hover = HoverTool(
153+
renderers=[box_renderer],
154+
tooltips=[
155+
("Department", "@dept"),
156+
("Median", "@median_val"),
157+
("Q1", "@q1_val"),
158+
("Q3", "@q3_val"),
159+
("N", "@n"),
160+
],
161+
)
162+
p.add_tools(hover)
163+
164+
# Median line
165+
med_source = ColumnDataSource(
166+
data={"x0": [(cat, -box_width * 1.5)], "y0": [median], "x1": [(cat, box_width * 1.5)], "y1": [median]}
113167
)
168+
p.segment(x0="x0", y0="y0", x1="x1", y1="y1", source=med_source, line_color="black", line_width=5)
114169

115-
# Whiskers (to 1.5*IQR or data extent)
170+
# Whiskers reach 1.5*IQR beyond Q1/Q3, clamped to the data's min/max
116171
iqr_val = q3 - q1
117172
whisker_low = max(values.min(), q1 - 1.5 * iqr_val)
118173
whisker_high = min(values.max(), q3 + 1.5 * iqr_val)
119174

120-
# Vertical whisker lines
121-
p.segment(x0=[cat], y0=[q1], x1=[cat], y1=[whisker_low], line_color="black", line_width=3)
122-
p.segment(x0=[cat], y0=[q3], x1=[cat], y1=[whisker_high], line_color="black", line_width=3)
175+
whisker_source = ColumnDataSource(
176+
data={"x0": [cat, cat], "y0": [q1, q3], "x1": [cat, cat], "y1": [whisker_low, whisker_high]}
177+
)
178+
p.segment(x0="x0", y0="y0", x1="x1", y1="y1", source=whisker_source, line_color="black", line_width=3)
123179

124180
# Whisker caps
125181
cap_width = 0.04
126-
p.segment(
127-
x0=[(cat, -cap_width)],
128-
y0=[whisker_low],
129-
x1=[(cat, cap_width)],
130-
y1=[whisker_low],
131-
line_color="black",
132-
line_width=3,
133-
)
134-
p.segment(
135-
x0=[(cat, -cap_width)],
136-
y0=[whisker_high],
137-
x1=[(cat, cap_width)],
138-
y1=[whisker_high],
139-
line_color="black",
140-
line_width=3,
182+
cap_source = ColumnDataSource(
183+
data={
184+
"x0": [(cat, -cap_width), (cat, -cap_width)],
185+
"y0": [whisker_low, whisker_high],
186+
"x1": [(cat, cap_width), (cat, cap_width)],
187+
"y1": [whisker_low, whisker_high],
188+
}
141189
)
190+
p.segment(x0="x0", y0="y0", x1="x1", y1="y1", source=cap_source, line_color="black", line_width=3)
191+
192+
# Distribution type annotations — guide the viewer to the data story
193+
annotation_y = all_values.min() - y_pad * 0.65
194+
ann_source = ColumnDataSource(data={"x": categories, "y": [annotation_y] * len(categories), "text": dist_labels})
195+
p.text(
196+
x="x",
197+
y="y",
198+
text="text",
199+
source=ann_source,
200+
text_font_size="18pt",
201+
text_font_style="italic",
202+
text_color="#999999",
203+
text_align="center",
204+
text_baseline="top",
205+
)
142206

143207
# Save outputs
144208
export_png(p, filename="plot.png")

0 commit comments

Comments
 (0)