|
1 | 1 | """ pyplots.ai |
2 | 2 | violin-basic: Basic Violin Plot |
3 | | -Library: bokeh 3.8.1 | Python 3.13.11 |
4 | | -Quality: 91/100 | Created: 2025-12-23 |
| 3 | +Library: bokeh 3.8.2 | Python 3.14.3 |
| 4 | +Quality: /100 | Updated: 2026-02-21 |
5 | 5 | """ |
6 | 6 |
|
7 | 7 | import numpy as np |
8 | 8 | from bokeh.io import export_png, output_file, save |
| 9 | +from bokeh.models import NumeralTickFormatter |
9 | 10 | from bokeh.plotting import figure |
| 11 | +from scipy.stats import gaussian_kde |
10 | 12 |
|
11 | 13 |
|
12 | 14 | # Data - Salary distributions by department (realistic scenario) |
|
15 | 17 | data = { |
16 | 18 | "Engineering": np.random.normal(85000, 15000, 150), |
17 | 19 | "Marketing": np.random.normal(65000, 12000, 150), |
18 | | - "Sales": np.random.normal(70000, 20000, 150), # Higher variance |
19 | | - "Support": np.random.normal(50000, 8000, 150), # Lower variance |
| 20 | + "Sales": np.random.normal(70000, 20000, 150), |
| 21 | + "Support": np.random.normal(50000, 8000, 150), |
20 | 22 | } |
21 | 23 |
|
22 | | -# Colors - Python Blue and Yellow first, then accessible colors |
| 24 | +# Colors - Python Blue first, then accessible palette |
23 | 25 | colors = ["#306998", "#FFD43B", "#4B8BBE", "#FFE873"] |
24 | 26 |
|
25 | | -# Create figure with categorical x-axis |
| 27 | +# Create figure |
26 | 28 | p = figure( |
27 | 29 | width=4800, |
28 | 30 | height=2700, |
29 | | - title="violin-basic · bokeh · pyplots.ai", |
| 31 | + title="violin-basic \u00b7 bokeh \u00b7 pyplots.ai", |
30 | 32 | x_axis_label="Department", |
31 | 33 | y_axis_label="Annual Salary (USD)", |
32 | 34 | x_range=categories, |
33 | 35 | toolbar_location=None, |
34 | 36 | ) |
35 | 37 |
|
36 | | -# Styling for 4800x2700 px |
| 38 | +# Text sizing for 4800x2700 px |
37 | 39 | p.title.text_font_size = "36pt" |
38 | 40 | p.xaxis.axis_label_text_font_size = "28pt" |
39 | 41 | p.yaxis.axis_label_text_font_size = "28pt" |
40 | 42 | p.xaxis.major_label_text_font_size = "22pt" |
41 | 43 | p.yaxis.major_label_text_font_size = "22pt" |
42 | 44 |
|
43 | | -# Grid styling |
| 45 | +# Format y-axis as readable currency |
| 46 | +p.yaxis.formatter = NumeralTickFormatter(format="$0,0") |
| 47 | + |
| 48 | +# Visual refinement - clean design |
44 | 49 | p.xgrid.grid_line_color = None |
45 | | -p.ygrid.grid_line_alpha = 0.3 |
| 50 | +p.ygrid.grid_line_alpha = 0.2 |
46 | 51 | p.ygrid.grid_line_dash = "dashed" |
| 52 | +p.outline_line_color = None |
| 53 | +p.axis.minor_tick_line_color = None |
| 54 | +p.axis.major_tick_line_color = None |
| 55 | +p.axis.axis_line_color = "#cccccc" |
47 | 56 |
|
48 | | -# Violin width scaling (0.4 = 40% of category spacing) |
| 57 | +# Violin width scaling |
49 | 58 | violin_width = 0.4 |
50 | 59 |
|
51 | 60 | # Draw violins for each category |
52 | 61 | for i, cat in enumerate(categories): |
53 | 62 | values = data[cat] |
54 | | - n = len(values) |
55 | 63 |
|
56 | | - # Compute KDE using Gaussian kernel (Silverman's rule for bandwidth) |
| 64 | + # Compute KDE with scipy's gaussian_kde (default bandwidth: Scott's rule) |
| 65 | + kde = gaussian_kde(values) |
57 | 66 | std = np.std(values) |
58 | | - iqr = np.percentile(values, 75) - np.percentile(values, 25) |
59 | | - bandwidth = 0.9 * min(std, iqr / 1.34) * n ** (-0.2) |
60 | | - bandwidth = max(bandwidth, 0.1) |
61 | | - |
62 | 67 | y_grid = np.linspace(values.min() - std, values.max() + std, 100) |
63 | | - density = np.zeros_like(y_grid, dtype=float) |
64 | | - for xi in values: |
65 | | - density += np.exp(-0.5 * ((y_grid - xi) / bandwidth) ** 2) |
66 | | - density /= n * bandwidth * np.sqrt(2 * np.pi) |
| 68 | + density = kde(y_grid) |
67 | 69 |
|
68 | 70 | # Scale density to violin width |
69 | 71 | density_scaled = density / density.max() * violin_width |
70 | 72 |
|
71 | | - # Create violin shape (mirrored on both sides) |
72 | | - x_left = -density_scaled |
73 | | - x_right = density_scaled |
74 | | - |
75 | | - # Convert to categorical offset format for bokeh |
76 | | - xs_left = [(cat, float(xl)) for xl in x_left] |
77 | | - xs_right = [(cat, float(xr)) for xr in x_right[::-1]] |
| 73 | + # Create mirrored violin shape using categorical offset tuples |
| 74 | + xs_left = [(cat, float(-d)) for d in density_scaled] |
| 75 | + xs_right = [(cat, float(d)) for d in density_scaled[::-1]] |
78 | 76 |
|
79 | 77 | # Draw violin patch |
80 | 78 | p.patch( |
|
86 | 84 | line_width=3, |
87 | 85 | ) |
88 | 86 |
|
89 | | - # Compute quartiles |
| 87 | + # Quartiles and median |
90 | 88 | q1, median, q3 = np.percentile(values, [25, 50, 75]) |
91 | 89 |
|
92 | | - # Draw thin box inside violin (quartile markers) |
| 90 | + # Inner box (Q1-Q3) |
93 | 91 | box_width = 0.06 |
94 | 92 | p.quad( |
95 | 93 | left=[(cat, -box_width)], |
|
102 | 100 | line_width=3, |
103 | 101 | ) |
104 | 102 |
|
105 | | - # Draw median line |
| 103 | + # Median line |
106 | 104 | p.segment( |
107 | 105 | x0=[(cat, -box_width * 1.5)], |
108 | 106 | y0=[median], |
|
112 | 110 | line_width=5, |
113 | 111 | ) |
114 | 112 |
|
115 | | - # Whiskers (to 1.5*IQR or data extent) |
| 113 | + # Whiskers reach 1.5*IQR beyond the quartiles, clamped to the data extent |
116 | 114 | iqr_val = q3 - q1 |
117 | 115 | whisker_low = max(values.min(), q1 - 1.5 * iqr_val) |
118 | 116 | whisker_high = min(values.max(), q3 + 1.5 * iqr_val) |
119 | 117 |
|
120 | | - # Vertical whisker lines |
121 | 118 | p.segment(x0=[cat], y0=[q1], x1=[cat], y1=[whisker_low], line_color="black", line_width=3) |
122 | 119 | p.segment(x0=[cat], y0=[q3], x1=[cat], y1=[whisker_high], line_color="black", line_width=3) |
123 | 120 |
|
|
0 commit comments