|
""" pyplots.ai
cat-box-strip: Box Plot with Strip Overlay
Library: bokeh 3.8.1 | Python 3.13.11
Quality: 92/100 | Created: 2025-12-30
"""

import numpy as np
import pandas as pd
from bokeh.io import export_png, output_file, save
from bokeh.models import ColumnDataSource, Whisker
from bokeh.plotting import figure
from bokeh.transform import jitter


# Shared styling constants so boxes, whiskers and the median line stay in sync
BOX_COLOR = "#306998"
MEDIAN_COLOR = "#1a3d5c"
BOX_WIDTH = 0.5  # box width in categorical-axis units
WHISKER_LINE_WIDTH = 2.5
WHISKER_CAP_SIZE = 30

# Data - Plant growth measurements across different soil types
np.random.seed(42)

# One row per soil type: (name, mean, std dev, sample size, hand-placed outliers).
# Rows are consumed in order, so the sequence of random draws is fixed by this table.
group_specs = [
    ("Sandy", 25, 6, 35, []),  # Lower growth, moderate variance
    ("Clay", 32, 8, 40, [55, 58]),  # Medium growth, high variance, high outliers
    ("Loamy", 42, 5, 45, []),  # High growth, low variance
    ("Silty", 35, 7, 38, [12, 14]),  # Medium-high growth, low outliers
]
categories = [spec[0] for spec in group_specs]
n_per_group = [spec[3] for spec in group_specs]

# Generate data with different distributions per group
frames = []
for name, mean, spread, n, outliers in group_specs:
    draws = np.random.normal(mean, spread, n)
    values = np.concatenate([draws, np.asarray(outliers, dtype=float)])
    frames.append(pd.DataFrame({"category": name, "value": values}))

df = pd.concat(frames, ignore_index=True)

# Calculate box plot statistics for each category
box_data = {
    "category": [],
    "q1": [],
    "q2": [],
    "q3": [],
    "upper": [],
    "lower": [],
    "median_x0": [],
    "median_x1": [],
}

for cat in categories:
    group = df.loc[df["category"] == cat, "value"]
    q1, q2, q3 = group.quantile([0.25, 0.50, 0.75])
    iqr = q3 - q1

    # Whiskers end at the most extreme observations inside the 1.5 * IQR fences
    upper_end = group[group <= q3 + 1.5 * iqr].max()
    lower_end = group[group >= q1 - 1.5 * iqr].min()

    box_data["category"].append(cat)
    box_data["q1"].append(q1)
    box_data["q2"].append(q2)
    box_data["q3"].append(q3)
    box_data["upper"].append(upper_end)
    box_data["lower"].append(lower_end)
    # Categorical offsets (factor, offset) mark the left and right box edges,
    # giving the median segment two distinct endpoints.
    box_data["median_x0"].append((cat, -BOX_WIDTH / 2))
    box_data["median_x1"].append((cat, BOX_WIDTH / 2))

box_source = ColumnDataSource(data=box_data)

# Create figure with categorical x-axis
p = figure(
    width=4800,
    height=2700,
    x_range=categories,
    title="cat-box-strip · bokeh · pyplots.ai",
    x_axis_label="Soil Type",
    y_axis_label="Plant Growth (cm)",
    tools="",
    toolbar_location=None,
)

# Styling - scaled for 4800x2700 canvas
p.title.text_font_size = "36pt"
p.xaxis.axis_label_text_font_size = "28pt"
p.yaxis.axis_label_text_font_size = "28pt"
p.xaxis.major_label_text_font_size = "24pt"
p.yaxis.major_label_text_font_size = "22pt"
p.xaxis.axis_label_standoff = 25
p.yaxis.axis_label_standoff = 25

# Grid styling
p.grid.grid_line_alpha = 0.3
p.grid.grid_line_dash = [6, 4]
p.xgrid.grid_line_color = None

# Background
p.background_fill_color = "#fafafa"

# Draw whiskers using the Whisker annotation.
# Each whisker runs from a box edge to the whisker end; only the outer end gets a cap.
upper_whisker = Whisker(
    source=box_source,
    base="category",
    upper="upper",
    lower="q3",
    line_color=BOX_COLOR,
    line_width=WHISKER_LINE_WIDTH,
)
upper_whisker.upper_head.size = WHISKER_CAP_SIZE
upper_whisker.upper_head.line_color = BOX_COLOR
upper_whisker.upper_head.line_width = WHISKER_LINE_WIDTH
upper_whisker.lower_head.size = 0
p.add_layout(upper_whisker)

lower_whisker = Whisker(
    source=box_source,
    base="category",
    upper="q1",
    lower="lower",
    line_color=BOX_COLOR,
    line_width=WHISKER_LINE_WIDTH,
)
lower_whisker.lower_head.size = WHISKER_CAP_SIZE
lower_whisker.lower_head.line_color = BOX_COLOR
lower_whisker.lower_head.line_width = WHISKER_LINE_WIDTH
lower_whisker.upper_head.size = 0
p.add_layout(lower_whisker)

# Draw boxes (IQR range) as two stacked halves: median..q3, then q1..median
for bottom_field, top_field in (("q2", "q3"), ("q1", "q2")):
    p.vbar(
        x="category",
        top=top_field,
        bottom=bottom_field,
        width=BOX_WIDTH,
        source=box_source,
        fill_color=BOX_COLOR,
        fill_alpha=0.4,
        line_color=BOX_COLOR,
        line_width=3,
    )

# Median line (horizontal segment across the box).
# The endpoints must differ: using "category" for both x0 and x1 collapses the
# segment to a single point and nothing is drawn.
p.segment(
    x0="median_x0",
    x1="median_x1",
    y0="q2",
    y1="q2",
    source=box_source,
    line_color=MEDIAN_COLOR,
    line_width=4,
)

# Strip plot overlay with jitter
strip_source = ColumnDataSource(data={"category": df["category"], "value": df["value"]})

p.scatter(
    x=jitter("category", width=0.3, range=p.x_range),
    y="value",
    source=strip_source,
    size=16,
    fill_color="#FFD43B",
    fill_alpha=0.75,
    line_color="#b8860b",
    line_width=2,
)

# Save PNG
export_png(p, filename="plot.png")

# Save HTML for interactive version
output_file("plot.html")
save(p)
0 commit comments