|
| 1 | +""" pyplots.ai |
| 2 | +cat-box-strip: Box Plot with Strip Overlay |
| 3 | +Library: plotly 6.5.0 | Python 3.13.11 |
| 4 | +Quality: 92/100 | Created: 2025-12-30 |
| 5 | +""" |
| 6 | + |
| 7 | +import numpy as np |
| 8 | +import pandas as pd |
| 9 | +import plotly.graph_objects as go |
| 10 | + |
| 11 | + |
# Data - synthetic performance scores for four training methods
np.random.seed(42)

categories = ["Method A", "Method B", "Method C", "Method D"]
n_per_group = [35, 40, 30, 45]
size_a, size_b, size_c, size_d = n_per_group

# Every draw advances NumPy's global RNG, so the samples are generated in a
# fixed order (A, B, C, D-lower, D-upper) to keep the dataset reproducible.

# Method A: normal distribution, moderate spread
scores_a = np.random.normal(72, 8, size_a)

# Method B: higher scores, tighter spread
scores_b = np.random.normal(85, 5, size_b)

# Method C: lower scores, with three hand-placed outliers appended
scores_c = np.concatenate([np.random.normal(58, 10, size_c - 3), [25, 28, 95]])

# Method D: bimodal - half the group around 65, the remainder around 80
lower_mode = np.random.normal(65, 6, size_d // 2)
upper_mode = np.random.normal(80, 6, size_d - size_d // 2)
scores_d = np.concatenate([lower_mode, upper_mode])

# Long-format frame: one row per observation, labelled with its method
scores_by_method = dict(zip(categories, (scores_a, scores_b, scores_c, scores_d)))
df = pd.DataFrame(
    {
        "Category": [label for label, scores in scores_by_method.items() for _ in scores],
        "Score": np.concatenate(list(scores_by_method.values())),
    }
)
| 33 | + |
# Colors
python_blue = "#306998"
python_yellow = "#FFD43B"

# Create figure
fig = go.Figure()

# Styling shared by every box: blue outline, translucent blue fill, no
# built-in sample points (the strip overlay is drawn separately), no legend.
box_style = {
    "marker_color": python_blue,
    "fillcolor": "rgba(48, 105, 152, 0.4)",
    "line": {"color": python_blue, "width": 2},
    "boxmean": False,
    "boxpoints": False,
    "showlegend": False,
    "width": 0.5,
}

# Add one box trace per category, in the order given by `categories`
for method in categories:
    method_scores = df.loc[df["Category"] == method, "Score"]
    box_trace = go.Box(
        x=[method] * len(method_scores),
        y=method_scores,
        name=method,
        **box_style,
    )
    fig.add_trace(box_trace)
| 58 | + |
# Add strip (scatter) points for each category with horizontal jitter.
#
# Categories are drawn at integer x positions 0..len(categories)-1 (the layout
# labels those positions with the category names), so the jitter is applied
# numerically: each point sits at `position + offset`, offset in [-0.15, 0.15).
# The jittered coordinates are passed straight to the trace instead of being
# parked in `customdata` and patched in afterwards via positional indexing
# into `fig.data`.
np.random.seed(123)  # Separate seed for jitter
for position, cat in enumerate(categories):
    cat_data = df.loc[df["Category"] == cat, "Score"]
    jittered_x = position + np.random.uniform(-0.15, 0.15, len(cat_data))

    fig.add_trace(
        go.Scatter(
            x=jittered_x,
            y=cat_data,
            mode="markers",
            name=cat,
            marker=dict(color=python_yellow, size=10, opacity=0.7, line=dict(color=python_blue, width=1)),
            showlegend=False,
            # The hover text names the category explicitly because x is now a
            # jittered number, not the category label.
            hovertemplate=f"{cat}<br>Score: %{{y:.1f}}<extra></extra>",
        )
    )
| 85 | + |
# Layout: the x-axis is numeric, with one tick per category position that is
# relabelled with the category name; half a unit of padding on each side.
category_count = len(categories)

x_axis = {
    "title": {"text": "Training Method", "font": {"size": 22}},
    "tickfont": {"size": 18},
    "tickmode": "array",
    "tickvals": list(range(category_count)),
    "ticktext": categories,
    "range": [-0.5, category_count - 0.5],
}

y_axis = {
    "title": {"text": "Performance Score", "font": {"size": 22}},
    "tickfont": {"size": 18},
    "gridcolor": "rgba(0, 0, 0, 0.1)",
    "gridwidth": 1,
}

figure_title = {
    "text": "cat-box-strip · plotly · pyplots.ai",
    "font": {"size": 28},
    "x": 0.5,
    "xanchor": "center",
}

fig.update_layout(
    title=figure_title,
    xaxis=x_axis,
    yaxis=y_axis,
    template="plotly_white",
    plot_bgcolor="white",
    showlegend=False,
    margin={"l": 80, "r": 50, "t": 100, "b": 80},
)
| 108 | + |
# Move each box from its category label onto the matching numeric x position.
# The box traces are the first len(categories) entries of fig.data, in
# category order, so trace index and axis position coincide.
for position, label in enumerate(categories):
    sample_count = int((df["Category"] == label).sum())
    fig.data[position].x = [position] * sample_count
| 112 | + |
# Export: a static PNG (1600x900 logical pixels rendered at 3x scale) and a
# self-contained HTML page with plotly.js embedded.
png_options = {"width": 1600, "height": 900, "scale": 3}
html_options = {"include_plotlyjs": True, "full_html": True}

fig.write_image("plot.png", **png_options)
fig.write_html("plot.html", **html_options)
| 115 | +fig.write_html("plot.html", include_plotlyjs=True, full_html=True) |
0 commit comments