|
1 | 1 | """ pyplots.ai |
2 | 2 | box-basic: Basic Box Plot |
3 | | -Library: plotnine 0.15.2 | Python 3.13.11 |
4 | | -Quality: 91/100 | Created: 2025-12-23 |
| 3 | +Library: plotnine 0.15.3 | Python 3.14 |
| 4 | +Quality: 92/100 | Created: 2025-12-23 |
5 | 5 | """ |
6 | 6 |
|
7 | 7 | import numpy as np |
8 | 8 | import pandas as pd |
9 | | -from plotnine import aes, element_text, geom_boxplot, ggplot, labs, theme, theme_minimal |
| 9 | +from plotnine import ( |
| 10 | + aes, |
| 11 | + annotate, |
| 12 | + coord_cartesian, |
| 13 | + element_blank, |
| 14 | + element_line, |
| 15 | + element_rect, |
| 16 | + element_text, |
| 17 | + geom_boxplot, |
| 18 | + ggplot, |
| 19 | + labs, |
| 20 | + scale_fill_manual, |
| 21 | + scale_y_continuous, |
| 22 | + stat_summary, |
| 23 | + theme, |
| 24 | + theme_minimal, |
| 25 | +) |
10 | 26 |
|
11 | 27 |
|
12 | 28 | # Data |
13 | 29 | np.random.seed(42) |
14 | | -categories = ["Engineering", "Marketing", "Sales", "Support"] |
15 | | -data = {"category": [], "value": []} |
| 30 | +categories = ["Engineering", "Marketing", "Sales", "Support", "Research"] |
| 31 | +records = [] |
16 | 32 |
|
17 | | -# Generate salary data for each department with varied distributions |
18 | 33 | for cat in categories: |
19 | | - n = np.random.randint(50, 150) |
| 34 | + n = np.random.randint(60, 120) |
20 | 35 | if cat == "Engineering": |
21 | 36 | values = np.random.normal(95000, 15000, n) |
22 | 37 | elif cat == "Marketing": |
23 | 38 | values = np.random.normal(75000, 12000, n) |
24 | 39 | elif cat == "Sales": |
25 | | - values = np.random.normal(70000, 20000, n) |
26 | | - else: # Support |
27 | | - values = np.random.normal(55000, 10000, n) |
28 | | - data["category"].extend([cat] * n) |
29 | | - data["value"].extend(values) |
| 40 | + base = np.random.normal(68000, 18000, n) |
| 41 | + outliers = np.random.normal(135000, 6000, 4) |
| 42 | + values = np.concatenate([base, outliers]) |
| 43 | + elif cat == "Support": |
| 44 | + values = np.random.normal(55000, 8000, n) |
| 45 | + else: # Research |
| 46 | + values = np.random.normal(85000, 20000, n) |
| 47 | + records.extend({"department": cat, "salary": v} for v in values) |
| 48 | + |
| 49 | +df = pd.DataFrame(records) |
| 50 | +dept_order = ["Support", "Marketing", "Sales", "Research", "Engineering"] |
| 51 | +df["department"] = pd.Categorical(df["department"], categories=dept_order, ordered=True) |
30 | 52 |
|
31 | | -df = pd.DataFrame(data) |
| 53 | +# Compute medians for storytelling annotations |
| 54 | +medians = df.groupby("department", observed=True)["salary"].median() |
| 55 | +eng_median = medians["Engineering"] |
| 56 | +sup_median = medians["Support"] |
| 57 | +gap = eng_median - sup_median |
32 | 58 |
|
33 | | -# Plot |
| 59 | +# Custom palette — cohesive muted tones, one per dept_order level; Python Blue (#306998) is last, for Engineering |
| 60 | +palette = ["#7FAACC", "#E8A87C", "#D4A5C9", "#82C9B0", "#306998"] |
| 61 | + |
| 62 | +# Plot with stat_summary and annotate for storytelling |
34 | 63 | plot = ( |
35 | | - ggplot(df, aes(x="category", y="value", fill="category")) |
36 | | - + geom_boxplot(outlier_size=3, outlier_alpha=0.6, size=0.8) |
| 64 | + ggplot(df, aes(x="department", y="salary", fill="department")) |
| 65 | + + geom_boxplot( |
| 66 | + outlier_size=3.5, outlier_alpha=0.7, outlier_colour="#C0392B", size=0.5, alpha=0.88, width=0.6, color="#444444" |
| 67 | + ) |
| 68 | + # Median diamond markers via stat_summary — distinctive plotnine feature |
| 69 | + + stat_summary(fun_y=np.median, geom="point", size=5, shape="D", color="#1a1a1a", fill="#1a1a1a") |
| 70 | + + scale_fill_manual(values=palette) |
| 71 | + + scale_y_continuous(labels=lambda vals: [f"${v / 1000:.0f}k" for v in vals], breaks=range(20000, 160001, 20000)) |
| 72 | + + coord_cartesian(ylim=(12000, 156000)) |
| 73 | + # Annotation: salary gap between Engineering and Support |
| 74 | + + annotate( |
| 75 | + "text", |
| 76 | + x=3, |
| 77 | + y=151000, |
| 78 | + label=f"Engineering earns ${gap / 1000:.0f}k more than Support", |
| 79 | + color="#306998", |
| 80 | + size=10, |
| 81 | + ha="center", |
| 82 | + fontweight="bold", |
| 83 | + ) |
| 84 | + + annotate("segment", x=1, xend=5, y=144000, yend=144000, color="#306998", size=0.6, linetype="dashed", alpha=0.5) |
| 85 | + # Annotation: Sales outlier cluster callout |
| 86 | + + annotate( |
| 87 | + "label", |
| 88 | + x=3.5, |
| 89 | + y=132000, |
| 90 | + label="Senior hires\nabove market rate", |
| 91 | + size=9, |
| 92 | + color="#8B0000", |
| 93 | + fill="#FFF0F0", |
| 94 | + alpha=0.9, |
| 95 | + label_size=0, |
| 96 | + ha="left", |
| 97 | + ) |
| 98 | + + annotate("segment", x=3.18, xend=3.42, y=132000, yend=132000, color="#C0392B", size=0.5, alpha=0.5) |
| 99 | + # Annotation: Support tight distribution insight |
| 100 | + + annotate( |
| 101 | + "text", x=1, y=28000, label="Narrow spread\n(σ ≈ $8k)", color="#666666", size=8, ha="center", fontstyle="italic" |
| 102 | + ) |
37 | 103 | + labs(x="Department", y="Salary ($)", title="box-basic · plotnine · pyplots.ai") |
38 | 104 | + theme_minimal() |
39 | 105 | + theme( |
40 | 106 | figure_size=(16, 9), |
41 | | - text=element_text(size=14), |
42 | | - axis_title=element_text(size=20), |
43 | | - axis_text=element_text(size=16), |
44 | | - plot_title=element_text(size=24), |
| 107 | + text=element_text(size=14, color="#333333"), |
| 108 | + plot_title=element_text(size=24, color="#1a1a1a", weight="bold", margin={"b": 14}), |
| 109 | + axis_title_x=element_text(size=20, color="#222222", margin={"t": 14}), |
| 110 | + axis_title_y=element_text(size=20, color="#222222", margin={"r": 14}), |
| 111 | + axis_text=element_text(size=16, color="#555555"), |
45 | 112 | legend_position="none", |
| 113 | + panel_grid_major_x=element_blank(), |
| 114 | + panel_grid_minor=element_blank(), |
| 115 | + panel_grid_major_y=element_line(color="#e0e0e0", size=0.5), |
| 116 | + axis_ticks_major_x=element_blank(), |
| 117 | + axis_ticks_major_y=element_blank(), |
| 118 | + plot_background=element_rect(fill="white", color="white"), |
| 119 | + panel_background=element_rect(fill="white", color="white"), |
46 | 120 | ) |
47 | 121 | ) |
48 | 122 |
|
|
0 commit comments