|
1 | 1 | """ pyplots.ai |
2 | 2 | box-basic: Basic Box Plot |
3 | | -Library: seaborn 0.13.2 | Python 3.13.11 |
4 | | -Quality: 93/100 | Created: 2025-12-23 |
| 3 | +Library: seaborn 0.13.2 | Python 3.14 |
| 4 | +Quality: 94/100 | Created: 2025-12-23 |
5 | 5 | """ |
6 | 6 |
|
7 | 7 | import matplotlib.pyplot as plt |
|
13 | 13 | # Data |
14 | 14 | np.random.seed(42) |
15 | 15 |
|
16 | | -categories = ["Engineering", "Marketing", "Sales", "HR", "Finance"] |
17 | | -data = [] |
| 16 | +dept_params = { |
| 17 | + "Engineering": {"loc": 95000, "scale": 15000, "n": 80}, |
| 18 | + "Marketing": {"loc": 75000, "scale": 12000, "n": 60}, |
| 19 | + "Sales": {"loc": 70000, "scale": 20000, "n": 100}, |
| 20 | + "HR": {"loc": 65000, "scale": 10000, "n": 50}, |
| 21 | + "Finance": {"loc": 85000, "scale": 18000, "n": 70}, |
| 22 | +} |
18 | 23 |
|
19 | | -for category in categories: |
20 | | - # Generate realistic salary distributions with different characteristics |
21 | | - if category == "Engineering": |
22 | | - values = np.random.normal(95000, 15000, 80) |
23 | | - elif category == "Marketing": |
24 | | - values = np.random.normal(75000, 12000, 60) |
25 | | - elif category == "Sales": |
26 | | - values = np.random.normal(70000, 20000, 100) |
27 | | - elif category == "HR": |
28 | | - values = np.random.normal(65000, 10000, 50) |
29 | | - else: # Finance |
30 | | - values = np.random.normal(85000, 18000, 70) |
31 | | - |
32 | | - # Add some outliers |
| 24 | +data = [] |
| 25 | +for dept, params in dept_params.items(): |
| 26 | + if dept == "Sales": |
| 27 | + # Right-skewed distribution to show distributional diversity |
| 28 | + values = np.random.exponential(scale=15000, size=params["n"]) + 45000 |
| 29 | + else: |
| 30 | + values = np.random.normal(params["loc"], params["scale"], params["n"]) |
33 | 31 | outliers = np.random.uniform(values.min() - 20000, values.max() + 25000, 3) |
34 | 32 | values = np.concatenate([values, outliers]) |
35 | | - |
36 | 33 | for v in values: |
37 | | - data.append({"Department": category, "Salary": v}) |
| 34 | + data.append({"Department": dept, "Salary": v}) |
38 | 35 |
|
39 | 36 | df = pd.DataFrame(data) |
40 | 37 |
|
| 38 | +# Seaborn context for global scaling |
| 39 | +sns.set_context("talk", font_scale=1.1) |
| 40 | + |
41 | 41 | # Plot |
| 42 | +palette = ["#306998", "#E8A838", "#4CAF50", "#FF7043", "#9C27B0"] |
| 43 | + |
42 | 44 | fig, ax = plt.subplots(figsize=(16, 9)) |
43 | 45 |
|
44 | | -# Use hue with palette to avoid seaborn 0.14+ warning |
45 | 46 | sns.boxplot( |
46 | 47 | data=df, |
47 | 48 | x="Department", |
48 | 49 | y="Salary", |
49 | 50 | hue="Department", |
50 | | - palette=["#306998", "#FFD43B", "#4CAF50", "#FF7043", "#9C27B0"], |
| 51 | + palette=palette, |
51 | 52 | linewidth=2.5, |
52 | | - fliersize=10, |
| 53 | + fliersize=0, |
53 | 54 | width=0.6, |
54 | 55 | legend=False, |
55 | 56 | ax=ax, |
56 | 57 | ) |
57 | 58 |
|
| 59 | +sns.stripplot( |
| 60 | + data=df, |
| 61 | + x="Department", |
| 62 | + y="Salary", |
| 63 | + hue="Department", |
| 64 | + palette=palette, |
| 65 | + size=5, |
| 66 | + alpha=0.4, |
| 67 | + jitter=0.25, |
| 68 | + legend=False, |
| 69 | + ax=ax, |
| 70 | +) |
| 71 | + |
58 | 72 | # Style |
59 | 73 | ax.set_xlabel("Department", fontsize=20) |
60 | 74 | ax.set_ylabel("Salary ($)", fontsize=20) |
61 | | -ax.set_title("box-basic · seaborn · pyplots.ai", fontsize=24) |
| 75 | +ax.set_title("box-basic \u00b7 seaborn \u00b7 pyplots.ai", fontsize=24, fontweight="medium") |
62 | 76 | ax.tick_params(axis="both", labelsize=16) |
63 | | -ax.grid(True, alpha=0.3, linestyle="--", axis="y") |
| 77 | +ax.yaxis.grid(True, alpha=0.2, linewidth=0.8) |
| 78 | +sns.despine(ax=ax) |
64 | 79 |
|
65 | | -# Format y-axis as currency |
66 | 80 | ax.yaxis.set_major_formatter(plt.FuncFormatter(lambda x, p: f"${x / 1000:.0f}K")) |
67 | 81 |
|
| 82 | +# Tighten y-axis to reduce empty space |
| 83 | +y_min = df["Salary"].min() - 5000 |
| 84 | +y_max = df["Salary"].max() + 8000 |
| 85 | +ax.set_ylim(y_min, y_max) |
| 86 | + |
| 87 | +# Data storytelling: annotate key insights |
| 88 | +medians = df.groupby("Department")["Salary"].median() |
| 89 | +spreads = df.groupby("Department")["Salary"].apply(lambda x: x.quantile(0.75) - x.quantile(0.25)) |
| 90 | + |
| 91 | +highest_dept = medians.idxmax() |
| 92 | +widest_dept = spreads.idxmax() |
| 93 | + |
| 94 | +dept_positions = {dept: i for i, dept in enumerate(dept_params.keys())} |
| 95 | + |
| 96 | +# Annotate whichever department has the highest median (expected: Engineering, position 0) |
| 97 | +ax.annotate( |
| 98 | + f"Highest median: ${medians[highest_dept] / 1000:.0f}K", |
| 99 | + xy=(dept_positions[highest_dept], medians[highest_dept]), |
| 100 | + xytext=(dept_positions[highest_dept] + 1.6, y_max - (y_max - y_min) * 0.05), |
| 101 | + fontsize=13, |
| 102 | + fontweight="bold", |
| 103 | + color="#306998", |
| 104 | + ha="center", |
| 105 | + arrowprops={"arrowstyle": "->", "color": "#306998", "lw": 1.8, "connectionstyle": "arc3,rad=-0.2"}, |
| 106 | +) |
| 107 | + |
| 108 | +# Annotate whichever department has the widest IQR (expected: Finance, position 4) |
| 109 | +ax.annotate( |
| 110 | + f"Widest IQR: ${spreads[widest_dept] / 1000:.0f}K spread", |
| 111 | + xy=(dept_positions[widest_dept], medians[widest_dept]), |
| 112 | + xytext=(dept_positions[widest_dept] - 1.6, y_min + (y_max - y_min) * 0.07), |
| 113 | + fontsize=13, |
| 114 | + fontweight="bold", |
| 115 | + color="#9C27B0", |
| 116 | + ha="center", |
| 117 | + arrowprops={"arrowstyle": "->", "color": "#9C27B0", "lw": 1.8, "connectionstyle": "arc3,rad=0.2"}, |
| 118 | +) |
| 119 | + |
| 120 | +# Annotate right-skewed Sales distribution |
| 121 | +sales_pos = dept_positions["Sales"] |
| 122 | +sales_q3 = df[df["Department"] == "Sales"]["Salary"].quantile(0.75) |
| 123 | +ax.annotate( |
| 124 | + "Right-skewed\ndistribution", |
| 125 | + xy=(sales_pos, sales_q3), |
| 126 | + xytext=(sales_pos + 0.8, sales_q3 + (y_max - y_min) * 0.13), |
| 127 | + fontsize=13, |
| 128 | + fontweight="bold", |
| 129 | + color="#4CAF50", |
| 130 | + ha="center", |
| 131 | + arrowprops={"arrowstyle": "->", "color": "#4CAF50", "lw": 1.8}, |
| 132 | +) |
| 133 | + |
68 | 134 | plt.tight_layout() |
69 | 135 | plt.savefig("plot.png", dpi=300, bbox_inches="tight") |
0 commit comments