Skip to content

Commit 3cb6f77

Browse files
Merge branch 'main' into implementation/violin-basic/bokeh
2 parents 5834256 + afab661 commit 3cb6f77

14 files changed

Lines changed: 1140 additions & 868 deletions

File tree

plots/violin-basic/implementations/altair.py

Lines changed: 26 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
""" pyplots.ai
22
violin-basic: Basic Violin Plot
3-
Library: altair 6.0.0 | Python 3.13.11
4-
Quality: 92/100 | Created: 2025-12-23
3+
Library: altair 6.0.0 | Python 3.14.3
4+
Quality: 94/100 | Updated: 2026-02-21
55
"""
66

77
import altair as alt
@@ -16,39 +16,29 @@
1616

1717
for cat in categories:
1818
if cat == "Engineering":
19-
# Higher salaries with moderate spread
2019
values = np.random.normal(92000, 16000, 150)
2120
elif cat == "Marketing":
22-
# Mid-range salaries
2321
values = np.random.normal(70000, 13000, 150)
2422
elif cat == "Sales":
2523
# Bimodal: base salary + high performers with commissions
2624
values = np.concatenate([np.random.normal(50000, 8000, 75), np.random.normal(92000, 11000, 75)])
2725
else: # Support
28-
# Lower salary, tighter distribution
2926
values = np.random.normal(55000, 10000, 150)
3027

3128
for v in values:
3229
data.append({"Department": cat, "Salary": v})
3330

3431
df = pd.DataFrame(data)
3532

36-
# Calculate statistics for quartile markers
37-
stats = (
38-
df.groupby("Department")["Salary"]
39-
.agg(q1=lambda x: x.quantile(0.25), median=lambda x: x.quantile(0.5), q3=lambda x: x.quantile(0.75))
40-
.reset_index()
41-
)
42-
43-
# Merge stats for layering
44-
df_with_stats = df.merge(stats, on="Department")
33+
# Department order: unimodal distributions first, bimodal Sales last as focal point
34+
dept_order = ["Support", "Marketing", "Engineering", "Sales"]
4535

46-
# Colors - Python palette
47-
colors = ["#306998", "#FFD43B", "#4B8BBE", "#FFE873"]
48-
color_scale = alt.Scale(domain=categories, range=colors)
36+
# Colors - four fully distinct colorblind-safe hues with Python Blue
37+
# brown, purple, Python Blue, orange — each maximally distinct
38+
palette = ["#8B6C42", "#9467BD", "#306998", "#E5832D"]
39+
color_scale = alt.Scale(domain=dept_order, range=palette)
4940

50-
# Base chart
51-
base = alt.Chart(df_with_stats)
41+
base = alt.Chart(df)
5242

5343
# Violin shape using kernel density transform
5444
violin = (
@@ -69,14 +59,25 @@
6959
axis=alt.Axis(labels=False, values=[0], grid=False, ticks=False),
7060
),
7161
color=alt.Color("Department:N", scale=color_scale, legend=None),
62+
tooltip=[alt.Tooltip("Department:N"), alt.Tooltip("Salary:Q", format="$,.0f")],
7263
)
7364
)
7465

75-
# IQR rule (black vertical line)
76-
quartile_rule = base.mark_rule(color="black", strokeWidth=5).encode(y="q1:Q", y2="q3:Q")
66+
# IQR rule via declarative aggregate (one rule per department)
67+
quartile_rule = (
68+
base.transform_aggregate(q1="q1(Salary)", q3="q3(Salary)", groupby=["Department"])
69+
.mark_rule(color="#1a1a1a", strokeWidth=5)
70+
.encode(y="q1:Q", y2="q3:Q")
71+
)
7772

78-
# Median point (white dot with black border)
79-
median_point = base.mark_point(color="white", size=250, filled=True, strokeWidth=3, stroke="black").encode(y="median:Q")
73+
# Median point via declarative aggregate (one dot per department)
74+
median_point = (
75+
base.transform_aggregate(med="median(Salary)", groupby=["Department"])
76+
.mark_point(color="white", size=250, filled=True, strokeWidth=3, stroke="#1a1a1a")
77+
.encode(
78+
y="med:Q", tooltip=[alt.Tooltip("Department:N"), alt.Tooltip("med:Q", title="Median Salary", format="$,.0f")]
79+
)
80+
)
8081

8182
# Combine layers and facet by department
8283
chart = (
@@ -85,14 +86,14 @@
8586
column=alt.Column(
8687
"Department:N",
8788
header=alt.Header(labelFontSize=20, labelOrient="bottom", title=None, labelPadding=15),
88-
sort=categories,
89+
sort=dept_order,
8990
)
9091
)
9192
.resolve_scale(x="independent")
9293
.properties(title=alt.Title("violin-basic · altair · pyplots.ai", fontSize=28, anchor="middle"))
9394
.configure_facet(spacing=20)
9495
.configure_view(stroke=None, continuousWidth=350, continuousHeight=750)
95-
.configure_axis(labelFontSize=18, titleFontSize=22, gridOpacity=0.3, gridDash=[3, 3])
96+
.configure_axis(labelFontSize=18, titleFontSize=22, gridOpacity=0.2, gridDash=[3, 3])
9697
)
9798

9899
# Save outputs
Lines changed: 41 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
""" pyplots.ai
22
violin-basic: Basic Violin Plot
3-
Library: letsplot 4.8.1 | Python 3.13.11
4-
Quality: 91/100 | Created: 2025-12-23
3+
Library: letsplot 4.8.2 | Python 3.14.3
4+
Quality: 90/100 | Updated: 2026-02-21
55
"""
66

77
import numpy as np
@@ -15,53 +15,66 @@
1515
# Data
1616
np.random.seed(42)
1717

18-
categories = ["Engineering", "Marketing", "Sales", "Design"]
19-
colors = ["#306998", "#FFD43B", "#4B8BBE", "#FFE873"]
18+
# Ordered by median salary (high → low) for visual storytelling
19+
dept_order = ["Engineering", "Design", "Marketing", "Sales"]
20+
palette = ["#306998", "#2E8B57", "#E8A317", "#E07A5F"]
2021

21-
# Generate realistic salary distributions per department
2222
data = []
23-
distributions = {
24-
"Engineering": {"mean": 95000, "std": 20000, "n": 200},
25-
"Marketing": {"mean": 75000, "std": 15000, "n": 150},
26-
"Sales": {"mean": 70000, "std": 25000, "n": 180},
27-
"Design": {"mean": 80000, "std": 18000, "n": 120},
28-
}
2923

30-
for cat in categories:
31-
dist = distributions[cat]
32-
values = np.random.normal(dist["mean"], dist["std"], dist["n"])
33-
values = np.clip(values, 30000, 200000) # Realistic salary bounds
34-
for v in values:
35-
data.append({"Department": cat, "Salary": v})
24+
# Engineering: bimodal (junior ~$70k + senior ~$115k) — showcases violin strength
25+
eng_junior = np.random.normal(70000, 8000, 80)
26+
eng_senior = np.random.normal(115000, 12000, 120)
27+
eng_values = np.clip(np.concatenate([eng_junior, eng_senior]), 30000, 200000)
28+
for v in eng_values:
29+
data.append({"Department": "Engineering", "Salary": v})
30+
31+
# Design: moderate spread, roughly normal
32+
design_values = np.random.normal(80000, 18000, 120)
33+
design_values = np.clip(design_values, 30000, 200000)
34+
for v in design_values:
35+
data.append({"Department": "Design", "Salary": v})
36+
37+
# Marketing: narrower with a small cluster of high earners
38+
mkt_base = np.random.normal(72000, 12000, 130)
39+
mkt_high = np.random.normal(105000, 8000, 20)
40+
mkt_values = np.clip(np.concatenate([mkt_base, mkt_high]), 30000, 200000)
41+
for v in mkt_values:
42+
data.append({"Department": "Marketing", "Salary": v})
43+
44+
# Sales: right-skewed (many moderate earners, few top performers)
45+
sales_values = np.random.exponential(20000, 180) + 45000
46+
sales_values = np.clip(sales_values, 30000, 200000)
47+
for v in sales_values:
48+
data.append({"Department": "Sales", "Salary": v})
3649

3750
df = pd.DataFrame(data)
3851

3952
# Plot
4053
plot = (
4154
ggplot(df, aes(x="Department", y="Salary", fill="Department")) # noqa: F405
4255
+ geom_violin( # noqa: F405
43-
quantiles=[0.25, 0.5, 0.75], # Show quartiles including median
44-
quantile_lines=True, # Draw lines at quantiles
45-
size=1.5, # Border thickness
46-
alpha=0.8,
47-
trim=False, # Show full tails
56+
quantiles=[0.25, 0.5, 0.75], quantile_lines=True, size=1.2, alpha=0.85, trim=False, color="#2C3E50"
57+
)
58+
+ scale_x_discrete(limits=dept_order) # noqa: F405
59+
+ scale_fill_manual(values=dict(zip(dept_order, palette, strict=True))) # noqa: F405
60+
+ scale_y_continuous( # noqa: F405
61+
format="${,.0f}"
4862
)
49-
+ scale_fill_manual(values=colors) # noqa: F405
5063
+ labs( # noqa: F405
51-
x="Department", y="Salary ($)", title="violin-basic · lets-plot · pyplots.ai"
64+
x="Department", y="Salary", title="violin-basic \u00b7 letsplot \u00b7 pyplots.ai"
5265
)
5366
+ theme_minimal() # noqa: F405
5467
+ theme( # noqa: F405
5568
axis_title=element_text(size=20), # noqa: F405
5669
axis_text=element_text(size=16), # noqa: F405
5770
plot_title=element_text(size=24), # noqa: F405
58-
legend_position="none", # Legend not needed, x-axis shows categories
71+
legend_position="none",
72+
panel_grid_major_x=element_blank(), # noqa: F405
73+
axis_ticks=element_blank(), # noqa: F405
5974
)
6075
+ ggsize(1600, 900) # noqa: F405
6176
)
6277

63-
# Save PNG (scale 3x to get 4800 × 2700 px)
78+
# Save
6479
export_ggsave(plot, filename="plot.png", path=".", scale=3)
65-
66-
# Save HTML for interactive version
6780
export_ggsave(plot, filename="plot.html", path=".")
Lines changed: 56 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -1,62 +1,81 @@
11
""" pyplots.ai
22
violin-basic: Basic Violin Plot
3-
Library: matplotlib 3.10.8 | Python 3.13.11
4-
Quality: 92/100 | Created: 2025-12-23
3+
Library: matplotlib 3.10.8 | Python 3.14.3
4+
Quality: 92/100 | Updated: 2026-02-21
55
"""
66

7+
import matplotlib.patheffects as pe
78
import matplotlib.pyplot as plt
89
import numpy as np
910

1011

11-
# Data - simulated test scores across different schools
12+
# Data - simulated test scores (0-100) across different schools
1213
np.random.seed(42)
13-
categories = ["School A", "School B", "School C", "School D"]
14+
categories = ["Lincoln HS", "Roosevelt Acad.", "Jefferson HS", "Hamilton Prep"]
1415
data = [
15-
np.random.normal(75, 10, 150), # School A: centered around 75
16-
np.random.normal(82, 8, 150), # School B: higher scores, less spread
17-
np.random.normal(68, 15, 150), # School C: lower average, more spread
18-
np.random.normal(78, 12, 150), # School D: moderate
16+
np.clip(np.random.normal(75, 10, 150), 0, 100), # Lincoln: normal, centered ~75
17+
np.clip(np.random.normal(85, 6, 150), 0, 100), # Roosevelt: high, tight cluster
18+
np.clip(np.random.normal(62, 15, 150), 0, 100), # Jefferson: lower, wide spread
19+
np.clip(
20+
np.concatenate([np.random.normal(70, 5, 80), np.random.normal(88, 4, 70)]), 0, 100
21+
), # Hamilton: bimodal (two subgroups)
1922
]
2023

21-
# Create plot (4800x2700 px)
24+
# Multi-series palette starting with Python Blue; warm accent for bimodal Hamilton
25+
colors = ["#306998", "#5BA58B", "#7A6FB5", "#D4853F"]
26+
edge_colors = ["#1E4060", "#3A7460", "#524A80", "#9A5F2A"]
27+
28+
# Plot
2229
fig, ax = plt.subplots(figsize=(16, 9))
2330

24-
# Create violin plot with quartile markers
25-
parts = ax.violinplot(data, positions=range(len(categories)), showmeans=False, showmedians=True, showextrema=True)
31+
parts = ax.violinplot(
32+
data,
33+
positions=range(len(categories)),
34+
quantiles=[[0.25, 0.5, 0.75]] * len(categories),
35+
showmeans=False,
36+
showmedians=False,
37+
showextrema=False,
38+
bw_method=0.3,
39+
widths=0.75,
40+
)
2641

27-
# Style the violins with Python Blue
28-
for pc in parts["bodies"]:
29-
pc.set_facecolor("#306998")
30-
pc.set_edgecolor("#1e4a6e")
31-
pc.set_alpha(0.7)
42+
# Style each violin body with a distinct color
43+
for i, pc in enumerate(parts["bodies"]):
44+
pc.set_facecolor(colors[i])
45+
pc.set_edgecolor(edge_colors[i])
46+
pc.set_alpha(0.8)
3247
pc.set_linewidth(2)
3348

34-
# Style the lines (median, min, max)
35-
parts["cmedians"].set_color("#FFD43B")
36-
parts["cmedians"].set_linewidth(3)
37-
parts["cmins"].set_color("#1e4a6e")
38-
parts["cmins"].set_linewidth(2)
39-
parts["cmaxes"].set_color("#1e4a6e")
40-
parts["cmaxes"].set_linewidth(2)
41-
parts["cbars"].set_color("#1e4a6e")
42-
parts["cbars"].set_linewidth(2)
43-
44-
# Add quartile markers (Q1 and Q3) as box indicators
45-
quartile1 = [np.percentile(d, 25) for d in data]
46-
quartile3 = [np.percentile(d, 75) for d in data]
47-
48-
# Draw thin boxes for interquartile range
49-
for i, (q1, q3) in enumerate(zip(quartile1, quartile3, strict=True)):
50-
ax.vlines(i, q1, q3, color="#1e4a6e", linewidth=6, zorder=3)
51-
52-
# Labels and styling (scaled font sizes for 4800x2700)
49+
# Quantile lines with path effects for legibility against colored bodies
50+
q_colors = ["white", "#FFD43B", "white"] * len(categories)
51+
q_widths = [2.5, 4, 2.5] * len(categories)
52+
parts["cquantiles"].set_colors(q_colors)
53+
parts["cquantiles"].set_linewidths(q_widths)
54+
parts["cquantiles"].set_path_effects([pe.Stroke(linewidth=6, foreground="black", alpha=0.3), pe.Normal()])
55+
56+
# Labels and styling
5357
ax.set_xticks(range(len(categories)))
5458
ax.set_xticklabels(categories)
5559
ax.set_xlabel("School", fontsize=20)
5660
ax.set_ylabel("Test Score (points)", fontsize=20)
57-
ax.set_title("violin-basic · matplotlib · pyplots.ai", fontsize=24)
61+
ax.set_title("violin-basic \u00b7 matplotlib \u00b7 pyplots.ai", fontsize=24, fontweight="medium")
5862
ax.tick_params(axis="both", labelsize=16)
59-
ax.grid(True, alpha=0.3, linestyle="--", axis="y")
63+
64+
ax.spines["top"].set_visible(False)
65+
ax.spines["right"].set_visible(False)
66+
ax.yaxis.grid(True, alpha=0.2, linewidth=0.8)
67+
68+
# Subtle annotation highlighting Hamilton Prep's bimodal distribution
69+
ax.annotate(
70+
"Two distinct\nperformance groups",
71+
xy=(3, 75),
72+
xytext=(3, 45),
73+
fontsize=13,
74+
color="#9A5F2A",
75+
fontstyle="italic",
76+
ha="center",
77+
arrowprops={"arrowstyle": "->", "color": "#9A5F2A", "lw": 1.5},
78+
)
6079

6180
plt.tight_layout()
6281
plt.savefig("plot.png", dpi=300, bbox_inches="tight")

0 commit comments

Comments
 (0)