Skip to content

Commit d591a9c

Browse files
update(dendrogram-basic): seaborn — comprehensive quality review (#5201)
## Summary Updated **seaborn** implementation for **dendrogram-basic**. **Changes:** Comprehensive review improving code quality, data choice, visual design, spec compliance, and library feature usage. ## Test Plan - [x] Preview images uploaded to GCS staging - [x] Implementation file passes ruff format/check - [x] Metadata YAML updated with current versions - [ ] Automated review triggered --- Generated with [Claude Code](https://claude.com/claude-code) `/update` command --------- Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com> Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
1 parent 7674e5c commit d591a9c

File tree

2 files changed

+232
-181
lines changed

2 files changed

+232
-181
lines changed
Lines changed: 95 additions & 59 deletions
Original file line numberDiff line numberDiff line change
@@ -1,81 +1,117 @@
11
""" pyplots.ai
22
dendrogram-basic: Basic Dendrogram
3-
Library: seaborn 0.13.2 | Python 3.13.11
4-
Quality: 91/100 | Created: 2025-12-23
3+
Library: seaborn 0.13.2 | Python 3.14.3
4+
Quality: 89/100 | Updated: 2026-04-05
55
"""
66

77
import matplotlib.pyplot as plt
88
import numpy as np
9+
import pandas as pd
910
import seaborn as sns
10-
from scipy.cluster.hierarchy import dendrogram, linkage
11-
from sklearn.datasets import load_iris
1211

1312

14-
# Set seaborn style for better aesthetics
15-
sns.set_theme(style="whitegrid")
13+
# Style - leverage seaborn's distinctive theming
14+
sns.set_theme(style="white", rc={"axes.linewidth": 0.8, "font.family": "sans-serif"})
1615
sns.set_context("talk", font_scale=1.2)
1716

18-
# Load iris dataset - use subset for readability (spec recommends 10-50 items)
19-
np.random.seed(42)
20-
iris = load_iris()
17+
# Custom palette starting with Python Blue
18+
species_palette = sns.color_palette(["#306998", "#E8843C", "#4EA86B"])
2119
species_names = ["Setosa", "Versicolor", "Virginica"]
20+
species_colors = dict(zip(species_names, species_palette, strict=True))
2221

23-
# Select 10 samples from each species (30 total) for clearer visualization
24-
indices = np.concatenate([np.random.choice(np.where(iris.target == i)[0], 10, replace=False) for i in range(3)])
25-
26-
X = iris.data[indices]
27-
28-
# Create clear labels: Species-Number format using vectorized approach
29-
species_ids = iris.target[indices]
30-
labels = [f"{species_names[sid]}-{np.sum(species_ids[: i + 1] == sid)}" for i, sid in enumerate(species_ids)]
31-
32-
# Compute linkage matrix using Ward's method
33-
linkage_matrix = linkage(X, method="ward")
22+
# Data - use seaborn's iris dataset (30 samples for readable dendrogram)
23+
np.random.seed(42)
24+
iris = sns.load_dataset("iris")
25+
samples = (
26+
iris.groupby("species").apply(lambda g: g.sample(10, random_state=42), include_groups=False).reset_index(level=0)
27+
)
3428

35-
# Create figure
36-
fig, ax = plt.subplots(figsize=(16, 9))
29+
feature_cols = ["sepal_length", "sepal_width", "petal_length", "petal_width"]
30+
features = samples[feature_cols].copy()
31+
32+
# Build sample labels: Species-Number
33+
counters = dict.fromkeys(["setosa", "versicolor", "virginica"], 0)
34+
labels = []
35+
species_list = []
36+
for species in samples["species"]:
37+
counters[species] += 1
38+
labels.append(f"{species.title()}-{counters[species]}")
39+
species_list.append(species.title())
40+
41+
features.index = labels
42+
43+
# Rename columns to readable format
44+
features.columns = ["Sepal Length", "Sepal Width", "Petal Length", "Petal Width"]
45+
46+
# Row colors by species - seaborn distinctive feature for annotating clusters
47+
row_colors = pd.Series([species_colors[sp] for sp in species_list], index=labels, name="Species")
48+
49+
# sns.clustermap - seaborn's distinctive hierarchical clustering + dendrogram
50+
# This IS the idiomatic seaborn way to visualize dendrograms with data context
51+
g = sns.clustermap(
52+
features,
53+
method="ward",
54+
row_colors=row_colors,
55+
col_cluster=True,
56+
cmap=sns.color_palette("viridis", as_cmap=True),
57+
figsize=(16, 9),
58+
dendrogram_ratio=(0.25, 0.12),
59+
linewidths=0.5,
60+
linecolor="white",
61+
cbar_kws={"label": "Feature Value"},
62+
tree_kws={"linewidths": 1.8, "colors": "#666666"},
63+
xticklabels=True,
64+
yticklabels=True,
65+
)
3766

38-
# Define custom colors using seaborn colorblind palette for species
39-
palette = sns.color_palette("colorblind", n_colors=3)
40-
species_color_map = dict(zip(species_names, palette, strict=True))
67+
# Customize the row dendrogram (main dendrogram showing sample clustering)
68+
row_dendro_ax = g.ax_row_dendrogram
69+
row_dendro_ax.set_xlabel("Distance (Ward)", fontsize=14)
4170

42-
# Create dendrogram
43-
dendrogram(
44-
linkage_matrix,
45-
labels=labels,
46-
leaf_rotation=45,
47-
leaf_font_size=14,
48-
ax=ax,
49-
above_threshold_color="#888888",
50-
color_threshold=0.7 * max(linkage_matrix[:, 2]),
51-
)
71+
# Customize heatmap axis labels
72+
g.ax_heatmap.set_xlabel("Iris Features", fontsize=20)
73+
g.ax_heatmap.set_ylabel("Iris Samples (by Species)", fontsize=20)
74+
g.ax_heatmap.tick_params(axis="both", labelsize=13)
5275

53-
# Color the x-axis labels by species using exact palette colors
54-
for lbl in ax.get_xticklabels():
55-
text = lbl.get_text()
56-
species = text.rsplit("-", 1)[0]
57-
if species in species_color_map:
58-
lbl.set_color(species_color_map[species])
76+
# Color y-axis (sample) labels by species
77+
for lbl in g.ax_heatmap.get_yticklabels():
78+
species = lbl.get_text().rsplit("-", 1)[0]
79+
if species in species_colors:
80+
lbl.set_color(species_colors[species])
5981
lbl.set_fontweight("bold")
6082

61-
# Style the plot with seaborn-compatible settings
62-
ax.set_xlabel("Iris Samples (by Species)", fontsize=20)
63-
ax.set_ylabel("Distance (Ward Linkage)", fontsize=20)
64-
ax.set_title("dendrogram-basic · seaborn · pyplots.ai", fontsize=24)
65-
ax.tick_params(axis="y", labelsize=16)
66-
ax.tick_params(axis="x", labelsize=14)
67-
68-
# Make grid subtle
69-
ax.grid(True, alpha=0.3, linestyle="--", axis="y")
70-
ax.set_axisbelow(True)
83+
# Color x-axis (feature) labels
84+
for lbl in g.ax_heatmap.get_xticklabels():
85+
lbl.set_fontsize(14)
86+
lbl.set_rotation(30)
87+
lbl.set_ha("right")
88+
89+
# Style the colorbar
90+
cbar = g.cax
91+
cbar.tick_params(labelsize=12)
92+
cbar.set_ylabel("Feature Value", fontsize=14)
93+
94+
# Add species legend using scatter proxies
95+
legend_handles = [
96+
plt.Line2D([0], [0], marker="s", color="w", markerfacecolor=c, markersize=12, label=n)
97+
for n, c in species_colors.items()
98+
]
99+
g.ax_heatmap.legend(
100+
handles=legend_handles,
101+
title="Species",
102+
loc="upper left",
103+
bbox_to_anchor=(1.15, 1.0),
104+
fontsize=12,
105+
title_fontsize=13,
106+
framealpha=0.95,
107+
edgecolor="#cccccc",
108+
fancybox=True,
109+
)
71110

72-
# Add legend using scatter plot handles for exact color matching
73-
for i, species in enumerate(species_names):
74-
ax.scatter([], [], c=[palette[i]], s=150, label=species, marker="s")
75-
ax.legend(title="Species", loc="upper right", fontsize=14, title_fontsize=16, framealpha=0.9)
111+
# Title - placed on the figure
112+
g.figure.suptitle("dendrogram-basic \u00b7 seaborn \u00b7 pyplots.ai", fontsize=24, fontweight="medium", y=1.02)
76113

77-
# Remove top and right spines for cleaner look
78-
sns.despine(ax=ax)
114+
# Visual refinement
115+
sns.despine(ax=g.ax_heatmap, left=False, bottom=False)
79116

80-
plt.tight_layout()
81-
plt.savefig("plot.png", dpi=300, bbox_inches="tight")
117+
g.figure.savefig("plot.png", dpi=300, bbox_inches="tight")

0 commit comments

Comments
 (0)