Skip to content
Merged
154 changes: 95 additions & 59 deletions plots/dendrogram-basic/implementations/seaborn.py
Original file line number Diff line number Diff line change
@@ -1,81 +1,117 @@
""" pyplots.ai
dendrogram-basic: Basic Dendrogram
Library: seaborn 0.13.2 | Python 3.13.11
Quality: 91/100 | Created: 2025-12-23
Library: seaborn 0.13.2 | Python 3.14.3
Quality: 89/100 | Updated: 2026-04-05
"""

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from scipy.cluster.hierarchy import dendrogram, linkage
from sklearn.datasets import load_iris


# Style - leverage seaborn's distinctive theming.
# Context and font scale are passed to set_theme itself so that the explicit
# rc overrides are applied last. A separate sns.set_context("talk") call made
# afterwards rewrites "axes.linewidth" with the context default and silently
# discards the 0.8 override.
sns.set_theme(
    context="talk",
    style="white",
    font_scale=1.2,
    rc={"axes.linewidth": 0.8, "font.family": "sans-serif"},
)

# Load iris dataset - use subset for readability (spec recommends 10-50 items)
# Seed NumPy's global RNG so the np.random.choice draws below are repeatable.
np.random.seed(42)
iris = load_iris()
# Custom palette starting with Python Blue
# One colour per species; species_colors maps display name -> palette entry.
species_palette = sns.color_palette(["#306998", "#E8843C", "#4EA86B"])
species_names = ["Setosa", "Versicolor", "Virginica"]
species_colors = dict(zip(species_names, species_palette, strict=True))

# Select 10 samples from each species (30 total) for clearer visualization
# For each target code 0-2, draw 10 distinct row positions without replacement.
indices = np.concatenate([np.random.choice(np.where(iris.target == i)[0], 10, replace=False) for i in range(3)])

X = iris.data[indices]

# Create clear labels in Species-Number format: the number is the running
# count of that species among the selected rows up to and including this one.
species_ids = iris.target[indices]
labels = [f"{species_names[sid]}-{np.sum(species_ids[: i + 1] == sid)}" for i, sid in enumerate(species_ids)]

# Compute linkage matrix using Ward's method
linkage_matrix = linkage(X, method="ward")
# Data - use seaborn's iris dataset (30 samples for readable dendrogram).
# No NumPy seeding here: the per-species draw is made reproducible by the
# explicit random_state passed to DataFrame.sample, so seeding the global
# NumPy RNG would only suggest a dependency that does not exist.
Copy link

Copilot AI Apr 5, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

np.random.seed(42) no longer affects the sampling (sampling uses random_state=42). Consider removing the seed call to avoid implying that NumPy randomness is involved in the data selection.

Suggested change
np.random.seed(42)

Copilot uses AI. Check for mistakes.
# Load the iris table through seaborn and keep ten rows per species.
iris = sns.load_dataset("iris")


def _ten_per_group(group):
    """Return ten rows of *group*, drawn with a fixed random_state."""
    return group.sample(10, random_state=42)


by_species = iris.groupby("species")
picked = by_species.apply(_ten_per_group, include_groups=False)
# The grouping key comes back as the outer index level; move it back into an
# ordinary "species" column.
samples = picked.reset_index(level=0)

# Create figure
fig, ax = plt.subplots(figsize=(16, 9))

# Numeric measurements that feed the clustering.
feature_cols = ["sepal_length", "sepal_width", "petal_length", "petal_width"]
features = samples[feature_cols].copy()

# Build sample labels: Species-Number.
# The running number comes from a per-species cumulative count, so no species
# name has to be hard-coded and an unexpected category cannot raise KeyError.
species_list = [name.title() for name in samples["species"]]
ordinals = samples.groupby("species").cumcount() + 1
labels = [f"{name}-{k}" for name, k in zip(species_list, ordinals, strict=True)]

features.index = labels

# Rename columns to readable format ("sepal_length" -> "Sepal Length").
features.columns = [col.replace("_", " ").title() for col in feature_cols]

# Row colors by species - seaborn distinctive feature for annotating clusters.
# Indexed by the same labels as `features` so the colors align row by row.
row_colors = pd.Series([species_colors[sp] for sp in species_list], index=labels, name="Species")

# sns.clustermap draws the feature table as a heatmap with row and column
# dendrograms on its margins; both axes are clustered with Ward linkage.
cell_borders = {"linewidths": 0.5, "linecolor": "white"}
branch_style = {"linewidths": 1.8, "colors": "#666666"}
viridis = sns.color_palette("viridis", as_cmap=True)

g = sns.clustermap(
    features,
    method="ward",
    col_cluster=True,
    row_colors=row_colors,
    cmap=viridis,
    figsize=(16, 9),
    dendrogram_ratio=(0.25, 0.12),
    tree_kws=branch_style,
    cbar_kws={"label": "Feature Value"},
    xticklabels=True,
    yticklabels=True,
    **cell_borders,
)

# Define custom colors using seaborn colorblind palette for species
palette = sns.color_palette("colorblind", n_colors=3)
species_color_map = dict(zip(species_names, palette, strict=True))

# Customize the row dendrogram (main dendrogram showing sample clustering).
# seaborn switches the dendrogram axes off, which also suppresses anything set
# through set_xlabel. Draw the distance caption as a text artist anchored in
# axes coordinates instead; text is rendered regardless of the axis state.
row_dendro_ax = g.ax_row_dendrogram
row_dendro_ax.text(
    0.5,
    -0.02,
    "Distance (Ward)",
    transform=row_dendro_ax.transAxes,
    ha="center",
    va="top",
    fontsize=14,
)

# Draw the tree on `ax`. Merges higher than 70% of the tallest merge distance
# (third column of the linkage matrix) use the fixed grey given below.
tallest_merge = linkage_matrix[:, 2].max()
dendrogram(
    linkage_matrix,
    ax=ax,
    labels=labels,
    leaf_rotation=45,
    leaf_font_size=14,
    color_threshold=0.7 * tallest_merge,
    above_threshold_color="#888888",
)

# Axis titles and tick label size for the clustermap heatmap.
heatmap_ax = g.ax_heatmap
heatmap_ax.set_xlabel("Iris Features", fontsize=20)
heatmap_ax.set_ylabel("Iris Samples (by Species)", fontsize=20)
heatmap_ax.tick_params(axis="both", labelsize=13)

# Tint the dendrogram's leaf labels: the species is the label text before its
# last "-", looked up in the colorblind-palette mapping.
for leaf_label in ax.get_xticklabels():
    leaf_species = leaf_label.get_text().rsplit("-", 1)[0]
    leaf_color = species_color_map.get(leaf_species)
    if leaf_color is not None:
        leaf_label.set_color(leaf_color)

# Same idea for the heatmap's sample labels, using the custom species colors;
# matched labels are also set in bold.
for row_label in g.ax_heatmap.get_yticklabels():
    row_species = row_label.get_text().rsplit("-", 1)[0]
    if row_species not in species_colors:
        continue
    row_label.set_color(species_colors[row_species])
    row_label.set_fontweight("bold")

# Titles and tick sizes for the dendrogram axes.
ax.set_title("dendrogram-basic · seaborn · pyplots.ai", fontsize=24)
ax.set_xlabel("Iris Samples (by Species)", fontsize=20)
ax.set_ylabel("Distance (Ward Linkage)", fontsize=20)
for axis_name, tick_size in (("y", 16), ("x", 14)):
    ax.tick_params(axis=axis_name, labelsize=tick_size)

# Faint dashed horizontal grid lines, kept behind the plotted artists.
ax.set_axisbelow(True)
ax.grid(True, axis="y", linestyle="--", alpha=0.3)

# Size, tilt and right-align the heatmap's feature (x-axis) tick labels.
for feature_label in g.ax_heatmap.get_xticklabels():
    feature_label.set_ha("right")
    feature_label.set_rotation(30)
    feature_label.set_fontsize(14)

# Colorbar axes: tick label size and caption.
cbar = g.cax
cbar.set_ylabel("Feature Value", fontsize=14)
cbar.tick_params(labelsize=12)

# Species legend for the clustermap, built from square-marker Line2D proxies
# (one per entry of species_colors).
legend_handles = []
for species_name, species_color in species_colors.items():
    proxy = plt.Line2D(
        [0],
        [0],
        marker="s",
        color="w",
        markerfacecolor=species_color,
        markersize=12,
        label=species_name,
    )
    legend_handles.append(proxy)

legend_frame = {"framealpha": 0.95, "edgecolor": "#cccccc", "fancybox": True}
g.ax_heatmap.legend(
    handles=legend_handles,
    title="Species",
    title_fontsize=13,
    fontsize=12,
    loc="upper left",
    bbox_to_anchor=(1.15, 1.0),  # anchor point lies to the right of the heatmap axes
    **legend_frame,
)

# Legend for the dendrogram axes: empty square-marker scatters serve as
# swatches, one per species, colored from `palette`.
for swatch_name, swatch_color in zip(species_names, palette, strict=True):
    ax.scatter([], [], c=[swatch_color], s=150, label=swatch_name, marker="s")
ax.legend(title="Species", loc="upper right", fontsize=14, title_fontsize=16, framealpha=0.9)

# Figure-level title for the clustermap, placed slightly above the top edge.
g.figure.suptitle(
    "dendrogram-basic \u00b7 seaborn \u00b7 pyplots.ai",
    y=1.02,
    fontsize=24,
    fontweight="medium",
)

# Remove the top and right spines on both axes; left and bottom are kept.
for target_ax in (ax, g.ax_heatmap):
    sns.despine(ax=target_ax, left=False, bottom=False)

# Tighten the layout of the current pyplot figure, then write plot.png twice
# with identical options: once through pyplot (the current figure) and once
# through the clustermap's own figure, so the second write is the one that
# remains on disk.
export_options = {"dpi": 300, "bbox_inches": "tight"}
output_path = "plot.png"

plt.tight_layout()
plt.savefig(output_path, **export_options)
g.figure.savefig(output_path, **export_options)
Loading
Loading