|
1 | 1 | """ pyplots.ai |
2 | 2 | dendrogram-basic: Basic Dendrogram |
3 | | -Library: seaborn 0.13.2 | Python 3.13.11 |
4 | | -Quality: 91/100 | Created: 2025-12-23 |
| 3 | +Library: seaborn 0.13.2 | Python 3.14.3 |
| 4 | +Quality: 89/100 | Updated: 2026-04-05 |
5 | 5 | """ |
6 | 6 |
|
7 | 7 | import matplotlib.pyplot as plt |
8 | 8 | import numpy as np |
| 9 | +import pandas as pd |
9 | 10 | import seaborn as sns |
10 | | -from scipy.cluster.hierarchy import dendrogram, linkage |
11 | | -from sklearn.datasets import load_iris |
12 | 11 |
|
13 | 12 |
|
14 | | -# Set seaborn style for better aesthetics |
15 | | -sns.set_theme(style="whitegrid") |
| 13 | +# Style - leverage seaborn's distinctive theming |
| 14 | +sns.set_theme(style="white", rc={"axes.linewidth": 0.8, "font.family": "sans-serif"}) |
16 | 15 | sns.set_context("talk", font_scale=1.2) |
17 | 16 |
|
18 | | -# Load iris dataset - use subset for readability (spec recommends 10-50 items) |
19 | | -np.random.seed(42) |
20 | | -iris = load_iris() |
| 17 | +# Custom palette starting with Python Blue |
| 18 | +species_palette = sns.color_palette(["#306998", "#E8843C", "#4EA86B"]) |
21 | 19 | species_names = ["Setosa", "Versicolor", "Virginica"] |
| 20 | +species_colors = dict(zip(species_names, species_palette, strict=True)) |
22 | 21 |
|
23 | | -# Select 10 samples from each species (30 total) for clearer visualization |
24 | | -indices = np.concatenate([np.random.choice(np.where(iris.target == i)[0], 10, replace=False) for i in range(3)]) |
25 | | - |
26 | | -X = iris.data[indices] |
27 | | - |
28 | | -# Create clear labels: Species-Number format using vectorized approach |
29 | | -species_ids = iris.target[indices] |
30 | | -labels = [f"{species_names[sid]}-{np.sum(species_ids[: i + 1] == sid)}" for i, sid in enumerate(species_ids)] |
31 | | - |
32 | | -# Compute linkage matrix using Ward's method |
33 | | -linkage_matrix = linkage(X, method="ward") |
| 22 | +# Data - use seaborn's iris dataset (30 samples for readable dendrogram) |
| 23 | +np.random.seed(42) |
| 24 | +iris = sns.load_dataset("iris") |
| 25 | +samples = ( |
| 26 | + iris.groupby("species").apply(lambda g: g.sample(10, random_state=42), include_groups=False).reset_index(level=0) |
| 27 | +) |
34 | 28 |
|
35 | | -# Create figure |
36 | | -fig, ax = plt.subplots(figsize=(16, 9)) |
| 29 | +feature_cols = ["sepal_length", "sepal_width", "petal_length", "petal_width"] |
| 30 | +features = samples[feature_cols].copy() |
| 31 | + |
| 32 | +# Build sample labels: Species-Number |
| 33 | +counters = dict.fromkeys(["setosa", "versicolor", "virginica"], 0) |
| 34 | +labels = [] |
| 35 | +species_list = [] |
| 36 | +for species in samples["species"]: |
| 37 | + counters[species] += 1 |
| 38 | + labels.append(f"{species.title()}-{counters[species]}") |
| 39 | + species_list.append(species.title()) |
| 40 | + |
| 41 | +features.index = labels |
| 42 | + |
| 43 | +# Rename columns to readable format |
| 44 | +features.columns = ["Sepal Length", "Sepal Width", "Petal Length", "Petal Width"] |
| 45 | + |
| 46 | +# Row colors by species - seaborn distinctive feature for annotating clusters |
| 47 | +row_colors = pd.Series([species_colors[sp] for sp in species_list], index=labels, name="Species") |
| 48 | + |
| 49 | +# sns.clustermap - seaborn's distinctive hierarchical clustering + dendrogram |
| 50 | +# This IS the idiomatic seaborn way to visualize dendrograms with data context |
| 51 | +g = sns.clustermap( |
| 52 | + features, |
| 53 | + method="ward", |
| 54 | + row_colors=row_colors, |
| 55 | + col_cluster=True, |
| 56 | + cmap=sns.color_palette("viridis", as_cmap=True), |
| 57 | + figsize=(16, 9), |
| 58 | + dendrogram_ratio=(0.25, 0.12), |
| 59 | + linewidths=0.5, |
| 60 | + linecolor="white", |
| 61 | + cbar_kws={"label": "Feature Value"}, |
| 62 | + tree_kws={"linewidths": 1.8, "colors": "#666666"}, |
| 63 | + xticklabels=True, |
| 64 | + yticklabels=True, |
| 65 | +) |
37 | 66 |
|
38 | | -# Define custom colors using seaborn colorblind palette for species |
39 | | -palette = sns.color_palette("colorblind", n_colors=3) |
40 | | -species_color_map = dict(zip(species_names, palette, strict=True)) |
| 67 | +# Customize the row dendrogram (main dendrogram showing sample clustering) |
| 68 | +row_dendro_ax = g.ax_row_dendrogram |
| 69 | +row_dendro_ax.set_xlabel("Distance (Ward)", fontsize=14) |
41 | 70 |
|
42 | | -# Create dendrogram |
43 | | -dendrogram( |
44 | | - linkage_matrix, |
45 | | - labels=labels, |
46 | | - leaf_rotation=45, |
47 | | - leaf_font_size=14, |
48 | | - ax=ax, |
49 | | - above_threshold_color="#888888", |
50 | | - color_threshold=0.7 * max(linkage_matrix[:, 2]), |
51 | | -) |
| 71 | +# Customize heatmap axis labels |
| 72 | +g.ax_heatmap.set_xlabel("Iris Features", fontsize=20) |
| 73 | +g.ax_heatmap.set_ylabel("Iris Samples (by Species)", fontsize=20) |
| 74 | +g.ax_heatmap.tick_params(axis="both", labelsize=13) |
52 | 75 |
|
53 | | -# Color the x-axis labels by species using exact palette colors |
54 | | -for lbl in ax.get_xticklabels(): |
55 | | - text = lbl.get_text() |
56 | | - species = text.rsplit("-", 1)[0] |
57 | | - if species in species_color_map: |
58 | | - lbl.set_color(species_color_map[species]) |
| 76 | +# Color y-axis (sample) labels by species |
| 77 | +for lbl in g.ax_heatmap.get_yticklabels(): |
| 78 | + species = lbl.get_text().rsplit("-", 1)[0] |
| 79 | + if species in species_colors: |
| 80 | + lbl.set_color(species_colors[species]) |
59 | 81 | lbl.set_fontweight("bold") |
60 | 82 |
|
61 | | -# Style the plot with seaborn-compatible settings |
62 | | -ax.set_xlabel("Iris Samples (by Species)", fontsize=20) |
63 | | -ax.set_ylabel("Distance (Ward Linkage)", fontsize=20) |
64 | | -ax.set_title("dendrogram-basic · seaborn · pyplots.ai", fontsize=24) |
65 | | -ax.tick_params(axis="y", labelsize=16) |
66 | | -ax.tick_params(axis="x", labelsize=14) |
67 | | - |
68 | | -# Make grid subtle |
69 | | -ax.grid(True, alpha=0.3, linestyle="--", axis="y") |
70 | | -ax.set_axisbelow(True) |
| 83 | +# Style x-axis (feature) labels: font size, rotation, alignment |
| 84 | +for lbl in g.ax_heatmap.get_xticklabels(): |
| 85 | + lbl.set_fontsize(14) |
| 86 | + lbl.set_rotation(30) |
| 87 | + lbl.set_ha("right") |
| 88 | + |
| 89 | +# Style the colorbar |
| 90 | +cbar = g.cax |
| 91 | +cbar.tick_params(labelsize=12) |
| 92 | +cbar.set_ylabel("Feature Value", fontsize=14) |
| 93 | + |
| 94 | +# Add species legend using Line2D proxy handles (square markers) |
| 95 | +legend_handles = [ |
| 96 | + plt.Line2D([0], [0], marker="s", color="w", markerfacecolor=c, markersize=12, label=n) |
| 97 | + for n, c in species_colors.items() |
| 98 | +] |
| 99 | +g.ax_heatmap.legend( |
| 100 | + handles=legend_handles, |
| 101 | + title="Species", |
| 102 | + loc="upper left", |
| 103 | + bbox_to_anchor=(1.15, 1.0), |
| 104 | + fontsize=12, |
| 105 | + title_fontsize=13, |
| 106 | + framealpha=0.95, |
| 107 | + edgecolor="#cccccc", |
| 108 | + fancybox=True, |
| 109 | +) |
71 | 110 |
|
72 | | -# Add legend using scatter plot handles for exact color matching |
73 | | -for i, species in enumerate(species_names): |
74 | | - ax.scatter([], [], c=[palette[i]], s=150, label=species, marker="s") |
75 | | -ax.legend(title="Species", loc="upper right", fontsize=14, title_fontsize=16, framealpha=0.9) |
| 111 | +# Title - placed on the figure |
| 112 | +g.figure.suptitle("dendrogram-basic \u00b7 seaborn \u00b7 pyplots.ai", fontsize=24, fontweight="medium", y=1.02) |
76 | 113 |
|
77 | | -# Remove top and right spines for cleaner look |
78 | | -sns.despine(ax=ax) |
| 114 | +# Visual refinement: drop the heatmap's top and right spines |
| 115 | +sns.despine(ax=g.ax_heatmap, left=False, bottom=False) |
79 | 116 |
|
80 | | -plt.tight_layout() |
81 | | -plt.savefig("plot.png", dpi=300, bbox_inches="tight") |
| 117 | +g.figure.savefig("plot.png", dpi=300, bbox_inches="tight") |
0 commit comments