|
1 | | -""" pyplots.ai |
| 1 | +"""pyplots.ai |
2 | 2 | dendrogram-basic: Basic Dendrogram |
3 | | -Library: seaborn 0.13.2 | Python 3.13.11 |
4 | | -Quality: 91/100 | Created: 2025-12-23 |
| 3 | +Library: seaborn 0.13.2 | Python 3.14.3 |
| 4 | +Quality: /100 | Updated: 2026-04-05 |
5 | 5 | """ |
6 | 6 |
|
7 | 7 | import matplotlib.pyplot as plt |
8 | 8 | import numpy as np |
9 | 9 | import seaborn as sns |
10 | | -from scipy.cluster.hierarchy import dendrogram, linkage |
11 | | -from sklearn.datasets import load_iris |
| 10 | +from scipy.cluster.hierarchy import dendrogram, linkage, set_link_color_palette |
12 | 11 |
|
13 | 12 |
|
14 | | -# Set seaborn style for better aesthetics |
15 | | -sns.set_theme(style="whitegrid") |
| 13 | +# Style |
| 14 | +sns.set_theme(style="white", rc={"axes.linewidth": 0.8}) |
16 | 15 | sns.set_context("talk", font_scale=1.2) |
| 16 | +palette = sns.color_palette("colorblind", n_colors=3) |
17 | 17 |
|
18 | | -# Load iris dataset - use subset for readability (spec recommends 10-50 items) |
| 18 | +# Data - seaborn's iris dataset, 10 samples per species (30 total) for a readable dendrogram |
19 | 19 | np.random.seed(42) |
20 | | -iris = load_iris() |
21 | | -species_names = ["Setosa", "Versicolor", "Virginica"] |
| 20 | +iris = sns.load_dataset("iris") |
| 21 | +species_names = ["setosa", "versicolor", "virginica"] |
| 22 | +display_names = ["Setosa", "Versicolor", "Virginica"] |
| 23 | +species_color = dict(zip(display_names, palette, strict=True)) |
22 | 24 |
|
23 | | -# Select 10 samples from each species (30 total) for clearer visualization |
24 | | -indices = np.concatenate([np.random.choice(np.where(iris.target == i)[0], 10, replace=False) for i in range(3)]) |
| 25 | +samples = ( |
| 26 | + iris.groupby("species").apply(lambda g: g.sample(10, random_state=42), include_groups=False).reset_index(level=0) |
| 27 | +) |
25 | 28 |
|
26 | | -X = iris.data[indices] |
| 29 | +features = samples[["sepal_length", "sepal_width", "petal_length", "petal_width"]].values |
27 | 30 |
|
28 | | -# Create clear labels: Species-Number format using vectorized approach |
29 | | -species_ids = iris.target[indices] |
30 | | -labels = [f"{species_names[sid]}-{np.sum(species_ids[: i + 1] == sid)}" for i, sid in enumerate(species_ids)] |
| 31 | +# Build labels: Species-Number |
| 32 | +counters = dict.fromkeys(species_names, 0) |
| 33 | +labels = [] |
| 34 | +for species in samples["species"]: |
| 35 | + counters[species] += 1 |
| 36 | + labels.append(f"{species.title()}-{counters[species]}") |
31 | 37 |
|
32 | | -# Compute linkage matrix using Ward's method |
33 | | -linkage_matrix = linkage(X, method="ward") |
| 38 | +# Compute linkage |
| 39 | +linkage_matrix = linkage(features, method="ward") |
34 | 40 |
|
35 | | -# Create figure |
36 | | -fig, ax = plt.subplots(figsize=(16, 9)) |
| 41 | +# Use the species palette as the dendrogram link-color cycle (colors are assigned per cluster below the threshold, not per species) |
| 42 | +hex_colors = ["#{:02x}{:02x}{:02x}".format(int(c[0] * 255), int(c[1] * 255), int(c[2] * 255)) for c in palette] |
| 43 | +set_link_color_palette(hex_colors) |
37 | 44 |
|
38 | | -# Define custom colors using seaborn colorblind palette for species |
39 | | -palette = sns.color_palette("colorblind", n_colors=3) |
40 | | -species_color_map = dict(zip(species_names, palette, strict=True)) |
| 45 | +# Plot |
| 46 | +fig, ax = plt.subplots(figsize=(16, 9)) |
41 | 47 |
|
42 | | -# Create dendrogram |
43 | 48 | dendrogram( |
44 | 49 | linkage_matrix, |
45 | 50 | labels=labels, |
46 | 51 | leaf_rotation=45, |
47 | 52 | leaf_font_size=14, |
48 | 53 | ax=ax, |
49 | | - above_threshold_color="#888888", |
| 54 | + above_threshold_color="#aaaaaa", |
50 | 55 | color_threshold=0.7 * max(linkage_matrix[:, 2]), |
51 | 56 | ) |
52 | 57 |
|
53 | | -# Color the x-axis labels by species using exact palette colors |
| 58 | +set_link_color_palette(None) |
| 59 | + |
| 60 | +# Color x-axis labels by species |
54 | 61 | for lbl in ax.get_xticklabels(): |
55 | | - text = lbl.get_text() |
56 | | - species = text.rsplit("-", 1)[0] |
57 | | - if species in species_color_map: |
58 | | - lbl.set_color(species_color_map[species]) |
| 62 | + species = lbl.get_text().rsplit("-", 1)[0] |
| 63 | + if species in species_color: |
| 64 | + lbl.set_color(species_color[species]) |
59 | 65 | lbl.set_fontweight("bold") |
60 | 66 |
|
61 | | -# Style the plot with seaborn-compatible settings |
| 67 | +# Axes and title |
62 | 68 | ax.set_xlabel("Iris Samples (by Species)", fontsize=20) |
63 | 69 | ax.set_ylabel("Distance (Ward Linkage)", fontsize=20) |
64 | | -ax.set_title("dendrogram-basic · seaborn · pyplots.ai", fontsize=24) |
| 70 | +ax.set_title("dendrogram-basic \u00b7 seaborn \u00b7 pyplots.ai", fontsize=24, fontweight="medium", pad=16) |
65 | 71 | ax.tick_params(axis="y", labelsize=16) |
66 | 72 | ax.tick_params(axis="x", labelsize=14) |
67 | 73 |
|
68 | | -# Make grid subtle |
69 | | -ax.grid(True, alpha=0.3, linestyle="--", axis="y") |
| 74 | +# Grid and spines |
| 75 | +ax.yaxis.grid(True, alpha=0.15, linewidth=0.8, color="#cccccc") |
70 | 76 | ax.set_axisbelow(True) |
71 | | - |
72 | | -# Add legend using scatter plot handles for exact color matching |
73 | | -for i, species in enumerate(species_names): |
74 | | - ax.scatter([], [], c=[palette[i]], s=150, label=species, marker="s") |
75 | | -ax.legend(title="Species", loc="upper right", fontsize=14, title_fontsize=16, framealpha=0.9) |
76 | | - |
77 | | -# Remove top and right spines for cleaner look |
78 | 77 | sns.despine(ax=ax) |
79 | 78 |
|
| 79 | +# Legend |
| 80 | +for name, color in species_color.items(): |
| 81 | + ax.scatter([], [], c=[color], s=150, label=name, marker="s") |
| 82 | +ax.legend(title="Species", loc="upper right", fontsize=14, title_fontsize=16, framealpha=0.9, edgecolor="#dddddd") |
| 83 | + |
80 | 84 | plt.tight_layout() |
81 | 85 | plt.savefig("plot.png", dpi=300, bbox_inches="tight") |
0 commit comments