Skip to content

Commit 51c79f6

Browse files
update(dendrogram-basic): letsplot — comprehensive quality review
Comprehensive review improving code quality, data choice, visual design, spec compliance, and library feature usage.
1 parent f514b51 commit 51c79f6

File tree

2 files changed

+92
-91
lines changed

2 files changed

+92
-91
lines changed
Lines changed: 88 additions & 87 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
1-
""" pyplots.ai
1+
"""pyplots.ai
22
dendrogram-basic: Basic Dendrogram
3-
Library: letsplot 4.8.2 | Python 3.13.11
4-
Quality: 91/100 | Created: 2025-12-23
3+
Library: letsplot 4.8.2 | Python 3.14.3
4+
Quality: /100 | Updated: 2026-04-05
55
"""
66

77
import numpy as np
@@ -10,12 +10,14 @@
1010
LetsPlot,
1111
aes,
1212
element_blank,
13+
element_line,
1314
element_text,
1415
geom_segment,
1516
geom_text,
1617
ggplot,
1718
ggsize,
1819
labs,
20+
layer_tooltips,
1921
scale_color_manual,
2022
scale_x_continuous,
2123
scale_y_continuous,
@@ -24,132 +26,131 @@
2426
)
2527
from lets_plot.export import ggsave
2628
from scipy.cluster.hierarchy import linkage
29+
from sklearn.datasets import load_iris
2730

2831

2932
LetsPlot.setup_html()
3033

31-
# Data - Iris flower measurements (4 features for 15 samples)
34+
# Data - Iris flower measurements (15 samples, 3 species)
35+
iris = load_iris()
3236
np.random.seed(42)
37+
indices = np.sort(np.concatenate([np.random.choice(np.where(iris.target == k)[0], 5, replace=False) for k in range(3)]))
38+
features = iris.data[indices]
39+
species_names = ["Setosa", "Versicolor", "Virginica"]
40+
labels = [f"{species_names[iris.target[i]]}-{j + 1}" for j, i in enumerate(indices)]
3341

34-
# Simulate iris-like measurements: sepal length, sepal width, petal length, petal width
35-
# Three species with distinct characteristics
36-
samples_per_species = 5
37-
38-
labels = []
39-
data = []
40-
41-
# Setosa: shorter petals, wider sepals
42-
for i in range(samples_per_species):
43-
labels.append(f"Setosa-{i + 1}")
44-
data.append(
45-
[
46-
5.0 + np.random.randn() * 0.3, # sepal length
47-
3.4 + np.random.randn() * 0.3, # sepal width
48-
1.5 + np.random.randn() * 0.2, # petal length
49-
0.3 + np.random.randn() * 0.1, # petal width
50-
]
51-
)
52-
53-
# Versicolor: medium measurements
54-
for i in range(samples_per_species):
55-
labels.append(f"Versicolor-{i + 1}")
56-
data.append(
57-
[
58-
5.9 + np.random.randn() * 0.4, # sepal length
59-
2.8 + np.random.randn() * 0.3, # sepal width
60-
4.3 + np.random.randn() * 0.4, # petal length
61-
1.3 + np.random.randn() * 0.2, # petal width
62-
]
63-
)
64-
65-
# Virginica: longer petals and sepals
66-
for i in range(samples_per_species):
67-
labels.append(f"Virginica-{i + 1}")
68-
data.append(
69-
[
70-
6.6 + np.random.randn() * 0.5, # sepal length
71-
3.0 + np.random.randn() * 0.3, # sepal width
72-
5.5 + np.random.randn() * 0.5, # petal length
73-
2.0 + np.random.randn() * 0.3, # petal width
74-
]
75-
)
76-
77-
data = np.array(data)
78-
n_samples = len(labels)
79-
80-
# Compute hierarchical clustering using Ward's method
81-
linkage_matrix = linkage(data, method="ward")
42+
# Hierarchical clustering (Ward's method)
43+
linkage_matrix = linkage(features, method="ward")
8244

8345
# Build dendrogram coordinates from linkage matrix
8446
n = len(labels)
85-
leaf_positions = {i: i for i in range(n)}
86-
node_heights = dict.fromkeys(range(n), 0)
47+
leaf_positions = {i: float(i) for i in range(n)}
48+
node_heights = dict.fromkeys(range(n), 0.0)
8749
segments = []
8850

89-
# Color threshold for clustering (similar to matplotlib's default)
51+
# Color threshold at 70% of the max merge distance (scipy's dendrogram default); merges at or above it are drawn as inter-cluster links
9052
max_dist = linkage_matrix[:, 2].max()
9153
color_threshold = 0.7 * max_dist
9254

93-
# Process each merge in the linkage matrix
55+
# Track cluster identity for each node (leaf or merged)
56+
palette = {"above": "#306998", "Setosa": "#4DAF4A", "Versicolor": "#FF7F00", "Virginica": "#984EA3"}
57+
node_cluster = {i: labels[i].split("-")[0] for i in range(n)}
58+
9459
for i, (left, right, dist, _) in enumerate(linkage_matrix):
9560
left, right = int(left), int(right)
9661
new_node = n + i
9762

98-
# Get positions of children
9963
left_pos = leaf_positions[left]
10064
right_pos = leaf_positions[right]
101-
102-
# New node position is midpoint of children
103-
new_pos = (left_pos + right_pos) / 2
104-
leaf_positions[new_node] = new_pos
65+
leaf_positions[new_node] = (left_pos + right_pos) / 2
10566
node_heights[new_node] = dist
10667

107-
# Determine color based on height threshold
108-
color = "#306998" if dist >= color_threshold else "#FFD43B"
68+
# Cluster label: the shared species if both children match and the merge is below the color threshold, otherwise "above"
69+
left_cl, right_cl = node_cluster[left], node_cluster[right]
70+
node_cluster[new_node] = left_cl if left_cl == right_cl else "above"
71+
cluster_label = node_cluster[new_node] if dist < color_threshold else "above"
72+
color = palette[cluster_label]
73+
display_cluster = cluster_label if cluster_label != "above" else "Inter-cluster"
10974

11075
left_height = node_heights[left]
11176
right_height = node_heights[right]
11277

113-
# Vertical segment from left child to merge height
114-
segments.append((left_pos, left_height, left_pos, dist, color))
115-
# Vertical segment from right child to merge height
116-
segments.append((right_pos, right_height, right_pos, dist, color))
117-
# Horizontal segment connecting the two
118-
segments.append((left_pos, dist, right_pos, dist, color))
78+
# Vertical segment from left child up to merge height
79+
segments.append(
80+
{
81+
"x": left_pos,
82+
"y": left_height,
83+
"xend": left_pos,
84+
"yend": dist,
85+
"color": color,
86+
"merge_dist": round(dist, 2),
87+
"cluster": display_cluster,
88+
}
89+
)
90+
# Vertical segment from right child up to merge height
91+
segments.append(
92+
{
93+
"x": right_pos,
94+
"y": right_height,
95+
"xend": right_pos,
96+
"yend": dist,
97+
"color": color,
98+
"merge_dist": round(dist, 2),
99+
"cluster": display_cluster,
100+
}
101+
)
102+
# Horizontal segment connecting the two children
103+
segments.append(
104+
{
105+
"x": left_pos,
106+
"y": dist,
107+
"xend": right_pos,
108+
"yend": dist,
109+
"color": color,
110+
"merge_dist": round(dist, 2),
111+
"cluster": display_cluster,
112+
}
113+
)
119114

120-
# Create segment dataframe
121-
segment_df = pd.DataFrame(segments, columns=["x", "y", "xend", "yend", "color"])
115+
segment_df = pd.DataFrame(segments)
122116

123-
# Create label dataframe for x-axis labels
124-
label_data = []
125-
for i, label in enumerate(labels):
126-
label_data.append({"x": leaf_positions[i], "y": -0.8, "label": label})
127-
label_df = pd.DataFrame(label_data)
117+
# Leaf labels positioned just below y=0
118+
label_df = pd.DataFrame([{"x": leaf_positions[i], "y": -0.3, "label": labels[i]} for i in range(n)])
128119

129120
# Plot
121+
color_values = {v: v for v in palette.values()}
122+
130123
plot = (
131124
ggplot()
132-
+ geom_segment(aes(x="x", y="y", xend="xend", yend="yend", color="color"), data=segment_df, size=1.5)
133-
+ geom_text(aes(x="x", y="y", label="label"), data=label_df, angle=35, hjust=1, vjust=1, size=10, color="#333333")
134-
+ scale_color_manual(values={"#306998": "#306998", "#FFD43B": "#FFD43B"}, guide="none")
135-
+ scale_x_continuous(expand=[0.06, 0.02])
136-
+ scale_y_continuous(expand=[0.18, 0.02])
137-
+ labs(x="Sample", y="Distance (Ward)", title="dendrogram-basic · letsplot · pyplots.ai")
125+
+ geom_segment(
126+
aes(x="x", y="y", xend="xend", yend="yend", color="color"),
127+
data=segment_df,
128+
size=1.8,
129+
tooltips=layer_tooltips().title("Merge").line("Distance|@merge_dist").line("Cluster|@cluster"),
130+
)
131+
+ geom_text(aes(x="x", y="y", label="label"), data=label_df, angle=40, hjust=1, vjust=1, size=10, color="#444444")
132+
+ scale_color_manual(values=color_values, guide="none")
133+
+ scale_x_continuous(expand=[0.05, 0.02])
134+
+ scale_y_continuous(name="Ward Linkage Distance", expand=[0.14, 0.01], breaks=[0, 2, 4, 6, 8, 10, 12])
135+
+ labs(x="", title="dendrogram-basic \u00b7 letsplot \u00b7 pyplots.ai")
138136
+ theme_minimal()
139137
+ theme(
140-
axis_title=element_text(size=20),
138+
plot_title=element_text(size=24, face="bold"),
139+
axis_title_y=element_text(size=20),
141140
axis_text=element_text(size=16),
142141
axis_text_x=element_blank(),
143142
axis_ticks_x=element_blank(),
144-
plot_title=element_text(size=24),
143+
axis_line_x=element_blank(),
144+
axis_line_y=element_line(size=0.5, color="#CCCCCC"),
145145
panel_grid_major_x=element_blank(),
146146
panel_grid_minor_x=element_blank(),
147+
panel_grid_major_y=element_line(size=0.5, color="#E8E8E8"),
148+
panel_grid_minor=element_blank(),
149+
plot_margin=[40, 20, 20, 20],
147150
)
148151
+ ggsize(1600, 900)
149152
)
150153

151-
# Save PNG (scale=3 gives 4800x2700)
154+
# Save a static PNG (scale=3) and an interactive HTML version
152155
ggsave(plot, "plot.png", path=".", scale=3)
153-
154-
# Save HTML for interactivity
155156
ggsave(plot, "plot.html", path=".")

plots/dendrogram-basic/metadata/letsplot.yaml

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,15 @@
11
library: letsplot
22
specification_id: dendrogram-basic
33
created: '2025-12-23T10:01:23Z'
4-
updated: '2025-12-23T10:07:42Z'
5-
generated_by: claude-opus-4-5-20251101
4+
updated: '2026-04-05T20:00:00+00:00'
5+
generated_by: claude-opus-4-6
66
workflow_run: 20457536578
77
issue: 0
8-
python_version: 3.13.11
8+
python_version: '3.14.3'
99
library_version: 4.8.2
1010
preview_url: https://storage.googleapis.com/pyplots-images/plots/dendrogram-basic/letsplot/plot.png
1111
preview_html: https://storage.googleapis.com/pyplots-images/plots/dendrogram-basic/letsplot/plot.html
12-
quality_score: 91
12+
quality_score: null
1313
impl_tags:
1414
dependencies:
1515
- scipy

0 commit comments

Comments
 (0)