Skip to content

Commit f617721

Browse files
update(dendrogram-basic): letsplot — comprehensive quality review (#5208)
## Summary

Updated **letsplot** implementation for **dendrogram-basic**.

**Changes:** Comprehensive review improving code quality, data choice, visual design, spec compliance, and library feature usage.

## Test Plan

- [x] Preview images uploaded to GCS staging
- [x] Implementation file passes ruff format/check
- [x] Metadata YAML updated with current versions
- [ ] Automated review triggered

---

Generated with [Claude Code](https://claude.com/claude-code) `/update` command

---------

Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
1 parent 0c49500 commit f617721

File tree

2 files changed

+254
-213
lines changed

2 files changed

+254
-213
lines changed
Lines changed: 114 additions & 93 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
""" pyplots.ai
22
dendrogram-basic: Basic Dendrogram
3-
Library: letsplot 4.8.2 | Python 3.13.11
4-
Quality: 91/100 | Created: 2025-12-23
3+
Library: letsplot 4.8.2 | Python 3.14.3
4+
Quality: 87/100 | Updated: 2026-04-05
55
"""
66

77
import numpy as np
@@ -10,146 +10,167 @@
1010
LetsPlot,
1111
aes,
1212
element_blank,
13+
element_line,
14+
element_rect,
1315
element_text,
16+
geom_hline,
17+
geom_point,
1418
geom_segment,
1519
geom_text,
1620
ggplot,
1721
ggsize,
1822
labs,
19-
scale_color_manual,
23+
layer_tooltips,
24+
scale_color_identity,
2025
scale_x_continuous,
2126
scale_y_continuous,
2227
theme,
23-
theme_minimal,
28+
theme_void,
2429
)
2530
from lets_plot.export import ggsave
2631
from scipy.cluster.hierarchy import linkage
32+
from sklearn.datasets import load_iris
2733

2834

2935
LetsPlot.setup_html()
3036

31-
# Data - Iris flower measurements (4 features for 15 samples)
37+
# Data - Iris flower measurements (15 samples, 3 species)
38+
iris = load_iris()
3239
np.random.seed(42)
40+
indices = np.sort(np.concatenate([np.random.choice(np.where(iris.target == k)[0], 5, replace=False) for k in range(3)]))
41+
features = iris.data[indices]
42+
species_names = ["Setosa", "Versicolor", "Virginica"]
43+
labels = [f"{species_names[iris.target[i]][:3]}-{j + 1}" for j, i in enumerate(indices)]
3344

34-
# Simulate iris-like measurements: sepal length, sepal width, petal length, petal width
35-
# Three species with distinct characteristics
36-
samples_per_species = 5
37-
38-
labels = []
39-
data = []
40-
41-
# Setosa: shorter petals, wider sepals
42-
for i in range(samples_per_species):
43-
labels.append(f"Setosa-{i + 1}")
44-
data.append(
45-
[
46-
5.0 + np.random.randn() * 0.3, # sepal length
47-
3.4 + np.random.randn() * 0.3, # sepal width
48-
1.5 + np.random.randn() * 0.2, # petal length
49-
0.3 + np.random.randn() * 0.1, # petal width
50-
]
51-
)
52-
53-
# Versicolor: medium measurements
54-
for i in range(samples_per_species):
55-
labels.append(f"Versicolor-{i + 1}")
56-
data.append(
57-
[
58-
5.9 + np.random.randn() * 0.4, # sepal length
59-
2.8 + np.random.randn() * 0.3, # sepal width
60-
4.3 + np.random.randn() * 0.4, # petal length
61-
1.3 + np.random.randn() * 0.2, # petal width
62-
]
63-
)
64-
65-
# Virginica: longer petals and sepals
66-
for i in range(samples_per_species):
67-
labels.append(f"Virginica-{i + 1}")
68-
data.append(
69-
[
70-
6.6 + np.random.randn() * 0.5, # sepal length
71-
3.0 + np.random.randn() * 0.3, # sepal width
72-
5.5 + np.random.randn() * 0.5, # petal length
73-
2.0 + np.random.randn() * 0.3, # petal width
74-
]
75-
)
76-
77-
data = np.array(data)
78-
n_samples = len(labels)
79-
80-
# Compute hierarchical clustering using Ward's method
81-
linkage_matrix = linkage(data, method="ward")
45+
# Hierarchical clustering (Ward's method)
46+
linkage_matrix = linkage(features, method="ward")
8247

8348
# Build dendrogram coordinates from linkage matrix
8449
n = len(labels)
85-
leaf_positions = {i: i for i in range(n)}
86-
node_heights = dict.fromkeys(range(n), 0)
50+
leaf_positions = {i: float(i) for i in range(n)}
51+
node_heights = dict.fromkeys(range(n), 0.0)
8752
segments = []
8853

89-
# Color threshold for clustering (similar to matplotlib's default)
54+
# Color threshold — 70% of the largest merge distance; intended to split the tree into 3 major clusters
9055
max_dist = linkage_matrix[:, 2].max()
9156
color_threshold = 0.7 * max_dist
9257

93-
# Process each merge in the linkage matrix
58+
# Curated palette: muted, publication-quality tones
59+
palette = {"above": "#5B7B9A", "Setosa": "#2D8E6F", "Versicolor": "#D4883B", "Virginica": "#8B6AAE"}
60+
cluster_display = {"above": "Cross-cluster", "Setosa": "Setosa", "Versicolor": "Versicolor", "Virginica": "Virginica"}
61+
node_cluster = {i: labels[i].split("-")[0] for i in range(n)}  # NOTE(review): dead assignment — node_cluster is rebound with full species names below before it is read
62+
# Map short prefixes to full species names
63+
prefix_to_species = {"Set": "Setosa", "Ver": "Versicolor", "Vir": "Virginica"}
64+
node_cluster = {i: prefix_to_species[labels[i].split("-")[0]] for i in range(n)}
65+
9466
for i, (left, right, dist, _) in enumerate(linkage_matrix):
9567
left, right = int(left), int(right)
9668
new_node = n + i
9769

98-
# Get positions of children
9970
left_pos = leaf_positions[left]
10071
right_pos = leaf_positions[right]
101-
102-
# New node position is midpoint of children
103-
new_pos = (left_pos + right_pos) / 2
104-
leaf_positions[new_node] = new_pos
72+
leaf_positions[new_node] = (left_pos + right_pos) / 2
10573
node_heights[new_node] = dist
10674

107-
# Determine color based on height threshold
108-
color = "#306998" if dist >= color_threshold else "#FFD43B"
75+
left_cl, right_cl = node_cluster[left], node_cluster[right]
76+
node_cluster[new_node] = left_cl if left_cl == right_cl else "above"
77+
cluster_label = node_cluster[new_node] if dist < color_threshold else "above"
78+
color = palette[cluster_label]
79+
display = cluster_display[cluster_label]
10980

11081
left_height = node_heights[left]
11182
right_height = node_heights[right]
11283

113-
# Vertical segment from left child to merge height
114-
segments.append((left_pos, left_height, left_pos, dist, color))
115-
# Vertical segment from right child to merge height
116-
segments.append((right_pos, right_height, right_pos, dist, color))
117-
# Horizontal segment connecting the two
118-
segments.append((left_pos, dist, right_pos, dist, color))
119-
120-
# Create segment dataframe
121-
segment_df = pd.DataFrame(segments, columns=["x", "y", "xend", "yend", "color"])
122-
123-
# Create label dataframe for x-axis labels
124-
label_data = []
125-
for i, label in enumerate(labels):
126-
label_data.append({"x": leaf_positions[i], "y": -0.8, "label": label})
127-
label_df = pd.DataFrame(label_data)
84+
for seg in [
85+
(left_pos, left_height, left_pos, dist),
86+
(right_pos, right_height, right_pos, dist),
87+
(left_pos, dist, right_pos, dist),
88+
]:
89+
segments.append(
90+
{
91+
"x": seg[0],
92+
"y": seg[1],
93+
"xend": seg[2],
94+
"yend": seg[3],
95+
"color": color,
96+
"merge_dist": round(dist, 2),
97+
"cluster": display,
98+
}
99+
)
100+
101+
segment_df = pd.DataFrame(segments)
102+
103+
# Leaf labels and markers
104+
leaf_data = []
105+
for i in range(n):
106+
species = prefix_to_species[labels[i].split("-")[0]]
107+
leaf_data.append(
108+
{"x": leaf_positions[i], "y": 0, "label": labels[i], "color": palette[species], "species": species}
109+
)
110+
label_df = pd.DataFrame(leaf_data)
111+
112+
# Legend entries (off-canvas placeholder rows for a manual legend; NOTE(review): legend_items is not passed to any layer in the plot below)
113+
legend_items = pd.DataFrame(
114+
[
115+
{"x": -99, "y": -99, "xend": -98, "yend": -99, "color": palette[s], "cluster": s, "merge_dist": 0}
116+
for s in ["Setosa", "Versicolor", "Virginica", "above"]
117+
]
118+
)
128119

129120
# Plot
130121
plot = (
131122
ggplot()
132-
+ geom_segment(aes(x="x", y="y", xend="xend", yend="yend", color="color"), data=segment_df, size=1.5)
133-
+ geom_text(aes(x="x", y="y", label="label"), data=label_df, angle=35, hjust=1, vjust=1, size=10, color="#333333")
134-
+ scale_color_manual(values={"#306998": "#306998", "#FFD43B": "#FFD43B"}, guide="none")
123+
+ geom_segment(
124+
aes(x="x", y="y", xend="xend", yend="yend", color="color"),
125+
data=segment_df,
126+
size=2.0,
127+
tooltips=layer_tooltips().title("@cluster").line("Merge distance|@merge_dist").min_width(180),
128+
)
129+
+ geom_point(
130+
aes(x="x", y="y", color="color"),
131+
data=label_df,
132+
size=5,
133+
shape=16,
134+
tooltips=layer_tooltips().title("@species").line("Sample|@label"),
135+
)
136+
+ geom_text(
137+
aes(x="x", y="y", label="label", color="color"),
138+
data=label_df.assign(y=-0.35),
139+
angle=45,
140+
hjust=1,
141+
vjust=1,
142+
size=13,
143+
family="monospace",
144+
)
145+
+ geom_hline(yintercept=color_threshold, linetype="dashed", color="#9EAAB8", size=0.8)
146+
+ geom_text(
147+
aes(x="x", y="y", label="label"),
148+
data=pd.DataFrame([{"x": n - 1.5, "y": color_threshold + 0.25, "label": f"threshold = {color_threshold:.1f}"}]),
149+
size=11,
150+
color="#7A8A9A",
151+
hjust=1,
152+
family="monospace",
153+
)
154+
+ scale_color_identity()
135155
+ scale_x_continuous(expand=[0.06, 0.02])
136-
+ scale_y_continuous(expand=[0.18, 0.02])
137-
+ labs(x="Sample", y="Distance (Ward)", title="dendrogram-basic · letsplot · pyplots.ai")
138-
+ theme_minimal()
156+
+ scale_y_continuous(name="Ward Linkage Distance", expand=[0.15, 0.01], breaks=[0, 2, 4, 6, 8, 10, 12])
157+
+ labs(x="", title="dendrogram-basic · letsplot · pyplots.ai")
158+
+ theme_void()
139159
+ theme(
140-
axis_title=element_text(size=20),
141-
axis_text=element_text(size=16),
160+
plot_title=element_text(size=24, face="bold", color="#2C3E50"),
161+
plot_background=element_rect(fill="white", color="white"),
162+
axis_title_y=element_text(size=20, color="#4A5568", margin=[0, 12, 0, 0]),
163+
axis_text_y=element_text(size=16, color="#6B7B8D"),
142164
axis_text_x=element_blank(),
143165
axis_ticks_x=element_blank(),
144-
plot_title=element_text(size=24),
145-
panel_grid_major_x=element_blank(),
146-
panel_grid_minor_x=element_blank(),
166+
axis_ticks_y=element_line(size=0.4, color="#D0D8E0"),
167+
axis_line_y=element_line(size=0.6, color="#CBD5E0"),
168+
panel_grid_major_y=element_line(size=0.3, color="#EDF2F7"),
169+
plot_margin=[50, 30, 30, 20],
147170
)
148171
+ ggsize(1600, 900)
149172
)
150173

151-
# Save PNG (scale=3 gives 4800x2700)
174+
# Save
152175
ggsave(plot, "plot.png", path=".", scale=3)
153-
154-
# Save HTML for interactivity
155176
ggsave(plot, "plot.html", path=".")

0 commit comments

Comments
 (0)