Skip to content
Merged
149 changes: 75 additions & 74 deletions plots/dendrogram-basic/implementations/bokeh.py
Original file line number Diff line number Diff line change
@@ -1,21 +1,19 @@
""" pyplots.ai
"""pyplots.ai
dendrogram-basic: Basic Dendrogram
Library: bokeh 3.8.1 | Python 3.13.11
Quality: 91/100 | Created: 2025-12-23
Library: bokeh 3.8.2 | Python 3.14.3
Quality: /100 | Updated: 2026-04-05
Copy link

Copilot AI Apr 5, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The implementation header is malformed: the line `Quality: /100 | Updated: ...` is missing the numeric quality score (or a placeholder such as `pending`). This breaks the standard 4-line header format used across plot implementations and can also break any tooling that parses the score or date from this line.

Suggested change
Quality: /100 | Updated: 2026-04-05
Quality: pending/100 | Updated: 2026-04-05

Copilot uses AI. Check for mistakes.
"""

import numpy as np
from bokeh.io import export_png
from bokeh.models import Label
from bokeh.models import ColumnDataSource, Label
from bokeh.plotting import figure, output_file, save
from scipy.cluster.hierarchy import leaves_list, linkage


# Data - Iris flower measurements (4 features for 15 samples)
np.random.seed(42)

# Simulate iris-like measurements: sepal length, sepal width, petal length, petal width
# Three species with distinct characteristics
samples_per_species = 5

labels = []
Expand All @@ -26,10 +24,10 @@
labels.append(f"Setosa-{i + 1}")
data.append(
[
5.0 + np.random.randn() * 0.3, # sepal length
3.4 + np.random.randn() * 0.3, # sepal width
1.5 + np.random.randn() * 0.2, # petal length
0.3 + np.random.randn() * 0.1, # petal width
5.0 + np.random.randn() * 0.3,
3.4 + np.random.randn() * 0.3,
1.5 + np.random.randn() * 0.2,
0.3 + np.random.randn() * 0.1,
]
)

Expand All @@ -38,10 +36,10 @@
labels.append(f"Versicolor-{i + 1}")
data.append(
[
5.9 + np.random.randn() * 0.4, # sepal length
2.8 + np.random.randn() * 0.3, # sepal width
4.3 + np.random.randn() * 0.4, # petal length
1.3 + np.random.randn() * 0.2, # petal width
5.9 + np.random.randn() * 0.4,
2.8 + np.random.randn() * 0.3,
4.3 + np.random.randn() * 0.4,
1.3 + np.random.randn() * 0.2,
]
)

Expand All @@ -50,10 +48,10 @@
labels.append(f"Virginica-{i + 1}")
data.append(
[
6.6 + np.random.randn() * 0.5, # sepal length
3.0 + np.random.randn() * 0.3, # sepal width
5.5 + np.random.randn() * 0.5, # petal length
2.0 + np.random.randn() * 0.3, # petal width
6.6 + np.random.randn() * 0.5,
3.0 + np.random.randn() * 0.3,
5.5 + np.random.randn() * 0.5,
2.0 + np.random.randn() * 0.3,
]
)

Expand All @@ -68,7 +66,6 @@
ordered_labels = [labels[i] for i in leaf_order]

# Build dendrogram structure manually
# Position of each node (leaf nodes get integer positions)
node_positions = {}
for idx, leaf_idx in enumerate(leaf_order):
node_positions[leaf_idx] = idx
Expand All @@ -77,103 +74,107 @@
max_dist = linkage_matrix[:, 2].max()
color_threshold = 0.7 * max_dist

# Collect line segments for drawing
line_xs = []
line_ys = []
line_colors = []
# Collect line segments grouped by color for multi_line rendering
above_xs, above_ys = [], []
below_xs, below_ys = [], []

# Process each merge in the linkage matrix
for i, (left, right, dist, _) in enumerate(linkage_matrix):
left, right = int(left), int(right)
new_node = n_samples + i

# Get x positions of children
left_x = node_positions[left]
right_x = node_positions[right]
left_y = 0 if left < n_samples else linkage_matrix[left - n_samples, 2]
right_y = 0 if right < n_samples else linkage_matrix[right - n_samples, 2]

# Get y positions (heights) of children
if left < n_samples:
left_y = 0
else:
left_y = linkage_matrix[left - n_samples, 2]

if right < n_samples:
right_y = 0
else:
right_y = linkage_matrix[right - n_samples, 2]

# New node position is midpoint of children
new_x = (left_x + right_x) / 2
node_positions[new_node] = new_x

# Determine color based on threshold
color = "#306998" if dist > color_threshold else "#FFD43B"

# Draw left vertical line
line_xs.append([left_x, left_x])
line_ys.append([left_y, dist])
line_colors.append(color)
# U-shaped connector: left vertical, horizontal, right vertical
xs = [left_x, left_x, right_x, right_x]
ys = [left_y, dist, dist, right_y]

# Draw right vertical line
line_xs.append([right_x, right_x])
line_ys.append([right_y, dist])
line_colors.append(color)

# Draw horizontal line connecting the two
line_xs.append([left_x, right_x])
line_ys.append([dist, dist])
line_colors.append(color)
if dist > color_threshold:
above_xs.append(xs)
above_ys.append(ys)
else:
below_xs.append(xs)
below_ys.append(ys)

# Create figure with extra space at bottom for labels
# Plot
p = figure(
width=4800,
height=2700,
title="dendrogram-basic · bokeh · pyplots.ai",
x_axis_label="Sample",
y_axis_label="Distance (Ward)",
x_range=(-0.5, n_samples - 0.5),
y_range=(-max_dist * 0.18, max_dist * 1.1),
title="Iris Species Clustering · dendrogram-basic · bokeh · pyplots.ai",
x_axis_label="Iris Sample",
y_axis_label="Distance (Ward's Method)",
x_range=(-0.8, n_samples - 0.2),
y_range=(-max_dist * 0.16, max_dist * 1.08),
toolbar_location=None,
)

# Draw dendrogram lines with thicker lines for visibility
for xs, ys, color in zip(line_xs, line_ys, line_colors, strict=True):
p.line(xs, ys, line_width=4, line_color=color)
# Draw dendrogram branches using multi_line with ColumnDataSource
if below_xs:
source_below = ColumnDataSource(data={"xs": below_xs, "ys": below_ys})
p.multi_line(
xs="xs", ys="ys", source=source_below, line_width=4, line_color="#D4A017", legend_label="Within-cluster"
)

# Add leaf labels with larger font
source_above = ColumnDataSource(data={"xs": above_xs, "ys": above_ys})
p.multi_line(xs="xs", ys="ys", source=source_above, line_width=4, line_color="#306998", legend_label="Between-cluster")

# Leaf labels
for idx, label in enumerate(ordered_labels):
label_obj = Label(
x=idx,
y=-max_dist * 0.02,
text=label,
text_font_size="20pt",
text_color="#444444",
text_align="right",
angle=0.785, # 45 degrees in radians
angle=0.785,
angle_units="rad",
y_offset=-15,
)
p.add_layout(label_obj)

# Style - larger fonts for 4800x2700 canvas
p.title.text_font_size = "32pt"
# Style
p.title.text_font_size = "30pt"
p.title.text_font_style = "normal"
p.title.text_color = "#333333"
p.xaxis.axis_label_text_font_size = "24pt"
p.yaxis.axis_label_text_font_size = "24pt"
p.xaxis.major_label_text_font_size = "0pt" # Hide default x-axis labels
p.xaxis.axis_label_text_color = "#555555"
p.yaxis.axis_label_text_color = "#555555"
p.xaxis.major_label_text_font_size = "0pt"
p.yaxis.major_label_text_font_size = "20pt"
p.yaxis.major_label_text_color = "#666666"

# Grid styling
p.xgrid.visible = False
p.ygrid.grid_line_alpha = 0.3
p.ygrid.grid_line_dash = "dashed"
p.ygrid.grid_line_alpha = 0.15
p.ygrid.grid_line_dash = [1, 0]
Copy link

Copilot AI Apr 5, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

`p.ygrid.grid_line_dash = [1, 0]` is a non-obvious dash pattern (it includes a zero-length segment), which makes the intent unclear and may be rejected by validation, depending on Bokeh’s dash-pattern constraints. Prefer a named style (e.g. `"solid"` or `"dashed"`) or a conventional dash array with positive lengths to keep the styling predictable.

Suggested change
p.ygrid.grid_line_dash = [1, 0]
p.ygrid.grid_line_dash = "solid"

Copilot uses AI. Check for mistakes.

# Remove tick marks on x-axis
p.xaxis.axis_line_color = "#CCCCCC"
p.yaxis.axis_line_color = "#CCCCCC"
p.xaxis.major_tick_line_color = None
p.xaxis.minor_tick_line_color = None

# Clean outline
p.yaxis.major_tick_line_color = "#CCCCCC"
p.yaxis.minor_tick_line_color = None
p.outline_line_color = None

# Save outputs
# Legend
p.legend.location = "top_right"
p.legend.label_text_font_size = "18pt"
p.legend.label_text_color = "#444444"
p.legend.glyph_width = 40
p.legend.glyph_height = 6
p.legend.spacing = 8
p.legend.padding = 15
p.legend.background_fill_alpha = 0.9
p.legend.border_line_color = "#DDDDDD"
p.legend.border_line_alpha = 0.5

# Save
export_png(p, filename="plot.png")
output_file("plot.html")
save(p)
10 changes: 5 additions & 5 deletions plots/dendrogram-basic/metadata/bokeh.yaml
Original file line number Diff line number Diff line change
@@ -1,15 +1,15 @@
library: bokeh
specification_id: dendrogram-basic
created: '2025-12-23T10:01:43Z'
updated: '2025-12-23T10:07:56Z'
generated_by: claude-opus-4-5-20251101
updated: '2026-04-05T20:00:00+00:00'
generated_by: claude-opus-4-6
workflow_run: 20457532402
issue: 0
python_version: 3.13.11
library_version: 3.8.1
python_version: '3.14.3'
library_version: 3.8.2
preview_url: https://storage.googleapis.com/pyplots-images/plots/dendrogram-basic/bokeh/plot.png
preview_html: https://storage.googleapis.com/pyplots-images/plots/dendrogram-basic/bokeh/plot.html
quality_score: 91
quality_score: null
Copy link

Copilot AI Apr 5, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

`quality_score` is set to `null`, but other plot metadata files consistently store a numeric score here (even when a review exists). If the score is not yet available, consider keeping the previous score until the automated review updates it, or use the repository’s established numeric placeholder, to avoid breaking downstream consumers that expect an int.

Suggested change
quality_score: null
quality_score: 0

Copilot uses AI. Check for mistakes.
impl_tags:
dependencies:
- scipy
Expand Down
Loading