Skip to content

Commit 6a4538a

Browse files
feat(letsplot): implement tree-phylogenetic (#3109)
## Implementation: `tree-phylogenetic` - letsplot Implements the **letsplot** version of `tree-phylogenetic`. **File:** `plots/tree-phylogenetic/implementations/letsplot.py` **Parent Issue:** #3070 --- :robot: *[impl-generate workflow](https://github.com/MarkusNeusinger/pyplots/actions/runs/20620338760)* --------- Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com> Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
1 parent ccf10da commit 6a4538a

2 files changed

Lines changed: 240 additions & 0 deletions

File tree

Lines changed: 214 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,214 @@
1+
""" pyplots.ai
2+
tree-phylogenetic: Phylogenetic Tree Diagram
3+
Library: letsplot 4.8.2 | Python 3.13.11
4+
Quality: 91/100 | Created: 2025-12-31
5+
"""
6+
7+
import re
8+
9+
import pandas as pd
10+
from lets_plot import *
11+
12+
13+
LetsPlot.setup_html()
14+
15+
16+
# Simple Newick parser for phylogenetic tree
17+
def parse_newick(newick_str):
    """Parse a Newick format string into a flat, pre-ordered list of node dicts.

    Each node is a dict with the keys ``id`` (1-based, assigned in pre-order,
    so leaves appear in left-to-right order when sorted by id), ``name``
    (leaf name or internal-node label, ``""`` when absent), ``length``
    (branch length, ``0.1`` when absent or unparseable), ``parent`` (parent
    id, ``None`` for the root), ``depth`` (0 for the root) and ``children``
    (list of direct child ids).

    Args:
        newick_str: Tree in Newick notation; surrounding whitespace and a
            trailing ``;`` are ignored.

    Returns:
        List of node dicts; the first element is the root.

    Raises:
        ValueError: If parentheses are unbalanced or a subtree has text in
            front of its opening parenthesis.
    """
    default_length = 0.1
    # Signed decimal with optional exponent, e.g. "0.12", ".5", "-0.01", "1e-3".
    number_re = re.compile(r"\s*([-+]?(?:\d+\.?\d*|\.\d+)(?:[eE][-+]?\d+)?)")
    counter = [0]

    def split_label(text):
        """Split ``name:length`` into ``(name, length)``; both parts are optional."""
        name, _, raw_length = text.partition(":")
        found = number_re.match(raw_length)
        length = float(found.group(1)) if found else default_length
        return name.strip(), length

    def make_node(name, length, parent_id, depth):
        """Create a node dict and give it the next sequential id."""
        counter[0] += 1
        return {
            "id": counter[0],
            "name": name,
            "length": length,
            "parent": parent_id,
            "depth": depth,
            "children": [],
        }

    def split_children(text):
        """Split a child list on the commas that are not nested in parentheses."""
        parts = []
        level = 0
        start = 0
        for i, ch in enumerate(text):
            if ch == "(":
                level += 1
            elif ch == ")":
                level -= 1
            elif ch == "," and level == 0:
                parts.append(text[start:i].strip())
                start = i + 1
        # A blank final segment is ignored; blank segments before a comma are
        # kept and become anonymous leaves.
        tail = text[start:].strip()
        if tail:
            parts.append(tail)
        return parts

    def parse_node(s, parent_id=None, depth=0):
        s = s.strip()

        # Leaf: "name:length", "name", ":length" or empty
        if "(" not in s:
            name, length = split_label(s)
            return [make_node(name, length, parent_id, depth)]

        if not s.startswith("("):
            raise ValueError(f"Malformed Newick subtree (text before '('): {s!r}")

        # Locate the parenthesis that closes the one opening this subtree
        level = 0
        close = -1
        for i, ch in enumerate(s):
            if ch == "(":
                level += 1
            elif ch == ")":
                level -= 1
                if level == 0:
                    close = i
                    break
        if close < 0:
            raise ValueError(f"Unbalanced parentheses in Newick subtree: {s!r}")

        # Whatever follows the closing parenthesis is "label:length" for this
        # internal node; the label must be skipped before reading the length.
        name, length = split_label(s[close + 1 :])

        # The node is created before its children so that ids stay pre-ordered
        node = make_node(name, length, parent_id, depth)
        nodes = [node]
        for child_str in split_children(s[1:close]):
            subtree = parse_node(child_str, node["id"], depth + 1)
            # The first entry of a parsed subtree is always its own root
            node["children"].append(subtree[0]["id"])
            nodes.extend(subtree)
        return nodes

    return parse_node(newick_str.strip().rstrip(";"))
95+
96+
97+
# Example data: primate phylogeny (based on mitochondrial DNA) in Newick notation
newick = "((((Human:0.1,Chimpanzee:0.12):0.08,Gorilla:0.2):0.15,(Orangutan:0.25,Gibbon:0.28):0.1):0.2,(Macaque:0.35,(Baboon:0.3,Mandrill:0.32):0.05):0.15)"

# Flat list of node dicts produced by the parser
nodes = parse_newick(newick)

# Index the parsed nodes by their id so parents and children can be looked up directly
node_dict = {entry["id"]: entry for entry in nodes}
104+
105+
106+
# Calculate x positions (cumulative branch length from root)
107+
def calc_x_positions(node_dict):
    """Store an ``"x"`` coordinate on every node reachable from the root.

    A node's x is its parent's x plus its own ``"length"``; the root starts
    from 0, so the root's x equals its own length. The node dicts are updated
    in place and nothing is returned.

    Args:
        node_dict: Mapping of node id to node dict; each node needs the keys
            ``"id"``, ``"parent"``, ``"length"`` and ``"children"``.
    """
    # The root is the first node that has no parent
    parentless = [entry for entry in node_dict.values() if entry["parent"] is None]
    root = parentless[0]

    # Walk the tree with an explicit stack of (node id, x of its parent)
    pending = [(root["id"], 0)]
    while pending:
        current_id, start_x = pending.pop()
        current = node_dict[current_id]
        current["x"] = start_x + current["length"]
        for child_id in current["children"]:
            pending.append((child_id, current["x"]))
118+
119+
120+
# Calculate y positions (spacing for leaves, centered for internal nodes)
121+
def calc_y_positions(node_dict):
    """Store a ``"y"`` coordinate on every node.

    Leaves (nodes with no children) are ordered by id and placed at
    y = 0, 1, 2, ...; every other node that does not already carry a ``"y"``
    gets the mean of its direct children's y values. The node dicts are
    updated in place and nothing is returned.

    Args:
        node_dict: Mapping of node id to node dict; each node needs the keys
            ``"id"`` and ``"children"``.
    """
    # Leaves in id order, one row each
    tips = sorted(
        (entry for entry in node_dict.values() if not entry["children"]),
        key=lambda entry: entry["id"],
    )
    for row, tip in enumerate(tips):
        tip["y"] = row

    def resolve(node_id):
        """Return the node's y, computing it from its children on first use."""
        entry = node_dict[node_id]
        if "y" not in entry:
            heights = [resolve(child_id) for child_id in entry["children"]]
            entry["y"] = sum(heights) / len(heights)
        return entry["y"]

    for entry in node_dict.values():
        resolve(entry["id"])
141+
142+
143+
# Lay out the tree: x from cumulative branch length, y from leaf order
calc_x_positions(node_dict)
calc_y_positions(node_dict)

# Every node with a parent contributes two segments of the rectangular tree:
# a horizontal branch at the node's y, and a vertical connector at the parent's x
segments = []
for node in node_dict.values():
    if node["parent"] is None:
        continue
    parent = node_dict[node["parent"]]
    segments.extend(
        [
            {"x": parent["x"], "xend": node["x"], "y": node["y"], "yend": node["y"], "type": "horizontal"},
            {"x": parent["x"], "xend": parent["x"], "y": parent["y"], "yend": node["y"], "type": "vertical"},
        ]
    )

df_segments = pd.DataFrame(segments)

# Leaf labels, shifted slightly to the right of each leaf
leaves = [n for n in node_dict.values() if not n["children"]]
df_labels = pd.DataFrame(
    [
        {"x": n["x"] + 0.02, "y": n["y"], "label": n["name"]}
        for n in leaves
    ]
)

# Point positions for every node in the tree (leaves included)
df_nodes = pd.DataFrame(
    [
        {"x": n["x"], "y": n["y"]}
        for n in node_dict.values()
    ]
)

# One colour per leaf name; leaves sharing a colour form one visual group
clade_colors = {
    "Human": "#306998",
    "Chimpanzee": "#306998",
    "Gorilla": "#306998",
    "Orangutan": "#FFD43B",
    "Gibbon": "#FFD43B",
    "Macaque": "#22C55E",
    "Baboon": "#22C55E",
    "Mandrill": "#22C55E",
}

df_labels["color"] = df_labels["label"].map(clade_colors)

# Plot pieces, added below in drawing order
branch_layer = geom_segment(
    aes(x="x", y="y", xend="xend", yend="yend"),
    data=df_segments,
    color="#306998",
    size=1.5,
)
node_layer = geom_point(aes(x="x", y="y"), data=df_nodes, color="#306998", size=4)
leaf_marker_layer = geom_point(aes(x="x", y="y", color="color"), data=df_labels, size=6, show_legend=False)
leaf_text_layer = geom_text(
    aes(x="x", y="y", label="label"),
    data=df_labels,
    hjust=0,
    size=14,
    family="sans-serif",
)
plot_labels = labs(
    title="Primate Evolution · tree-phylogenetic · letsplot · pyplots.ai",
    x="Evolutionary Distance (substitutions per site)",
    y="",
)
# The y axis carries no meaning here, so its title, text, ticks and grid are removed
theme_overrides = theme(
    plot_title=element_text(size=24, face="bold"),
    axis_title_x=element_text(size=20),
    axis_title_y=element_blank(),
    axis_text_x=element_text(size=16),
    axis_text_y=element_blank(),
    axis_ticks_y=element_blank(),
    panel_grid_major_y=element_blank(),
    panel_grid_minor=element_blank(),
    panel_grid_major_x=element_line(color="#E5E5E5", size=0.5),
)

# Assemble the phylogenetic tree plot
plot = (
    ggplot()
    + branch_layer
    + node_layer
    + leaf_marker_layer
    + leaf_text_layer
    + scale_color_identity()
    + scale_x_continuous(limits=[0, 0.85])
    + plot_labels
    + theme_minimal()
    + theme_overrides
    + ggsize(1600, 900)
)

# PNG export: 1600x900 at scale 3 gives 4800x2700
ggsave(plot, "plot.png", path=".", scale=3)

# HTML export keeps the plot interactive
ggsave(plot, "plot.html", path=".")
Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
library: letsplot
2+
specification_id: tree-phylogenetic
3+
created: '2025-12-31T13:55:48Z'
4+
updated: '2025-12-31T14:07:03Z'
5+
generated_by: claude-opus-4-5-20251101
6+
workflow_run: 20620338760
7+
issue: 3070
8+
python_version: 3.13.11
9+
library_version: 4.8.2
10+
preview_url: https://storage.googleapis.com/pyplots-images/plots/tree-phylogenetic/letsplot/plot.png
11+
preview_thumb: https://storage.googleapis.com/pyplots-images/plots/tree-phylogenetic/letsplot/plot_thumb.png
12+
preview_html: https://storage.googleapis.com/pyplots-images/plots/tree-phylogenetic/letsplot/plot.html
13+
quality_score: 91
14+
review:
15+
strengths:
16+
- Excellent implementation of rectangular phylogenetic tree using lets-plot grammar
17+
of graphics
18+
- Custom Newick parser handles complex nested structure correctly
19+
- Color-coded clades (great apes, lesser apes, Old World monkeys) add visual interest
20+
- Branch lengths accurately reflect evolutionary distances with proportional x-axis
21+
positioning
22+
- Clean tree layout with horizontal and vertical segments properly connected
23+
- Good use of theme_minimal with customized axis and grid settings
24+
weaknesses:
25+
- Missing legend to explain the three clade color groups (blue/yellow/green)
26+
- No scale bar to indicate branch length units

0 commit comments

Comments
 (0)