""" pyplots.ai
tree-phylogenetic: Phylogenetic Tree Diagram
Library: letsplot 4.8.2 | Python 3.13.11
Quality: 91/100 | Created: 2025-12-31
"""
| 6 | + |
| 7 | +import re |
| 8 | + |
| 9 | +import pandas as pd |
| 10 | +from lets_plot import * |
| 11 | + |
| 12 | + |
# One-time Lets-Plot setup, run before any plot is built.
# NOTE(review): presumably configures HTML/JS output for rendering — confirm against the lets-plot docs.
LetsPlot.setup_html()
| 14 | + |
| 15 | + |
# Simple Newick parser for phylogenetic tree
# Branch length: optionally signed decimal with an optional exponent (0.1, .5, 1e-3).
_BRANCH_LENGTH_RE = re.compile(r"\s*([-+]?(?:\d+\.?\d*|\.\d+)(?:[eE][-+]?\d+)?)")
# Length given to any node whose label carries no parsable branch length.
_DEFAULT_BRANCH_LENGTH = 0.1


def _split_label(text):
    """Split a ``name:length`` label into ``(name, length)``.

    Either part may be missing; a missing or unparsable length falls back to
    ``_DEFAULT_BRANCH_LENGTH``.
    """
    name, _, length_text = text.partition(":")
    match = _BRANCH_LENGTH_RE.match(length_text)
    length = float(match.group(1)) if match else _DEFAULT_BRANCH_LENGTH
    return name.strip(), length


def _find_closing_paren(text):
    """Return the index of the ``)`` that closes the ``(`` at ``text[0]``."""
    level = 0
    for index, char in enumerate(text):
        if char == "(":
            level += 1
        elif char == ")":
            level -= 1
            if level == 0:
                return index
    raise ValueError(f"Unbalanced parentheses in Newick fragment: {text!r}")


def _split_children(text):
    """Split a child list on the commas that are not nested inside parentheses."""
    parts = []
    level = 0
    start = 0
    for index, char in enumerate(text):
        if char == "(":
            level += 1
        elif char == ")":
            level -= 1
        elif char == "," and level == 0:
            parts.append(text[start:index].strip())
            start = index + 1
    # A trailing empty segment (e.g. "A,") is ignored.
    tail = text[start:].strip()
    if tail:
        parts.append(tail)
    return parts


def parse_newick(newick_str):
    """Parse a Newick format string into a flat list of node dictionaries.

    Args:
        newick_str: Newick text; surrounding whitespace and a trailing ``;``
            are optional.

    Returns:
        A list of node dicts in pre-order (the root first). Each dict has the
        keys ``id`` (1-based, assigned in pre-order), ``name`` (empty when the
        node is unlabeled), ``length`` (branch length, 0.1 when absent),
        ``parent`` (parent id, ``None`` for the root), ``depth`` (0 for the
        root) and ``children`` (ids of the direct children).

    Raises:
        ValueError: If parentheses are unbalanced or a subtree has text in
            front of its opening parenthesis.
    """
    nodes = []

    def add_node(name, length, parent_id, depth):
        # Nodes are appended in creation order, so the list length is the id counter.
        node = {
            "id": len(nodes) + 1,
            "name": name,
            "length": length,
            "parent": parent_id,
            "depth": depth,
            "children": [],
        }
        nodes.append(node)
        return node

    def parse_node(fragment, parent_id, depth):
        fragment = fragment.strip()

        # Leaf: no subtree, only "name:length" (both parts optional).
        if "(" not in fragment:
            name, length = _split_label(fragment)
            return add_node(name, length, parent_id, depth)

        if not fragment.startswith("("):
            raise ValueError(f"Unexpected text before '(' in Newick fragment: {fragment!r}")

        # Internal node: "(children)name:length". The label after the closing
        # parenthesis holds both the optional name and the optional length.
        close = _find_closing_paren(fragment)
        name, length = _split_label(fragment[close + 1 :])
        node = add_node(name, length, parent_id, depth)

        for child_text in _split_children(fragment[1:close]):
            child = parse_node(child_text, node["id"], depth + 1)
            node["children"].append(child["id"])
        return node

    parse_node(newick_str.strip().rstrip(";"), None, 0)
    return nodes
| 95 | + |
| 96 | + |
# Primate phylogenetic tree (based on mitochondrial DNA), written in Newick notation
newick = "((((Human:0.1,Chimpanzee:0.12):0.08,Gorilla:0.2):0.15,(Orangutan:0.25,Gibbon:0.28):0.1):0.2,(Macaque:0.35,(Baboon:0.3,Mandrill:0.32):0.05):0.15)"

# Flat list of node records produced by the parser
nodes = parse_newick(newick)

# Index the node records by id so parents and children can be looked up directly
node_dict = dict((n["id"], n) for n in nodes)
| 104 | + |
| 105 | + |
# Calculate x positions (cumulative branch length from root)
def calc_x_positions(node_dict):
    """Store an ``"x"`` coordinate on every node reachable from the root.

    A node's x is its parent's x plus its own ``"length"``; the root's x is
    its own length. The traversal uses an explicit stack, so deep trees do
    not hit the interpreter's recursion limit.

    Args:
        node_dict: Mapping of node id to node dict. Each node needs the keys
            ``"id"``, ``"parent"`` (``None`` for the root), ``"length"`` and
            ``"children"`` (list of child ids). Nodes are modified in place.

    Raises:
        ValueError: If a non-empty mapping has no root, or a node is reachable
            more than once (the structure is not a tree).
    """
    if not node_dict:
        return  # nothing to position

    # Root = first node with no parent
    root = next((n for n in node_dict.values() if n["parent"] is None), None)
    if root is None:
        raise ValueError("Tree has no root: every node has a parent")

    placed = set()
    stack = [(root["id"], 0)]
    while stack:
        node_id, parent_x = stack.pop()
        if node_id in placed:
            # Revisiting a node would loop forever on a cycle.
            raise ValueError(f"Node {node_id!r} is reachable more than once; not a tree")
        placed.add(node_id)

        node = node_dict[node_id]
        node["x"] = parent_x + node["length"]
        stack.extend((child_id, node["x"]) for child_id in node["children"])
| 118 | + |
| 119 | + |
# Calculate y positions (spacing for leaves, centered for internal nodes)
def calc_y_positions(node_dict):
    """Store a ``"y"`` coordinate on every node.

    Leaves (nodes with an empty ``"children"`` list) get consecutive integers
    0, 1, 2, ... in ascending ``"id"`` order. Each internal node gets the mean
    of its direct children's y values. Internal values are always recomputed,
    so calling the function again refreshes any previous layout. The traversal
    uses an explicit stack, so deep trees do not hit the recursion limit.

    Args:
        node_dict: Mapping of node id to node dict with the keys ``"id"`` and
            ``"children"`` (list of child ids). Nodes are modified in place.

    Raises:
        ValueError: If the child links contain a cycle.
    """
    # Get leaves in order
    leaves = sorted((n for n in node_dict.values() if not n["children"]), key=lambda n: n["id"])

    # Assign y positions to leaves
    for row, leaf in enumerate(leaves):
        leaf["y"] = row

    resolved = {leaf["id"] for leaf in leaves}  # ids whose y is final
    in_progress = set()  # ids whose children are currently being resolved

    # Calculate y for internal nodes (average of children), children first
    for start in node_dict.values():
        stack = [(start["id"], False)]
        while stack:
            node_id, children_done = stack.pop()
            if children_done:
                node = node_dict[node_id]
                child_ys = [node_dict[cid]["y"] for cid in node["children"]]
                node["y"] = sum(child_ys) / len(child_ys)
                in_progress.discard(node_id)
                resolved.add(node_id)
                continue
            if node_id in resolved:
                continue
            if node_id in in_progress:
                raise ValueError(f"Cycle detected at node {node_id!r}; not a tree")
            in_progress.add(node_id)
            # Revisit this node once every child has been resolved.
            stack.append((node_id, True))
            stack.extend((cid, False) for cid in node_dict[node_id]["children"])
| 141 | + |
| 142 | + |
# Lay out the tree: x from cumulative branch lengths, y from leaf order
calc_x_positions(node_dict)
calc_y_positions(node_dict)

# Every non-root node contributes two segments: its own horizontal branch
# (parent x -> node x at the node's y) and a vertical connector at the
# parent's x (parent y -> node y).
segments = []
for node in node_dict.values():
    if node["parent"] is None:
        continue  # the root has no incoming branch
    parent = node_dict[node["parent"]]
    branch = {"x": parent["x"], "xend": node["x"], "y": node["y"], "yend": node["y"], "type": "horizontal"}
    connector = {"x": parent["x"], "xend": parent["x"], "y": parent["y"], "yend": node["y"], "type": "vertical"}
    segments.extend((branch, connector))

df_segments = pd.DataFrame(segments)

# Leaf labels, nudged slightly to the right of each tip
leaves = [n for n in node_dict.values() if not n["children"]]
label_rows = [dict(x=n["x"] + 0.02, y=n["y"], label=n["name"]) for n in leaves]
df_labels = pd.DataFrame(label_rows)

# Marker positions for every node (internal nodes and leaves alike)
node_rows = [dict(x=n["x"], y=n["y"]) for n in node_dict.values()]
df_nodes = pd.DataFrame(node_rows)
| 166 | + |
# Define clade colors for visualization: one color per clade, listed with its members
clade_members = {
    "#306998": ("Human", "Chimpanzee", "Gorilla"),
    "#FFD43B": ("Orangutan", "Gibbon"),
    "#22C55E": ("Macaque", "Baboon", "Mandrill"),
}
clade_colors = {species: color for color, members in clade_members.items() for species in members}

# Attach each leaf's clade color to its label row
df_labels["color"] = df_labels["label"].map(clade_colors)
| 180 | + |
# Create the phylogenetic tree plot from named layers
# Branches of the tree
branch_layer = geom_segment(
    aes(x="x", y="y", xend="xend", yend="yend"), data=df_segments, color="#306998", size=1.5
)
# A marker on every node
node_layer = geom_point(aes(x="x", y="y"), data=df_nodes, color="#306998", size=4)
# Larger clade-colored markers and names at the label positions
tip_layer = geom_point(aes(x="x", y="y", color="color"), data=df_labels, size=6, show_legend=False)
tip_text_layer = geom_text(
    aes(x="x", y="y", label="label"), data=df_labels, hjust=0, size=14, family="sans-serif"
)

plot_labels = labs(
    title="Primate Evolution · tree-phylogenetic · letsplot · pyplots.ai",
    x="Evolutionary Distance (substitutions per site)",
    y="",
)

# The y axis only orders the leaves, so its title, text, ticks and grid are hidden
plot_theme = theme(
    plot_title=element_text(size=24, face="bold"),
    axis_title_x=element_text(size=20),
    axis_title_y=element_blank(),
    axis_text_x=element_text(size=16),
    axis_text_y=element_blank(),
    axis_ticks_y=element_blank(),
    panel_grid_major_y=element_blank(),
    panel_grid_minor=element_blank(),
    panel_grid_major_x=element_line(color="#E5E5E5", size=0.5),
)

plot = (
    ggplot()
    + branch_layer
    + node_layer
    + tip_layer
    + tip_text_layer
    + scale_color_identity()
    + scale_x_continuous(limits=[0, 0.85])
    + plot_labels
    + theme_minimal()
    + plot_theme
    + ggsize(1600, 900)
)
| 209 | + |
# Write both outputs into the current directory:
#   plot.png  - static image, scaled 3x (4800x2700 from the 1600x900 plot size)
#   plot.html - HTML version for interactivity
for filename, extra_options in (("plot.png", {"scale": 3}), ("plot.html", {})):
    ggsave(plot, filename, path=".", **extra_options)
0 commit comments