|
| 1 | +""" pyplots.ai |
| 2 | +tree-phylogenetic: Phylogenetic Tree Diagram |
| 3 | +Library: altair 6.0.0 | Python 3.13.11 |
| 4 | +Quality: 91/100 | Created: 2025-12-31 |
| 5 | +""" |
| 6 | + |
| 7 | +import altair as alt |
| 8 | +import numpy as np |
| 9 | +import pandas as pd |
| 10 | + |
| 11 | + |
# Primate phylogenetic tree data (simplified example)
# Based on approximate evolutionary relationships from mitochondrial DNA studies

# Nothing in the visible script draws random numbers; the seed is kept so the
# module-level NumPy random state is exactly what it was before.
np.random.seed(42)

# Tree structure as (parent, child, branch_length) edges:
#   Root            -> (Hominoidea, Cercopithecidae)
#   Hominoidea      -> (Hominidae, Hylobatidae)
#   Hominidae       -> (Homininae, Pongo pygmaeus)
#   Homininae       -> (Homo sapiens, Pan)
#   Pan             -> (Pan troglodytes, Pan paniscus)
#   Hylobatidae     -> (Hylobates lar)
#   Cercopithecidae -> (Macaca mulatta, Papio anubis)
# The order of the edges fixes the top-to-bottom order of siblings in the plot.
edges = [
    ("Root", "Hominoidea", 0.15),
    ("Root", "Cercopithecidae", 0.18),
    ("Hominoidea", "Hominidae", 0.08),
    ("Hominoidea", "Hylobatidae", 0.12),
    ("Hominidae", "Homininae", 0.05),
    ("Hominidae", "Pongo pygmaeus", 0.09),
    ("Homininae", "Homo sapiens", 0.03),
    ("Homininae", "Pan", 0.02),
    ("Pan", "Pan troglodytes", 0.015),
    ("Pan", "Pan paniscus", 0.015),
    ("Hylobatidae", "Hylobates lar", 0.06),
    ("Cercopithecidae", "Macaca mulatta", 0.10),
    ("Cercopithecidae", "Papio anubis", 0.11),
]

# Display labels for the leaf nodes: scientific name (as used in `edges`)
# mapped to the common name drawn next to each tip.
leaf_nodes = {
    "Homo sapiens": "Human",
    "Pan troglodytes": "Chimpanzee",
    "Pan paniscus": "Bonobo",
    "Pongo pygmaeus": "Orangutan",
    "Hylobates lar": "Gibbon",
    "Macaca mulatta": "Rhesus Macaque",
    "Papio anubis": "Olive Baboon",
}

# Build the adjacency list (parent -> children, in edge order) and a lookup of
# the branch length for each (parent, child) pair.
children = {}
branch_lengths = {}
for parent, child, length in edges:
    children.setdefault(parent, []).append(child)
    branch_lengths[(parent, child)] = length
| 58 | + |
| 59 | + |
def get_leaves(node):
    """Return the leaf names under ``node`` in depth-first, left-to-right order.

    A leaf is any name with no entry in the module-level ``children`` mapping;
    passing a leaf directly yields a one-element list. The order of the result
    is what the script uses to give each leaf an evenly spaced y position.
    """
    found = []
    pending = [node]
    while pending:
        current = pending.pop()
        if current not in children:
            found.append(current)
            continue
        # Push in reverse so the first-listed child is popped and explored first.
        pending.extend(reversed(children[current]))
    return found
| 68 | + |
| 69 | + |
# Leaves in tree order; a leaf's index in that order is its y coordinate,
# so the tips end up one unit apart.
all_leaves = get_leaves("Root")
n_leaves = len(all_leaves)
leaf_y = dict(zip(all_leaves, range(n_leaves)))
| 73 | + |
| 74 | + |
def calc_x_positions(node, current_x=0):
    """Map ``node`` and every descendant to its x coordinate.

    A node's x is ``current_x`` plus the branch lengths summed along the path
    from ``node`` down to it, looked up in the module-level ``branch_lengths``.

    Args:
        node: Name of the subtree root to start from.
        current_x: x coordinate assigned to ``node`` itself.

    Returns:
        dict of node name -> x coordinate, with each parent inserted before
        its children and children in their listed order.
    """
    placed = {}

    def place(name, x):
        placed[name] = x
        for kid in children.get(name, ()):
            place(kid, x + branch_lengths[(name, kid)])

    place(node, current_x)
    return placed
| 83 | + |
| 84 | + |
# x coordinate of every node: cumulative branch length from the root, which sits at 0.
x_positions = calc_x_positions("Root", current_x=0)
| 86 | + |
| 87 | + |
def calc_y_positions(node):
    """Map ``node`` and every descendant to its y coordinate.

    Leaves take their slot from the module-level ``leaf_y``; each internal
    node is placed at the mean of its direct children's y values.

    Returns:
        dict of node name -> y coordinate, with children inserted before
        their parent.
    """
    resolved = {}

    def resolve(name):
        if name not in children:
            y = leaf_y[name]
        else:
            # Children are resolved first, so the parent can be centred on them.
            y = np.mean([resolve(kid) for kid in children[name]])
        resolved[name] = y
        return y

    resolve(node)
    return resolved
| 100 | + |
| 101 | + |
y_positions = calc_y_positions("Root")

# Build the right-angled connectors: every edge becomes a vertical segment at
# the parent's x plus a horizontal segment at the child's y.
lines_data = []
for parent, child, _length in edges:
    parent_x = x_positions[parent]
    parent_y = y_positions[parent]
    child_x = x_positions[child]
    child_y = y_positions[child]

    lines_data.extend(
        [
            # Vertical segment at the parent's x, from the parent's y to the child's y
            {"x": parent_x, "y": parent_y, "x2": parent_x, "y2": child_y, "type": "vertical"},
            # Horizontal segment at the child's y, from the parent's x to the child's x
            {"x": parent_x, "y": child_y, "x2": child_x, "y2": child_y, "type": "horizontal"},
        ]
    )

lines_df = pd.DataFrame(lines_data)

# One row per leaf: position, display label (falls back to the node name when
# no common name is registered) and the scientific name for the tooltip.
nodes_data = [
    {"x": x_positions[node], "y": y_positions[node], "label": leaf_nodes.get(node, node), "species": node}
    for node in all_leaves
]

nodes_df = pd.DataFrame(nodes_data)

# Internal nodes are all positioned nodes that are neither a leaf nor the root.
leaf_set = set(all_leaves)  # set membership instead of scanning the list per node
internal_nodes = [n for n in x_positions if n not in leaf_set and n != "Root"]
internal_data = [{"x": x_positions[n], "y": y_positions[n], "name": n} for n in internal_nodes]
internal_df = pd.DataFrame(internal_data)
| 131 | + |
# Color palette - Python colors
branch_color = "#306998"  # Python Blue
node_color = "#FFD43B"  # Python Yellow
text_color = "#2d2d2d"  # Dark gray for text

# Tree branches: one rule mark per segment row in lines_df
branches = (
    alt.Chart(lines_df)
    .mark_rule(strokeWidth=4, color=branch_color)
    .encode(x="x:Q", y="y:Q", x2="x2:Q", y2="y2:Q")
)

# Leaf node points
leaf_points = (
    alt.Chart(nodes_df)
    .mark_circle(size=400, color=node_color, stroke=branch_color, strokeWidth=2)
    .encode(x=alt.X("x:Q"), y=alt.Y("y:Q"), tooltip=["species:N", "label:N"])
)

# Leaf labels, left-aligned and nudged 15px to the right of each leaf point
leaf_labels = (
    alt.Chart(nodes_df)
    .mark_text(align="left", baseline="middle", dx=15, fontSize=20, fontWeight="bold", color=text_color)
    .encode(x="x:Q", y="y:Q", text="label:N")
)

# Internal node points (smaller than the leaf points)
internal_points = (
    alt.Chart(internal_df)
    .mark_circle(size=200, color=branch_color, stroke="#ffffff", strokeWidth=2)
    .encode(x=alt.X("x:Q"), y=alt.Y("y:Q"), tooltip=["name:N"])
)

# Largest root-to-node distance; used further down to size the x-axis domain.
max_x = max(x_positions.values())

# Scale bar geometry, in data units. The bar length is the single source of
# truth for both the drawn bar and its label text.
scale_bar_length = 0.05  # 0.05 substitutions per site
scale_bar_x = 0.02  # left end of the bar
scale_bar_y = -0.8  # below y=0, the first leaf row
scale_bar_label_y = -1.2  # label sits under the bar

scale_bar_data = pd.DataFrame(
    [{"x": scale_bar_x, "y": scale_bar_y, "x2": scale_bar_x + scale_bar_length, "y2": scale_bar_y}]
)

scale_bar = (
    alt.Chart(scale_bar_data)
    .mark_rule(strokeWidth=4, color=text_color)
    .encode(x="x:Q", y="y:Q", x2="x2:Q", y2="y2:Q")
)

# Label centred horizontally on the bar
scale_bar_label_data = pd.DataFrame(
    [
        {
            "x": scale_bar_x + scale_bar_length / 2,
            "y": scale_bar_label_y,
            "text": f"{scale_bar_length} subs/site",
        }
    ]
)

scale_bar_label = (
    alt.Chart(scale_bar_label_data)
    .mark_text(fontSize=16, color=text_color)
    .encode(x="x:Q", y="y:Q", text="text:N")
)
| 177 | + |
# Title and subtitle shown above the plot
chart_title = alt.Title(
    "Primate Evolution · tree-phylogenetic · altair · pyplots.ai",
    fontSize=28,
    anchor="middle",
    color=text_color,
    subtitle="Phylogenetic tree based on mitochondrial DNA divergence",
    subtitleFontSize=18,
    subtitleColor="#666666",
)

# Axis definitions applied on the layered chart itself. The x domain leaves
# room on the right for the leaf labels; the y domain leaves room below the
# first leaf row for the scale bar, and the y axis is hidden entirely.
x_axis = alt.X(
    "x:Q",
    title="Evolutionary Distance (substitutions per site)",
    scale=alt.Scale(domain=[-0.02, max_x + 0.15]),
)
y_axis = alt.Y(
    "y:Q",
    title="",
    scale=alt.Scale(domain=[-1.5, n_leaves - 0.5]),
    axis=alt.Axis(labels=False, ticks=False, domain=False),
)

# Combine all layers; later layers are listed after (and so drawn over) earlier ones
layers = [branches, internal_points, leaf_points, leaf_labels, scale_bar, scale_bar_label]
chart = alt.layer(*layers)
chart = chart.properties(width=1400, height=800, title=chart_title)
chart = chart.configure_axis(
    labelFontSize=16,
    titleFontSize=20,
    gridColor="#e0e0e0",
    gridOpacity=0.3,
    domainColor=text_color,
)
chart = chart.configure_view(strokeWidth=0)

# Customize axes
chart = chart.encode(x=x_axis, y=y_axis)

# Save as PNG and HTML
png_path = "plot.png"
html_path = "plot.html"
chart.save(png_path, scale_factor=3.0)
chart.save(html_path)
0 commit comments