@@ -25,6 +25,105 @@ tips, as in the example below:
2525``` {code-cell}
2626:"tags": ["hide-input"]
2727import tskit
28+ import numpy as np
29+
30+ def collapse_tree_for_plot(tree, max_tips=20, style=None):
31+ """
32+ Return a condensed version of a single tree by collapsing large clades until
33+ at most `max_tips` labelled tips remain.
34+
35+ The returned tuple is:
36+ (plot_tree, node_labels, style)
37+
38+ where `plot_tree` is a tree suitable for plotting with `draw_svg`, `node_labels`
39+ labels the retained sample tips plus the collapsed clades, and `style` highlights
40+ the collapsed clades.
41+
42+ This helper is intended for plotting a single large tree. It requires the tree to
43+ come from a tree sequence containing exactly one tree.
44+ """
45+ ts = tree.tree_sequence
46+ if ts.num_trees != 1:
47+ raise ValueError("collapse_tree_for_plot currently expects a single-tree tree sequence")
48+ if tree.num_roots != 1:
49+ raise ValueError("collapse_tree_for_plot currently expects a single-root tree")
50+ if max_tips < 2:
51+ raise ValueError("max_tips must be at least 2")
52+
53+ postorder = list(tree.nodes(order="postorder"))
54+ counts = np.zeros(ts.num_nodes, dtype=int)
55+ for u in postorder:
56+ if tree.is_sample(u):
57+ counts[u] = 1
58+ else:
59+ counts[u] = sum(counts[v] for v in tree.children(u))
60+
61+ displayed_tips = tree.num_samples()
62+ candidate_nodes = [
63+ u for u in postorder
64+ if counts[u] > 1 and tree.parent(u) != tskit.NULL
65+ ]
66+ collapsed_nodes = set()
67+ forbidden = set()
68+
69+ while displayed_tips > max_tips:
70+ needed_reduction = displayed_tips - max_tips
71+ candidates = [u for u in candidate_nodes if u not in collapsed_nodes and u not in forbidden]
72+ if len(candidates) == 0:
73+ break
74+
75+ # Prefer a clade that gets us close to the target without overshooting.
76+ acceptable = [u for u in candidates if counts[u] - 1 <= needed_reduction]
77+ if len(acceptable) > 0:
78+ u = max(acceptable, key=lambda x: counts[x])
79+ else:
80+ u = min(candidates, key=lambda x: counts[x])
81+
82+ collapsed_nodes.add(u)
83+ displayed_tips -= counts[u] - 1
84+
85+ # Do not collapse descendants or ancestors of an already-collapsed node.
86+ stack = list(tree.children(u))
87+ while len(stack) > 0:
88+ v = stack.pop()
89+ forbidden.add(v)
90+ stack.extend(tree.children(v))
91+ v = tree.parent(u)
92+ while v != tskit.NULL:
93+ forbidden.add(v)
94+ v = tree.parent(v)
95+
96+ keep_nodes = []
97+ stack = list(tree.roots)
98+ while len(stack) > 0:
99+ u = stack.pop()
100+ if u in collapsed_nodes:
101+ keep_nodes.append(u)
102+ elif tree.is_sample(u):
103+ keep_nodes.append(u)
104+ else:
105+ stack.extend(tree.children(u))
106+
107+ plot_ts = ts.simplify(sorted(keep_nodes), filter_nodes=False)
108+ plot_tree = plot_ts.first()
109+
110+ node_labels = {}
111+ style = [style or ""]
112+ for u in plot_tree.nodes():
113+ if u in collapsed_nodes:
114+ node_labels[u] = f"{counts[u]:,} tips"
115+ scale = min(8.0, 1.0 + counts[u] / max_tips)
116+ style.append(
117+ f".n{u} > .sym {{clip-path: polygon(50% 0%, 100% 100%, 0% 100%); "
118+ f"transform: scale({scale}, 3.5)}}"
119+ )
120+ style.append(
121+ f".n{u} > .lab {{transform: translateY(20px); font-size: 12px}}"
122+ )
123+ elif plot_tree.is_sample(u):
124+ node_labels[u] = str(u)
125+
126+ return plot_tree, node_labels, "".join(style)
28127```
29128
30129``` {code-cell}
@@ -36,11 +135,19 @@ print("Tree sequence takes up", big_tree.tree_sequence.nbytes / 1024**2, "Mb")
36135print(f"Generating a 'comb' (pectinate) tree of {num_tips} tips took:")
37136```
38137
39- :::{todo}
40- Display the million tip tree in condensed form, when
41- https://github.com/tskit-dev/tskit/issues/2372#issuecomment-1298518380 and
42- https://github.com/tskit-dev/tskit/issues/2628 are solved
43- :::
138+ Clearly, plotting a tree with a million tips is problematic, but we can draw a summary
139+ (see the {ref}` visualization tutorial<sec_tskit_viz_SVG_examples_larger_plots> ` for details):
140+
141+
142+ ``` {code-cell}
143+ plot_tree, labels, css = collapse_tree_for_plot( # function from the top of this notebook
144+ big_tree, max_tips=20, style=".y-axis .tick .lab {font-size: 8pt}"
145+ )
146+ plot_tree.draw_svg(
147+ size=(1000, 400), node_labels=labels, style=css, time_scale="rank", y_axis=True
148+ )
149+ ```
150+
44151
45152Calculations on these huge trees can be very efficient:
46153
@@ -322,11 +429,11 @@ print(
322429 tree.branch_length(node_id),
323430)
324431```
325- :::{todo}
326- The branch distance between two samples is also easy to calculate
327432
328- NB: Turn the following in to a code cell
329- ```
433+ The branch distance between two samples is also easy to calculate using
434+ {meth}` ~Tree.distance_between ` :
435+
436+ ``` {code-cell}
330437target_node_1 = 5
331438target_node_2 = 7
332439print(
@@ -335,9 +442,7 @@ print(
335442 "and",
336443 target_node_2,
337444 "is",
338- # See https://github.com/tskit-dev/tskit/issues/2627 - what should be call this
339- # so as not to get mixed up with tree.path_length which counts the number of edges
340- # tree.branch_distance(target_node_1, target_node_2),
445+ tree.distance_between(target_node_1, target_node_2), # beware: tree.path_length counts number of edges
341446)
342447```
343448
@@ -349,18 +454,15 @@ calculation is to use {meth}`TreeSequence.divergence`, part of the the standard
349454multiple sets of samples in {ref}` sec_phylogen_multiple_trees ` to be calculated
350455efficiently:
351456
352- NB: Turn the following in to a code cell
353- ```
457+ ``` {code-cell}
354458target_node_1 = 5
355459target_node_2 = 7
356460print(
357- "Branch distance using built-in stats framework:"
461+ "Branch distance using built-in stats framework:",
358462 tree.tree_sequence.divergence(([5], [7]), mode="branch", windows="trees")
359463)
360464```
361465
362- :::
363-
364466
365467(sec_phylogen_multiroot)=
366468### Roots and multiroot trees
@@ -503,4 +605,3 @@ The rest of the {program}`tskit` tutorials will lead you through the concepts in
503605storing and analysing sequences of many correlated trees. For a simple introduction, you
504606might want to start with {ref}` sec_what_is ` .
505607
506-
0 commit comments