Skip to content

Commit 8ba4e04

Browse files
Change stopping condition for stop_at_local_mrca=False
1 parent ec37d9f commit 8ba4e04

9 files changed

Lines changed: 253 additions & 390 deletions

File tree

algorithms.py

Lines changed: 19 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -1049,13 +1049,6 @@ def initialise(self, ts):
10491049
seg = seg.next
10501050
self.add_lineage(lineage)
10511051

1052-
def ancestors_remain(self):
1053-
"""
1054-
Returns True if the simulation is not finished, i.e., there is some ancestral
1055-
material that has not fully coalesced.
1056-
"""
1057-
return sum(pop.get_num_ancestors() for pop in self.P) != 0
1058-
10591052
def change_population_size(self, pop_id, size):
10601053
self.P[pop_id].set_start_size(size)
10611054

@@ -1314,6 +1307,12 @@ def find_cleft_individual(self, label, cleft_value):
13141307
individual_index -= num_ancestors
13151308
raise AssertionError()
13161309

1310+
def is_completed(self):
1311+
for x in self.S.values():
1312+
if x > 1:
1313+
return False
1314+
return True
1315+
13171316
def hudson_simulate(self, end_time):
13181317
"""
13191318
Simulates the algorithm until all loci have coalesced.
@@ -1322,7 +1321,7 @@ def hudson_simulate(self, end_time):
13221321
potential_destinations = self.get_potential_destinations()
13231322

13241323
# only worried about label 0 below
1325-
while len(non_empty_pops) > 0:
1324+
while not self.is_completed():
13261325
self.verify()
13271326
if self.t >= end_time:
13281327
break
@@ -1367,9 +1366,7 @@ def hudson_simulate(self, end_time):
13671366
mig_source = j
13681367
mig_dest = k
13691368
min_time = min(t_re, t_ca, t_gcin, t_gc_left, t_mig)
1370-
assert (min_time != INFINITY) or (
1371-
(min_time == INFINITY) and not self.stop_at_local_mrca
1372-
)
1369+
assert min_time != INFINITY
13731370
if self.t + min_time > self.modifier_events[0][0]:
13741371
t, func, args = self.modifier_events.pop(0)
13751372
self.t = t
@@ -1383,14 +1380,7 @@ def hudson_simulate(self, end_time):
13831380
event = "MOD"
13841381
else:
13851382
self.t += min_time
1386-
if min_time >= INFINITY:
1387-
assert not self.stop_at_local_mrca
1388-
event = "END"
1389-
if end_time >= INFINITY:
1390-
end_time = self.t
1391-
else:
1392-
self.t = end_time
1393-
elif min_time == t_re:
1383+
if min_time == t_re:
13941384
event = "RE"
13951385
self.hudson_recombination_event(0)
13961386
elif min_time == t_gcin:
@@ -1453,7 +1443,7 @@ def single_sweep_simulate(self):
14531443
# main loop time
14541444
t_inc_orig = self.time_slice
14551445
e_time = 0.0
1456-
while self.ancestors_remain() and sweep_traj_step < len(times) - 1:
1446+
while sweep_traj_step < len(times) - 1 and not self.is_completed():
14571447
self.verify()
14581448
event_prob = 1.0
14591449
while event_prob > random.random() and sweep_traj_step < len(times) - 1:
@@ -1536,38 +1526,24 @@ def pedigree_simulate(self):
15361526
self.pedigree = Pedigree(self.tables)
15371527
self.dtwf_climb_pedigree()
15381528

1539-
def dtwf_above_root_simulations_stop(self, events_happened):
1540-
return (
1541-
not events_happened
1542-
and not self.stop_at_local_mrca
1543-
and sum(pop.get_num_ancestors() for pop in self.P) == 1
1544-
)
1545-
15461529
def dtwf_simulate(self, end_time):
15471530
"""
15481531
Simulates the algorithm until all loci have coalesced.
15491532
"""
1550-
while self.ancestors_remain() and self.t < end_time:
1551-
self.t += 1
1533+
while not self.is_completed():
15521534
self.verify()
1553-
events_happened = self.dtwf_generation()
1554-
1555-
if self.dtwf_above_root_simulations_stop(events_happened):
1556-
if end_time >= INFINITY:
1557-
end_time = self.t
1558-
else:
1559-
self.t = end_time
1535+
if self.t >= end_time:
15601536
break
1537+
self.t += 1
1538+
# print("DTWF", self.t)
1539+
self.dtwf_generation()
15611540

15621541
def dtwf_generation(self):
15631542
"""
15641543
Evolves one generation of a Wright Fisher population
15651544
Returns True if any events occurred, False otherwise.
15661545
"""
15671546
# Migration events happen at the rates in the matrix.
1568-
nodes = self.tables.nodes.num_rows
1569-
edges = self.tables.edges.num_rows
1570-
no_migration = True
15711547

15721548
for j in range(len(self.P)):
15731549
source_size = self.P[j].get_num_ancestors()
@@ -1580,7 +1556,6 @@ def dtwf_generation(self):
15801556
mig_source = j
15811557
mig_dest = k
15821558
self.migration_event(mig_source, mig_dest)
1583-
no_migration = False
15841559

15851560
for pop_idx, pop in enumerate(self.P):
15861561
# Cluster haploid inds by parent
@@ -1637,14 +1612,6 @@ def dtwf_generation(self):
16371612
h, pop_idx, 0, parent_nodes[ploid]
16381613
) # label 0 only
16391614
self.verify()
1640-
if (
1641-
self.tables.nodes.num_rows == nodes
1642-
and self.tables.edges.num_rows == edges
1643-
and no_migration
1644-
):
1645-
# dtwf generation had no events
1646-
return False
1647-
return True
16481615

16491616
def process_pedigree_common_ancestors(self, ind, ploid):
16501617
"""
@@ -2305,7 +2272,7 @@ def merge_ancestors(self, H, pop_id, label, new_node_id=-1):
23052272
else:
23062273
right = left
23072274
while right < r_max and self.S[right] != min_overlap:
2308-
self.S[right] -= min_overlap - 1
2275+
self.S[right] -= len(X) - 1
23092276
right = self.S.succ_key(right)
23102277
alpha = self.alloc_segment(left, right, new_node_id, pop_id)
23112278
# Update the heaps and make the record.
@@ -2775,10 +2742,10 @@ def verify(self):
27752742
Checks that the state of the simulator is consistent.
27762743
"""
27772744
self.verify_segments()
2778-
if self.model not in ["fixed_pedigree", "dtwf"]:
2779-
# The fixed_pedigree model doesn't maintain a bunch of stuff.
2780-
# It would probably be simpler if it did.
2745+
# The fixed_pedigree model doesn't maintain a bunch of stuff.
2746+
if self.model != "fixed_pedigree":
27812747
self.verify_overlaps()
2748+
if self.model not in ["fixed_pedigree", "dtwf"]:
27822749
for label in range(self.num_labels):
27832750
if self.recomb_mass_index is None:
27842751
assert self.recomb_map.total_mass == 0

docs/ancestry.md

Lines changed: 38 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -1129,47 +1129,6 @@ However, if `coalescing_segments_only=False`, then the edges recorded
11291129
would be from `m` to `p` over the entire segment `[a, b)`, and from `n` to `p` over the entire segment `[c,d)`.
11301130
The nodes `m` and `n` coalesce (in `p`) on only the overlapping segment `[c,b)`, and so the node `p` will be a unary node in the flanking regions: above `m` on the segment `[a,c)` and above `n` on the segment `[b,d)`.
11311131

1132-
(sec_simulation_after_mrca)=
1133-
1134-
### Simulations after local MRCA
1135-
1136-
By default, msprime stops simulating local trees when a local most recent common ancestor (MRCA) is found. This is because events that occur
1137-
above the common ancestor are shared across all samples, and we are usually interested in differences between samples.
1138-
```{code-cell}
1139-
import msprime
1140-
1141-
ts = msprime.sim_ancestry(2, recombination_rate=0.1, sequence_length=10, random_seed=21)
1142-
ts.draw_svg(time_scale='rank')
1143-
```
1144-
Here, for example, we see that simulations in the middle tree stopped at node 6, which is much younger than nodes 7 and 8.
1145-
And if we look closely, we can also see that simulations stopped at node 7 in the rightmost tree at a younger time than
1146-
node 8 in the leftmost tree.
1147-
1148-
1149-
However, for some specialised applications, simulations after the local MRCA might be needed. In this case, we set the
1150-
parameter `stop_at_local_mrca` of {func}`.sim_ancestry` to `False`, and we set the `end_time` parameter to stop simulations
1151-
when they reach this end time.
1152-
1153-
1154-
```{code-cell}
1155-
import msprime
1156-
1157-
ts = msprime.sim_ancestry(
1158-
2, recombination_rate=0.1,
1159-
sequence_length=10,
1160-
random_seed=21,
1161-
stop_at_local_mrca=False,
1162-
end_time=10)
1163-
1164-
ts.draw_svg(time_scale='rank')
1165-
```
1166-
It is possible to run the simulations without setting an `end_time`. In that case, simulations will
1167-
stop when no more events (recombination, coalescence, migration, etc.) could happen. However, since in many cases this state is impossible to reach, we recommend setting an `end_time`.
1168-
1169-
Note that you might want to set an `end_time` that is larger than possible end times for your simulations to avoid stopping
1170-
your simulations before full coalescence. However, a larger `end_time` means that simulations will take longer and consume
1171-
more memory.
1172-
11731132

11741133
(sec_additional_nodes_ca)=
11751134

@@ -1522,6 +1481,44 @@ Migrations nodes are also recorded in the ARG using the
15221481
`NodeType.MIGRANT` flag. See the {ref}`sec_api_node_flags`
15231482
section for more details.
15241483

1484+
(sec_simulation_after_mrca)=
1485+
1486+
### Simulations after local MRCA
1487+
1488+
By default, msprime stops simulating local trees when a local most recent
1489+
common ancestor (MRCA) is found. This is because events that occur above the
1490+
common ancestor are shared across all samples, and we are usually interested in
1491+
differences between samples.
1492+
1493+
However, for some specialised applications, simulations after the local MRCA
1494+
might be needed. In this case, we set the parameter `stop_at_local_mrca` of
1495+
{func}`.sim_ancestry` to `False`.
1496+
1497+
```{code-cell}
1498+
ts = msprime.sim_ancestry(
1499+
2, recombination_rate=0.1, sequence_length=10, random_seed=75,
1500+
stop_at_local_mrca=False, record_full_arg=True)
1501+
ts.draw_svg(time_scale='rank')
1502+
```
1503+
1504+
In this example we've continued the simulation *after* we've found roots for the local
1505+
trees, and we can see, for example, that node 14 is still the ancestor of 11
1506+
in the rightmost tree. This would be not recorded with the default ``stop_at_local_mrca=False``
1507+
because 11 is the root of that tree.
1508+
1509+
The stopping condition remains the same---we keep simulating until an MRCA has been
1510+
found at *all* local trees. The interpretation of the the final "root" nodes
1511+
is a little subtle
1512+
1513+
:::{note}
1514+
When using the ``stop_at_local_mrca=False`` option you usually need to specify an
1515+
``end_time`` argument (see {ref}`sec_ancestry_end_time`) or the simulation will
1516+
run indefinitely! It is in general not possible to know what the "right" end
1517+
time for a given simulation is, and you may need to specify a "large" value to be
1518+
sure that all trees have coalesced locally.
1519+
:::
1520+
1521+
15251522
## Manipulating simulation time
15261523

15271524
(sec_ancestry_end_time)=

0 commit comments

Comments
 (0)