Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 12 additions & 2 deletions algorithms.py
Original file line number Diff line number Diff line change
Expand Up @@ -682,6 +682,14 @@ def _make_segment(self, left, right, count):
return seg


# The SMC(k) implementation here differs in a few details to the
# version in the C code. Most of this is incidental detail related
# to memory management and differences in AVL tree implementations.
# The main difference is that we have an implementation of
# reset_hull_right here which could be worth porting into the C
# code at some point.


class Hull:
"""
A hull keeps track of the outermost boundaries (left, right) of
Expand Down Expand Up @@ -1952,7 +1960,9 @@ def wiuf_gene_conversion_within_event(self, label):
elif head is not None:
new_individual_head = head
if new_individual_head is not None:
# FIXME when doing the smc_k update
# NOTE: this is not done very nicely and there's likely
# ways that would improve performance a little. See
# https://github.com/tskit-dev/msprime/issues/2386
lineage.reset_segments()
self.update_lineage_right(lineage)
new_lineage = self.alloc_lineage(new_individual_head, pop)
Expand Down Expand Up @@ -2018,7 +2028,7 @@ def wiuf_gene_conversion_left_event(self, label):
y.prev = None
alpha = y

# FIXME
# See https://github.com/tskit-dev/msprime/issues/2386
lineage.reset_segments()
self.update_lineage_right(lineage)

Expand Down
21 changes: 17 additions & 4 deletions lib/msprime.c
Original file line number Diff line number Diff line change
Expand Up @@ -2038,9 +2038,6 @@ msp_verify_hulls(msp_t *self)
io = 0;
}
}
/* printf("hull %d io=%d insertion_order=%d\n", */
/* (int) hull->id, (int) io, (int) hull->right_insertion_order);
*/
tsk_bug_assert(io == hull->right_insertion_order);
pos = hull->right;
}
Expand Down Expand Up @@ -3251,6 +3248,19 @@ msp_store_arg_gene_conversion(
* there was specific logic for this. In an attempt to simplify the logic
* here we just do the straightforward thing, but this is definitely somewhere
* we could get some benefit if looking for optimisations.
*
* In some profiling on a simulation that takes about a minute, we see
* the following breakdown:
* --49.65%--msp_smc_k_common_ancestor_event
* --40.93%--msp_recombination_event
* |--17.03%--msp_insert_individual
* |--16.79%--msp_reset_hull_right
*
* So, in this case msp_reset_hull_right represents about 17% of the overall
* processing time, and I think we could at best expect to halve that if
* we did things better. A ~5% improvement in performance doesn't seem
* worth the effort or complexity currently. See the Python implementation
* of the git history though, for how this used to be done.
*/
static int
msp_reset_hull_right(msp_t *self, lineage_t *lineage)
Expand Down Expand Up @@ -3624,7 +3634,9 @@ msp_gene_conversion_event(msp_t *self, label_id_t label)
* lineage and the SMC(k) state as the code paths are quite convoluted.
* It seems unlikely the segment iteration will be a bottleneck in
* practise, but unconditionaly removing and reinserting hulls could
* become significant. */
* become significant.
* See https://github.com/tskit-dev/msprime/issues/2386
*/
lineage_reset_segments(lineage);
if (is_smc_k) {
ret = msp_insert_individual_hull(self, lineage);
Expand Down Expand Up @@ -5028,6 +5040,7 @@ msp_gene_conversion_left_event(msp_t *self, label_id_t label)
msp_set_segment_mass(self, alpha);
tsk_bug_assert(alpha->prev == NULL);

// This isn't optimal - see https://github.com/tskit-dev/msprime/issues/2386
lineage_reset_segments(new_lineage);
lineage_reset_segments(lineage);

Expand Down
6 changes: 5 additions & 1 deletion lib/msprime.h
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,7 @@ typedef struct lineage_t_t {
segment_t *head;
segment_t *tail;
avl_node_t avl_node;
/* The hull is only used for the SMCK, and is NULL otherwise */
struct hull_t_t *hull;
} lineage_t;

Expand Down Expand Up @@ -129,7 +130,10 @@ typedef struct {
avl_tree_t *ancestors;
tsk_size_t num_potential_destinations;
tsk_id_t *potential_destinations;
/* These three indexes are only used in the SMCK model */
/* These three indexes are only used in the SMCK model. We maintain
* two AVL trees in which the hulls are indexed by the left and right
* coordinates, respectively, along with a Fenwick tree which maintains
* the cumulative count of coalescable lineages */
avl_tree_t *hulls_left;
avl_tree_t *hulls_right;
fenwick_t *coal_mass_index;
Expand Down
Loading