Skip to content

Commit d50953d

Browse files
Replace SMCk regression test with a scripted-RNG unit test
The prior test_smc_k_bug_repro replayed the user-reported failing simulation (50 diploid samples, seq_length 2e7, ~75k events) to hit the bug after about one second. Replace it with test_smc_k_zero_random_mass, which uses a custom GSL rng_type whose get_double callback returns successive doubles from a fixed array. With the script {0.5, 0.0} and a 2-sample SMCk simulation, the first common-ancestor event has gsl_ran_flat return 0 deterministically, making remaining_mass == x_hull->count exactly — the same trigger the clamp at lib/msprime.c handles. Pre-clamp the test aborts at line 7694; post-clamp it completes in about a millisecond. The scripted RNG type is defined as static helpers in test_ancestry.c next to the test that uses it. Add chained-event SMCk clamp test test_smc_k_zero_random_mass triggers the clamp at lib/msprime.c on the very first CA event of a fresh 2-sample simulation, only walking the loop once. Add a sibling test_smc_k_zero_random_mass_chained that runs one CA event first (coalescing one pair of 4 lineages, freeing and reusing hull slots, rearranging AVL trees) and then drives gsl_ran_flat to 0 on the second CA event. Pre-fix the second event aborts at line 7694; post-fix both complete in about a millisecond. The test covers the clamp working from a non-trivial post-event state. We initially hoped to also drive the second event to a hull with count > 1 via the fenwick zero-skip path, but fenwick_find always stops at the first non-zero slot and any non-zero predecessor blocks the route to a higher-count hull. The honest scope (clamp in chained events, count == 1) is documented in the test comment. Update CHANGELOG
1 parent 36ca5db commit d50953d

2 files changed

Lines changed: 144 additions & 29 deletions

File tree

CHANGELOG.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,12 @@
44

55
In development
66

7+
**Bug fixes**
8+
9+
- Fix edge case in SMC(k) model where a random value of exactly zero triggered an assert.
10+
({issue}`2523` {pr}`2525`, {user}`currocam`, {user}`jeromekelleher`).
11+
12+
713
## [1.4.1] - 2026-03-05
814

915
**Maintenance**

lib/tests/test_ancestry.c

Lines changed: 138 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -4656,59 +4656,166 @@ test_smc_k_gc(void)
46564656
}
46574657
}
46584658

4659-
/* Regression test: with this seed and parameters the SMCk common-ancestor
4660-
* walk used to abort via tsk_bug_assert at lib/msprime.c when gsl_ran_flat
4661-
* drew random_mass == 0 and the resulting remaining_mass equalled
4662-
* x_hull->count exactly, causing the walk to over-iterate by one and run
4663-
* off the head of the AVL tree. The fix clamps remaining_mass below count. */
4659+
/* A static GSL rng_type whose draws are scripted from a fixed array.
4660+
* gsl_rng_uniform calls get_double directly, so providing only that
4661+
* callback is enough to control every uniform draw the simulator makes
4662+
* (gsl_ran_flat, gsl_ran_exponential, gsl_rng_uniform_int all funnel
4663+
* through gsl_rng_uniform). */
4664+
typedef struct {
4665+
size_t call_count;
4666+
const double *u_values;
4667+
size_t n_u_values;
4668+
} scripted_rng_state_t;
4669+
46644670
static void
4665-
test_smc_k_bug_repro(void)
4671+
scripted_rng_set(void *state, unsigned long int seed)
4672+
{
4673+
scripted_rng_state_t *s = (scripted_rng_state_t *) state;
4674+
(void) seed;
4675+
s->call_count = 0;
4676+
}
4677+
4678+
static unsigned long int
4679+
scripted_rng_get(void *state)
4680+
{
4681+
/* Not normally called when get_double is set, but provide a sane
4682+
* fallback in case some code path uses it. */
4683+
scripted_rng_state_t *s = (scripted_rng_state_t *) state;
4684+
if (s->call_count >= s->n_u_values) {
4685+
return 1UL;
4686+
}
4687+
return (unsigned long int) (s->u_values[s->call_count++] * 4294967296.0);
4688+
}
4689+
4690+
static double
4691+
scripted_rng_get_double(void *state)
4692+
{
4693+
scripted_rng_state_t *s = (scripted_rng_state_t *) state;
4694+
if (s->call_count >= s->n_u_values) {
4695+
/* Past the end of the script, return a benign default. */
4696+
return 0.5;
4697+
}
4698+
return s->u_values[s->call_count++];
4699+
}
4700+
4701+
static const gsl_rng_type scripted_rng_type = {
4702+
"scripted", /* name */
4703+
0xffffffffUL, /* max */
4704+
0UL, /* min */
4705+
sizeof(scripted_rng_state_t),
4706+
scripted_rng_set,
4707+
scripted_rng_get,
4708+
scripted_rng_get_double,
4709+
};
4710+
4711+
/* Regression test for the SMCk common-ancestor walk: when gsl_ran_flat
4712+
* draws random_mass == 0, remaining_mass starts at exactly x_hull->count.
4713+
* Pre-fix the >= 0 walk predicate over-iterated by one and ran off the
4714+
* head of the AVL tree, tripping tsk_bug_assert. Uses a scripted RNG to
4715+
* hit the trigger condition deterministically on the first CA event of
4716+
* a 2-sample SMCk simulation. */
4717+
static void
4718+
test_smc_k_zero_random_mass(void)
46664719
{
46674720
int ret;
4668-
uint32_t n = 100; /* 50 diploid individuals = 100 sample nodes */
4669-
sample_t *samples = malloc(n * sizeof(sample_t));
4721+
uint32_t n = 2;
4722+
sample_t samples[2] = { { 0, 0.0 }, { 0, 0.0 } };
46704723
msp_t msp;
4671-
gsl_rng *rng = safe_rng_alloc();
46724724
tsk_table_collection_t tables;
46734725

4674-
CU_ASSERT_FATAL(samples != NULL);
4675-
memset(samples, 0, n * sizeof(sample_t));
4676-
/* All samples in pop 0 at time 0 (already zeroed). */
4677-
4678-
gsl_rng_set(rng, 3940783591UL);
4726+
/* Script:
4727+
* [0] = 0.5 -> gsl_ran_exponential for the CA wait time (any
4728+
* positive value < 1 yields a finite wait).
4729+
* [1] = 0.0 -> gsl_ran_flat(rng, 0, num_pairs) inside
4730+
* msp_smc_k_common_ancestor_event returns 0,
4731+
* making remaining_mass == x_hull->count exactly.
4732+
* Subsequent draws fall through to the 0.5 default. */
4733+
static const double script[] = { 0.5, 0.0 };
4734+
scripted_rng_state_t rng_state
4735+
= { .call_count = 0, .u_values = script, .n_u_values = 2 };
4736+
gsl_rng rng = { .type = &scripted_rng_type, .state = &rng_state };
46794737

4680-
/* sequence_length = 2e7, num_populations = 1 */
4681-
ret = build_sim(&msp, &tables, rng, 2e7, 1, samples, n);
4738+
ret = build_sim(&msp, &tables, &rng, 100.0, 1, samples, n);
46824739
CU_ASSERT_EQUAL_FATAL(ret, 0);
46834740

4684-
ret = msp_set_recombination_rate(&msp, 1e-8);
4741+
ret = msp_set_simulation_model_smc_k(&msp, 1.0);
46854742
CU_ASSERT_EQUAL_FATAL(ret, 0);
46864743

4687-
/* pop 0: initial_size=3600, growth_rate=-0.03 */
4688-
ret = msp_set_population_configuration(&msp, 0, 3600.0, -0.03, true);
4744+
ret = msp_initialise(&msp);
46894745
CU_ASSERT_EQUAL_FATAL(ret, 0);
46904746

4691-
/* At t=30, change pop 0 to initial_size=10000, growth=0 */
4692-
ret = msp_add_population_parameters_change(&msp, 30.0, 0, 10000.0, 0.0);
4747+
/* One event is enough to coalesce the only pair. Pre-fix this aborts
4748+
* via tsk_bug_assert; post-fix it returns 0. */
4749+
ret = msp_run(&msp, DBL_MAX, 1);
4750+
CU_ASSERT_EQUAL(ret, 0);
4751+
CU_ASSERT_TRUE(msp_is_completed(&msp));
4752+
4753+
msp_verify(&msp, 0);
4754+
4755+
ret = msp_free(&msp);
4756+
CU_ASSERT_EQUAL(ret, 0);
4757+
tsk_table_collection_free(&tables);
4758+
}
4759+
4760+
/* Sibling test for the SMCk clamp in a non-trivial state. The base
4761+
* test_smc_k_zero_random_mass triggers the clamp on the very first CA
4762+
* event of a fresh 2-sample simulation. This test exercises the same
4763+
* clamp on the SECOND CA event, after one prior coalescence has freed
4764+
* and reused hull slots, decremented insertion orders for sibling
4765+
* same-left hulls, and rearranged the hulls_left / hulls_right AVL
4766+
* trees. Pre-fix the second event aborts via tsk_bug_assert; post-fix
4767+
* both events succeed.
4768+
*
4769+
* (We tried to engineer the second event to land on a hull with
4770+
* count > 1 via the fenwick zero-skip path, but `fenwick_find` always
4771+
* stops at the FIRST non-zero slot — and in an all-overlapping setup
4772+
* any non-zero "predecessor" slot blocks the path to a higher-count
4773+
* hull. So the second event also exercises count == 1, just from a
4774+
* post-event state rather than the initial state.) */
4775+
static void
4776+
test_smc_k_zero_random_mass_chained(void)
4777+
{
4778+
int ret;
4779+
uint32_t n = 4;
4780+
sample_t samples[4]
4781+
= { { 0, 0.0 }, { 0, 0.0 }, { 0, 0.0 }, { 0, 0.0 } };
4782+
msp_t msp;
4783+
tsk_table_collection_t tables;
4784+
4785+
/* Script:
4786+
* [0] = 0.5 -> event 1 gsl_ran_exponential, finite wait.
4787+
* [1] = 0.0833 -> event 1 gsl_ran_flat: random_mass = 0.0833 * 6 ≈ 0.5.
4788+
* Selects a hull whose count is non-zero; walk back
4789+
* coalesces with one of its matching predecessors.
4790+
* [2] = 0.5 -> event 2 gsl_ran_exponential, finite wait.
4791+
* [3] = 0.0 -> event 2 gsl_ran_flat: random_mass = 0. The clamp
4792+
* at lib/msprime.c:7691-7700 fires. */
4793+
static const double script[] = { 0.5, 0.0833, 0.5, 0.0 };
4794+
scripted_rng_state_t rng_state
4795+
= { .call_count = 0, .u_values = script, .n_u_values = 4 };
4796+
gsl_rng rng = { .type = &scripted_rng_type, .state = &rng_state };
4797+
4798+
ret = build_sim(&msp, &tables, &rng, 100.0, 1, samples, n);
46934799
CU_ASSERT_EQUAL_FATAL(ret, 0);
46944800

4695-
/* SMC_K with hull_offset=1 (k=1) */
4696-
ret = msp_set_simulation_model_smc_k(&msp, 1.0);
4801+
/* hull_offset = 0 keeps hull right == lineage tail->right. */
4802+
ret = msp_set_simulation_model_smc_k(&msp, 0.0);
46974803
CU_ASSERT_EQUAL_FATAL(ret, 0);
46984804

46994805
ret = msp_initialise(&msp);
47004806
CU_ASSERT_EQUAL_FATAL(ret, 0);
47014807

4702-
ret = msp_run(&msp, DBL_MAX, ULONG_MAX);
4703-
CU_ASSERT_EQUAL(ret, 0);
4704-
CU_ASSERT_TRUE(msp_is_completed(&msp));
4808+
/* Two CA events, hitting the max_events cap. Pre-fix the second
4809+
* event aborts via tsk_bug_assert; post-fix msp_run returns
4810+
* MSP_EXIT_MAX_EVENTS and we've coalesced 2 pairs (4 -> 2 ancestors). */
4811+
ret = msp_run(&msp, DBL_MAX, 2);
4812+
CU_ASSERT_EQUAL(ret, MSP_EXIT_MAX_EVENTS);
4813+
CU_ASSERT_EQUAL(msp_get_num_ancestors(&msp), 2);
47054814

47064815
msp_verify(&msp, 0);
47074816

47084817
ret = msp_free(&msp);
47094818
CU_ASSERT_EQUAL(ret, 0);
4710-
gsl_rng_free(rng);
4711-
free(samples);
47124819
tsk_table_collection_free(&tables);
47134820
}
47144821

@@ -4827,7 +4934,9 @@ main(int argc, char **argv)
48274934
{ "test_mixed_model_smc_k_large", test_mixed_model_smc_k_large },
48284935
{ "test_fenwick_rebuild_smc_k", test_fenwick_rebuild_smc_k },
48294936
{ "test_smc_k_gc", test_smc_k_gc },
4830-
{ "test_smc_k_bug_repro", test_smc_k_bug_repro },
4937+
{ "test_smc_k_zero_random_mass", test_smc_k_zero_random_mass },
4938+
{ "test_smc_k_zero_random_mass_chained",
4939+
test_smc_k_zero_random_mass_chained },
48314940
CU_TEST_INFO_NULL,
48324941
};
48334942

0 commit comments

Comments
 (0)