Skip to content

Commit 7a49bc6

Browse files
authored
Merge pull request #1802 from nikbaya/update-climb-pedigrees
Update setup of WF pedigree model for table-based approach
2 parents 48046a1 + 37c0ba8 commit 7a49bc6

3 files changed

Lines changed: 50 additions & 84 deletions

File tree

lib/msprime.c

Lines changed: 47 additions & 82 deletions
Original file line numberDiff line numberDiff line change
@@ -844,6 +844,7 @@ msp_free_pedigree(msp_t *self)
844844
for (i = 0; i < self->pedigree->num_inds; i++) {
845845
msp_safe_free(ind->parents);
846846
msp_safe_free(ind->segments);
847+
msp_safe_free(ind->nodes);
847848
ind++;
848849
}
849850
}
@@ -2122,7 +2123,8 @@ alloc_individual(individual_t *ind, size_t ploidy)
21222123
// Better to allocate these as a block?
21232124
ind->parents = malloc(ploidy * sizeof(individual_t *));
21242125
ind->segments = malloc(ploidy * sizeof(avl_tree_t));
2125-
if (ind->parents == NULL || ind->segments == NULL) {
2126+
ind->nodes = malloc(ploidy * sizeof(*ind->nodes));
2127+
if (ind->parents == NULL || ind->segments == NULL || ind->nodes == NULL) {
21262128
ret = MSP_ERR_NO_MEMORY;
21272129
goto out;
21282130
}
@@ -2223,45 +2225,6 @@ msp_alloc_pedigree(msp_t *self, size_t num_inds, size_t ploidy)
22232225
return ret;
22242226
}
22252227

2226-
/* TODO merge this method into where it's called - we don't really need these
2227-
* arrays any more. */
2228-
static int MSP_WARN_UNUSED
2229-
msp_set_pedigree(msp_t *self, tsk_id_t *parents, double *times, tsk_flags_t *is_sample)
2230-
{
2231-
size_t i, j;
2232-
tsk_id_t parent_ix;
2233-
tsk_flags_t sample_flag;
2234-
size_t sample_num;
2235-
individual_t *ind = NULL;
2236-
2237-
tsk_bug_assert(self->pedigree != NULL);
2238-
2239-
ind = self->pedigree->inds;
2240-
sample_num = 0;
2241-
for (i = 0; i < self->pedigree->num_inds; i++) {
2242-
ind = &self->pedigree->inds[i];
2243-
ind->id = (tsk_id_t) i;
2244-
ind->time = times[i];
2245-
2246-
// Link individuals to parents
2247-
for (j = 0; j < self->ploidy; j++) {
2248-
parent_ix = parents[i * self->ploidy + j];
2249-
if (parent_ix != TSK_NULL) {
2250-
ind->parents[j] = &self->pedigree->inds[parent_ix];
2251-
}
2252-
}
2253-
// Set samples
2254-
sample_flag = is_sample[i];
2255-
if (sample_flag != 0) {
2256-
tsk_bug_assert(sample_flag == 1);
2257-
self->pedigree->samples[sample_num] = ind;
2258-
sample_num++;
2259-
}
2260-
}
2261-
self->pedigree->num_samples = sample_num;
2262-
return 0;
2263-
}
2264-
22652228
static void
22662229
msp_check_samples(msp_t *self)
22672230
{
@@ -7230,13 +7193,20 @@ msp_set_simulation_model_wf_ped(msp_t *self)
72307193
{
72317194
int ret;
72327195
tsk_size_t j, k;
7196+
tsk_treeseq_t ts;
72337197
tsk_size_t num_individuals = self->tables->individuals.num_rows;
7234-
tsk_id_t *parents = NULL;
7235-
const tsk_id_t *ind_parents;
7236-
tsk_individual_t ind;
7237-
tsk_node_t node;
7238-
double *time = NULL;
7239-
tsk_flags_t *is_sample = NULL;
7198+
tsk_individual_t ind_obj;
7199+
individual_t *ind;
7200+
tsk_node_t node_obj;
7201+
tsk_id_t node_id;
7202+
tsk_size_t sample_num = 0;
7203+
bool sample_flag;
7204+
7205+
ret = tsk_treeseq_init(&ts, self->tables, TSK_BUILD_INDEXES);
7206+
if (ret != 0) {
7207+
ret = msp_set_tsk_error(ret);
7208+
goto out;
7209+
}
72407210

72417211
/* TODO better error codes here */
72427212
if (self->ploidy != 2) {
@@ -7253,52 +7223,47 @@ msp_set_simulation_model_wf_ped(msp_t *self)
72537223
goto out;
72547224
}
72557225

7256-
parents = malloc(num_individuals * self->ploidy * sizeof(*parents));
7257-
time = malloc(num_individuals * sizeof(*time));
7258-
is_sample = malloc(num_individuals * sizeof(*is_sample));
7259-
if (parents == NULL || time == NULL || is_sample == NULL) {
7260-
ret = MSP_ERR_NO_MEMORY;
7261-
goto out;
7262-
}
7263-
72647226
for (j = 0; j < num_individuals; j++) {
7265-
ret = tsk_individual_table_get_row(
7266-
&self->tables->individuals, (tsk_id_t) j, &ind);
7227+
ret = tsk_treeseq_get_individual(&ts, (tsk_id_t) j, &ind_obj);
72677228
tsk_bug_assert(ret == 0);
7268-
ind_parents = ind.parents;
7269-
tsk_bug_assert(ind.parents_length == self->ploidy);
7229+
ind = &self->pedigree->inds[j];
7230+
ind->id = (tsk_id_t) j;
7231+
tsk_bug_assert(ind_obj.parents_length == self->ploidy);
72707232
for (k = 0; k < self->ploidy; k++) {
7271-
if (ind_parents[k] < TSK_NULL
7272-
|| ind_parents[k] >= (tsk_id_t) num_individuals) {
7233+
if (ind_obj.parents[k] < TSK_NULL
7234+
|| ind_obj.parents[k] >= (tsk_id_t) num_individuals) {
72737235
ret = TSK_ERR_INDIVIDUAL_OUT_OF_BOUNDS;
72747236
goto out;
72757237
}
7276-
parents[j * self->ploidy + k] = ind_parents[k];
7238+
if (ind_obj.parents[k] != TSK_NULL) {
7239+
ind->parents[k] = &self->pedigree->inds[ind_obj.parents[k]];
7240+
}
7241+
}
7242+
sample_flag = false;
7243+
7244+
for (k = 0; k < ind_obj.nodes_length; k++) {
7245+
// ret = tsk_treeseq_get_node(&ts, ind_obj.nodes[k], &node_obj);
7246+
node_id
7247+
= (int) j * (int) ind_obj.nodes_length
7248+
+ (int)
7249+
k; // ugly way to access each node associated with an individual
7250+
ret = tsk_treeseq_get_node(&ts, node_id, &node_obj);
7251+
assert(ret == 0);
7252+
assert(node_obj.individual == (tsk_id_t) j);
7253+
ind->time = node_obj.time;
7254+
ind->nodes[k] = node_obj.id;
7255+
sample_flag = node_obj.flags & TSK_NODE_IS_SAMPLE;
7256+
}
7257+
7258+
if (sample_flag) {
7259+
self->pedigree->samples[sample_num] = ind;
7260+
sample_num++;
72777261
}
72787262
}
7279-
/* Every individual should have ploidy nodes associated with it in the
7280-
* tables. We get the time from there - we're not really doing any error
7281-
* checking here, just getting it sort-of working for now. */
7282-
for (j = 0; j < self->tables->nodes.num_rows; j++) {
7283-
ret = tsk_node_table_get_row(&self->tables->nodes, (tsk_id_t) j, &node);
7284-
tsk_bug_assert(ret == 0);
7285-
tsk_bug_assert(node.individual != TSK_NULL);
7286-
is_sample[node.individual] = !!(node.flags & TSK_NODE_IS_SAMPLE);
7287-
time[node.individual] = node.time;
7288-
}
7289-
/* JK: This next method isn't really needed, I'm just calling into
7290-
* the existing code to minimise changes at this point. The pedigree
7291-
* code needs a full going over to take the new table-based approach
7292-
* into account */
7293-
ret = msp_set_pedigree(self, parents, time, is_sample);
7294-
if (ret != 0) {
7295-
goto out;
7296-
}
7263+
self->pedigree->num_samples = sample_num;
72977264
ret = msp_set_simulation_model(self, MSP_MODEL_WF_PED);
72987265
out:
7299-
msp_safe_free(parents);
7300-
msp_safe_free(time);
7301-
msp_safe_free(is_sample);
7266+
tsk_treeseq_free(&ts);
73027267
return ret;
73037268
}
73047269

lib/msprime.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -121,6 +121,7 @@ typedef struct individual_t_t {
121121
int sex;
122122
double time;
123123
bool queued;
124+
tsk_id_t *nodes;
124125
// For debugging, to ensure we only merge once.
125126
bool merged;
126127
} individual_t;

lib/tests/test_pedigrees.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -176,15 +176,15 @@ test_pedigree_errors(void)
176176
parents[0] = -2;
177177
ret = build_pedigree_sim(
178178
&msp, &tables, rng, 100, ploidy, num_inds, parents, time, is_sample);
179-
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_INDIVIDUAL_OUT_OF_BOUNDS);
179+
CU_ASSERT_EQUAL_FATAL(ret, msp_set_tsk_error(TSK_ERR_INDIVIDUAL_OUT_OF_BOUNDS));
180180
ret = msp_initialise(&msp);
181181
tsk_table_collection_free(&tables);
182182
msp_free(&msp);
183183

184184
parents[0] = 100;
185185
ret = build_pedigree_sim(
186186
&msp, &tables, rng, 100, ploidy, num_inds, parents, time, is_sample);
187-
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_INDIVIDUAL_OUT_OF_BOUNDS);
187+
CU_ASSERT_EQUAL_FATAL(ret, msp_set_tsk_error(TSK_ERR_INDIVIDUAL_OUT_OF_BOUNDS));
188188
ret = msp_initialise(&msp);
189189
tsk_table_collection_free(&tables);
190190
msp_free(&msp);

0 commit comments

Comments
 (0)