Skip to content

Commit cd129aa

Browse files
committed
enable span_normalise for genetic_relatedness_vector; closes #3241
1 parent f43ab1f commit cd129aa

4 files changed

Lines changed: 98 additions & 14 deletions

File tree

c/tests/test_stats.c

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2068,7 +2068,7 @@ test_empty_genetic_relatedness_vector(void)
20682068
int ret;
20692069
tsk_treeseq_t ts;
20702070
tsk_size_t num_samples;
2071-
double *weights, *result;
2071+
double *weights, *result, *result2;
20722072
tsk_size_t j;
20732073
tsk_size_t num_weights = 2;
20742074
double windows[] = { 0, 0 };
@@ -2079,6 +2079,7 @@ test_empty_genetic_relatedness_vector(void)
20792079
windows[1] = tsk_treeseq_get_sequence_length(&ts);
20802080
weights = tsk_malloc(num_weights * num_samples * sizeof(double));
20812081
result = tsk_malloc(num_weights * num_samples * sizeof(double));
2082+
result2 = tsk_malloc(num_weights * num_samples * sizeof(double));
20822083
for (j = 0; j < num_samples; j++) {
20832084
weights[j] = 1.0;
20842085
}
@@ -2099,10 +2100,17 @@ test_empty_genetic_relatedness_vector(void)
20992100
ret = tsk_treeseq_genetic_relatedness_vector(
21002101
&ts, num_weights, weights, 1, windows, num_samples, ts.samples, result, 0);
21012102
CU_ASSERT_EQUAL_FATAL(ret, 0);
2103+
ret = tsk_treeseq_genetic_relatedness_vector(&ts, num_weights, weights, 1, windows,
2104+
num_samples, ts.samples, result2, TSK_STAT_SPAN_NORMALISE);
2105+
CU_ASSERT_EQUAL_FATAL(ret, 0);
2106+
for (j = 0; j < num_samples * num_weights; j++) {
2107+
CU_ASSERT_EQUAL_FATAL(result[j] / (windows[1] - windows[0]), result2[j]);
2108+
}
21022109

21032110
tsk_treeseq_free(&ts);
21042111
free(weights);
21052112
free(result);
2113+
free(result2);
21062114
}
21072115

21082116
static void

c/tskit/trees.c

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10771,6 +10771,9 @@ tsk_matvec_calculator_run(tsk_matvec_calculator_t *self)
1077110771
tsk_matvec_calculator_print_state(self, tsk_get_debug_stream());
1077210772
}
1077310773
}
10774+
if (!!(self->options & TSK_STAT_SPAN_NORMALISE)) {
10775+
span_normalise(self->num_windows, windows, out_size, self->result);
10776+
}
1077410777

1077510778
/* out: */
1077610779
return ret;
@@ -10807,6 +10810,9 @@ tsk_treeseq_genetic_relatedness_vector(const tsk_treeseq_t *self, tsk_size_t num
1080710810
tsk_matvec_calculator_print_state(&calc, tsk_get_debug_stream());
1080810811
}
1080910812
ret = tsk_matvec_calculator_run(&calc);
10813+
if (ret != 0) {
10814+
goto out;
10815+
}
1081010816
out:
1081110817
tsk_matvec_calculator_free(&calc);
1081210818
return ret;

python/CHANGELOG.rst

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,11 @@
5959
(ensuring that parent mutations occur before children), then node, then
6060
their original order in the tables. (:user:`benjeffery`, :pr:`3257`, :issue:`3253`)
6161

62+
- Fix bug in ``TreeSequence.genetic_relatedness_vector`` that previously ignored
63+
``span_normalise``: previously, ``span_normalise`` was always set to ``False``;
64+
now the default is ``True`` in agreement with other statistics, so the returned
65+
values will change. (:user:`petrelharp`, :pr:`3300`, :issue:`3241`)
66+
6267
- Fix bug in ``TreeSequence.pair_coalescence_counts`` when ``span_normalise=True``
6368
and a window breakpoint falls within an internal missing interval.
6469
(:user:`nspope`, :pr:`3176`, :issue:`3175`)

python/tests/test_relatedness_vector.py

Lines changed: 78 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,7 @@ def __init__(
5757
verbosity=0,
5858
internal_checks=False,
5959
centre=True,
60+
span_normalise=True,
6061
):
6162
self.sample_weights = np.asarray(sample_weights, dtype=np.float64)
6263
self.num_weights = self.sample_weights.shape[1]
@@ -80,6 +81,7 @@ def __init__(
8081
self.verbosity = verbosity
8182
self.internal_checks = internal_checks
8283
self.centre = centre
84+
self.span_normalise = span_normalise
8385

8486
if self.centre:
8587
self.sample_weights -= np.mean(self.sample_weights, axis=0)
@@ -285,6 +287,12 @@ def run(self):
285287
if self.centre:
286288
for m in range(num_windows):
287289
out[m] -= np.mean(out[m], axis=0)
290+
291+
if self.span_normalise:
292+
window_lengths = np.diff(self.windows)
293+
for m in range(num_windows):
294+
out[m] /= window_lengths[m]
295+
288296
return out
289297

290298

@@ -326,7 +334,7 @@ def relatedness_vector(ts, sample_weights, windows=None, nodes=None, **kwargs):
326334
return out
327335

328336

329-
def relatedness_matrix(ts, windows, centre, nodes=None):
337+
def relatedness_matrix(ts, windows, centre, nodes=None, span_normalise=True):
330338
if nodes is None:
331339
keep_rows = np.arange(ts.num_samples)
332340
keep_cols = np.arange(ts.num_samples)
@@ -356,7 +364,7 @@ def relatedness_matrix(ts, windows, centre, nodes=None):
356364
indexes=[(i, j) for i in range(ts.num_samples) for j in range(ts.num_samples)],
357365
windows=use_windows,
358366
mode="branch",
359-
span_normalise=False,
367+
span_normalise=span_normalise,
360368
proportion=False,
361369
centre=centre,
362370
)
@@ -375,7 +383,15 @@ def relatedness_matrix(ts, windows, centre, nodes=None):
375383

376384

377385
def verify_relatedness_vector(
378-
ts, w, windows, *, internal_checks=False, verbosity=0, centre=True, nodes=None
386+
ts,
387+
w,
388+
windows,
389+
*,
390+
internal_checks=False,
391+
verbosity=0,
392+
centre=True,
393+
nodes=None,
394+
span_normalise=True,
379395
):
380396
R1 = relatedness_vector(
381397
ts,
@@ -385,18 +401,26 @@ def verify_relatedness_vector(
385401
verbosity=verbosity,
386402
centre=centre,
387403
nodes=nodes,
404+
span_normalise=span_normalise,
388405
)
389406
nrows = ts.num_samples if nodes is None else len(nodes)
390407
wvec = w if len(w.shape) > 1 else w[:, np.newaxis]
391-
Sigma = relatedness_matrix(ts, windows=windows, centre=centre, nodes=nodes)
408+
Sigma = relatedness_matrix(
409+
ts, windows=windows, centre=centre, nodes=nodes, span_normalise=span_normalise
410+
)
392411
if windows is None:
393412
R2 = Sigma.dot(wvec)
394413
else:
395414
R2 = np.zeros((len(windows) - 1, nrows, wvec.shape[1]))
396415
for k in range(len(windows) - 1):
397416
R2[k] = Sigma[k].dot(wvec)
398417
R3 = ts.genetic_relatedness_vector(
399-
w, windows=windows, mode="branch", centre=centre, nodes=nodes
418+
w,
419+
windows=windows,
420+
mode="branch",
421+
centre=centre,
422+
nodes=nodes,
423+
span_normalise=span_normalise,
400424
)
401425
if verbosity > 0:
402426
print(ts.draw_text())
@@ -425,6 +449,7 @@ def check_relatedness_vector(
425449
verbosity=0,
426450
seed=123,
427451
centre=True,
452+
span_normalise=True,
428453
do_nodes=True,
429454
):
430455
rng = np.random.default_rng(seed=seed)
@@ -568,8 +593,9 @@ def test_good_nodes(self):
568593
@pytest.mark.parametrize("n", [2, 3, 5])
569594
@pytest.mark.parametrize("seed", range(1, 4))
570595
@pytest.mark.parametrize("centre", (True, False))
596+
@pytest.mark.parametrize("span_normalise", (True, False))
571597
@pytest.mark.parametrize("num_windows", (0, 1, 2, 3))
572-
def test_small_internal_checks(self, n, seed, centre, num_windows):
598+
def test_small_internal_checks(self, n, seed, centre, span_normalise, num_windows):
573599
ts = msprime.sim_ancestry(
574600
n,
575601
ploidy=1,
@@ -579,14 +605,19 @@ def test_small_internal_checks(self, n, seed, centre, num_windows):
579605
)
580606
assert ts.num_trees >= 2
581607
check_relatedness_vector(
582-
ts, num_windows=num_windows, internal_checks=True, centre=centre
608+
ts,
609+
num_windows=num_windows,
610+
internal_checks=True,
611+
centre=centre,
612+
span_normalise=span_normalise,
583613
)
584614

585615
@pytest.mark.parametrize("n", [2, 3, 5, 15])
586616
@pytest.mark.parametrize("seed", range(1, 5))
587617
@pytest.mark.parametrize("centre", (True, False))
618+
@pytest.mark.parametrize("span_normalise", (True, False))
588619
@pytest.mark.parametrize("num_windows", (0, 1, 2, 3))
589-
def test_simple_sims(self, n, seed, centre, num_windows):
620+
def test_simple_sims(self, n, seed, centre, span_normalise, num_windows):
590621
ts = msprime.sim_ancestry(
591622
n,
592623
ploidy=1,
@@ -597,7 +628,11 @@ def test_simple_sims(self, n, seed, centre, num_windows):
597628
)
598629
assert ts.num_trees >= 2
599630
check_relatedness_vector(
600-
ts, num_windows=num_windows, centre=centre, verbosity=0
631+
ts,
632+
num_windows=num_windows,
633+
centre=centre,
634+
verbosity=0,
635+
span_normalise=span_normalise,
601636
)
602637

603638
def test_simple_sims_windows(self):
@@ -613,18 +648,42 @@ def test_simple_sims_windows(self):
613648
assert ts.num_trees >= 2
614649
W = np.linspace(0, 1, 2 * ts.num_samples).reshape((ts.num_samples, 2))
615650
kwargs = {"centre": False, "mode": "branch"}
616-
total = ts.genetic_relatedness_vector(W, **kwargs)
651+
total = ts.genetic_relatedness_vector(W, span_normalise=False, **kwargs)
617652
for windows in [[0, L], [0, L / 3, L / 2, L]]:
618-
pieces = ts.genetic_relatedness_vector(W, windows=windows, **kwargs)
653+
pieces = ts.genetic_relatedness_vector(
654+
W, windows=windows, span_normalise=False, **kwargs
655+
)
619656
np.testing.assert_allclose(total, pieces.sum(axis=0), atol=1e-13)
620657
assert len(pieces) == len(windows) - 1
621658
for k in range(len(pieces)):
622659
piece = ts.genetic_relatedness_vector(
623-
W, windows=windows[k : k + 2], **kwargs
660+
W, windows=windows[k : k + 2], span_normalise=False, **kwargs
624661
)
625662
assert piece.shape[0] == 1
626663
np.testing.assert_allclose(piece[0], pieces[k], atol=1e-13)
627664

665+
@pytest.mark.parametrize("mode", ("branch",))
666+
def test_simple_span_normalise(self, mode):
667+
ts = msprime.sim_ancestry(
668+
4,
669+
ploidy=1,
670+
population_size=20,
671+
sequence_length=100,
672+
recombination_rate=0.01,
673+
random_seed=123,
674+
)
675+
assert ts.num_trees >= 2
676+
x = np.linspace(0, 1, ts.num_samples)
677+
w = np.linspace(0, ts.sequence_length, 11)[[0, 3, 4, -1]]
678+
a = ts.genetic_relatedness_vector(x, windows=w, mode=mode) # default is True
679+
ax = ts.genetic_relatedness_vector(x, windows=w, mode=mode, span_normalise=True)
680+
assert np.all(ax == a)
681+
b = ts.genetic_relatedness_vector(x, windows=w, span_normalise=False, mode=mode)
682+
assert a.shape == b.shape
683+
dw = np.diff(w)
684+
for aa, bb, ww in zip(a, b, dw, strict=True):
685+
assert np.allclose(aa * ww, bb)
686+
628687
@pytest.mark.parametrize("n", [2, 3, 5, 15])
629688
@pytest.mark.parametrize("centre", (True, False))
630689
def test_single_balanced_tree(self, n, centre):
@@ -680,10 +739,16 @@ def test_missing_flanks(self, centre, num_windows):
680739

681740
@pytest.mark.parametrize("ts", get_example_tree_sequences())
682741
@pytest.mark.parametrize("centre", (True, False))
683-
def test_suite_examples(self, ts, centre):
742+
def test_suite_examples_centre(self, ts, centre):
684743
if ts.num_samples > 0:
685744
check_relatedness_vector(ts, centre=centre)
686745

746+
@pytest.mark.parametrize("ts", get_example_tree_sequences())
747+
@pytest.mark.parametrize("span_normalise", (True, False))
748+
def test_suite_examples_span_normalise(self, ts, span_normalise):
749+
if ts.num_samples > 0:
750+
check_relatedness_vector(ts, span_normalise=span_normalise)
751+
687752
@pytest.mark.parametrize("n", [2, 3, 10])
688753
def test_dangling_on_samples(self, n):
689754
# Adding non sample branches below the samples does not alter

0 commit comments

Comments
 (0)