Skip to content

Commit 7f59610

Browse files
committed
Add individuals nodes method
1 parent 5d91504 commit 7f59610

5 files changed

Lines changed: 202 additions & 0 deletions

File tree

python/CHANGELOG.rst

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,13 @@
1+
--------------------
2+
[0.6.4] - 2025-XX-XX
3+
--------------------
4+
5+
**Features**
6+
7+
- Add ``TreeSequence.individuals_nodes`` attribute to return the nodes
8+
associated with each individual as a numpy array.
9+
(:user:`benjeffery`, :pr:`3153`)
10+
111
--------------------
212
[0.6.3] - 2025-04-28
313
--------------------

python/_tskitmodule.c

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8725,6 +8725,61 @@ TreeSequence_get_individuals_time(TreeSequence *self)
87258725
return ret;
87268726
}
87278727

8728+
static PyObject *
8729+
TreeSequence_get_individuals_nodes(TreeSequence *self)
8730+
{
8731+
PyObject *ret = NULL;
8732+
PyArrayObject *ret_array = NULL;
8733+
npy_intp dims[2];
8734+
tsk_size_t ploidy;
8735+
tsk_size_t max_ploidy = 0;
8736+
tsk_id_t *node_mem;
8737+
tsk_size_t j;
8738+
tsk_size_t num_individuals;
8739+
tsk_id_t *const *individual_nodes;
8740+
const tsk_size_t *individual_nodes_length;
8741+
8742+
if (TreeSequence_check_state(self) != 0) {
8743+
goto out;
8744+
}
8745+
8746+
num_individuals = tsk_treeseq_get_num_individuals(self->tree_sequence);
8747+
individual_nodes = self->tree_sequence->individual_nodes;
8748+
individual_nodes_length = self->tree_sequence->individual_nodes_length;
8749+
8750+
for (tsk_id_t i = 0; i < (tsk_id_t) num_individuals; i++) {
8751+
ploidy = individual_nodes_length[i];
8752+
if (ploidy > max_ploidy) {
8753+
max_ploidy = ploidy;
8754+
}
8755+
}
8756+
8757+
dims[0] = (npy_intp) num_individuals;
8758+
dims[1] = (npy_intp) max_ploidy;
8759+
ret_array = (PyArrayObject *) PyArray_SimpleNew(2, dims, NPY_INT32);
8760+
if (ret_array == NULL) {
8761+
goto out;
8762+
}
8763+
8764+
/* Fill with -1 (TSK_NULL) */
8765+
node_mem = (tsk_id_t *) PyArray_DATA(ret_array);
8766+
memset(node_mem, 0xFF, PyArray_NBYTES(ret_array));
8767+
8768+
for (tsk_id_t i = 0; i < (tsk_id_t) num_individuals; i++) {
8769+
ploidy = individual_nodes_length[i];
8770+
for (j = 0; j < ploidy; j++) {
8771+
node_mem[i * max_ploidy + j] = individual_nodes[i][j];
8772+
}
8773+
}
8774+
8775+
ret = (PyObject *) ret_array;
8776+
ret_array = NULL;
8777+
8778+
out:
8779+
Py_XDECREF(ret_array);
8780+
return ret;
8781+
}
8782+
87288783
static PyObject *
87298784
TreeSequence_genealogical_nearest_neighbours(
87308785
TreeSequence *self, PyObject *args, PyObject *kwds)
@@ -11425,6 +11480,10 @@ static PyMethodDef TreeSequence_methods[] = {
1142511480
.ml_meth = (PyCFunction) TreeSequence_get_individuals_time,
1142611481
.ml_flags = METH_NOARGS,
1142711482
.ml_doc = "Returns the vector of per-individual times." },
11483+
{ .ml_name = "get_individuals_nodes",
11484+
.ml_meth = (PyCFunction) TreeSequence_get_individuals_nodes,
11485+
.ml_flags = METH_NOARGS,
11486+
.ml_doc = "Returns an array of the node ids for each individual" },
1142811487
{ .ml_name = "genealogical_nearest_neighbours",
1142911488
.ml_meth = (PyCFunction) TreeSequence_genealogical_nearest_neighbours,
1143011489
.ml_flags = METH_VARARGS | METH_KEYWORDS,

python/tests/test_highlevel.py

Lines changed: 93 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5491,3 +5491,96 @@ def test_error_if_no_schema(self, table_name):
54915491
ts = msprime.simulate(10)
54925492
with pytest.raises(NotImplementedError):
54935493
getattr(ts, f"{table_name}_metadata")
5494+
5495+
5496+
class TestIndividualsNodes:
    """
    Tests for ``TreeSequence.individuals_nodes``: a 2D int32 array with one
    row per individual, padded on the right with -1 up to the maximum ploidy.
    """

    def test_basic_individuals_nodes(self):
        # Two diploid individuals, so every slot is filled and no padding
        # is needed.
        tables = tskit.TableCollection(sequence_length=100)
        tables.individuals.add_row(flags=0, location=(0, 0), metadata=b"")
        tables.individuals.add_row(flags=0, location=(0, 0), metadata=b"")
        tables.nodes.add_row(flags=tskit.NODE_IS_SAMPLE, time=0, individual=0)
        tables.nodes.add_row(flags=tskit.NODE_IS_SAMPLE, time=0, individual=0)
        tables.nodes.add_row(flags=tskit.NODE_IS_SAMPLE, time=0, individual=1)
        tables.nodes.add_row(flags=tskit.NODE_IS_SAMPLE, time=0, individual=1)
        ts = tables.tree_sequence()

        result = ts.individuals_nodes
        assert result.shape == (2, 2)
        assert result.dtype == np.int32
        assert_array_equal(result, [[0, 1], [2, 3]])

    def test_variable_ploidy(self):
        tables = tskit.TableCollection(sequence_length=100)
        tables.individuals.add_row(flags=0, location=(0, 0), metadata=b"")  # Diploid
        tables.individuals.add_row(flags=0, location=(0, 0), metadata=b"")  # Haploid
        tables.individuals.add_row(flags=0, location=(0, 0), metadata=b"")  # Triploid

        # Diploid individual
        tables.nodes.add_row(flags=tskit.NODE_IS_SAMPLE, time=0, individual=0)
        tables.nodes.add_row(flags=tskit.NODE_IS_SAMPLE, time=0, individual=0)

        # Haploid individual
        tables.nodes.add_row(flags=tskit.NODE_IS_SAMPLE, time=0, individual=1)

        # Triploid individual
        tables.nodes.add_row(flags=tskit.NODE_IS_SAMPLE, time=0, individual=2)
        tables.nodes.add_row(flags=tskit.NODE_IS_SAMPLE, time=0, individual=2)
        tables.nodes.add_row(flags=tskit.NODE_IS_SAMPLE, time=0, individual=2)

        ts = tables.tree_sequence()

        result = ts.individuals_nodes

        # The widest (triploid) row sets the column count; shorter rows are
        # padded with -1.
        assert result.shape == (3, 3)
        assert result.dtype == np.int32

        expected = np.array(
            [
                [0, 1, -1],  # Diploid
                [2, -1, -1],  # Haploid
                [3, 4, 5],  # Triploid
            ]
        )
        assert_array_equal(result, expected)

    def test_no_individuals(self):
        tables = tskit.TableCollection(sequence_length=100)
        tables.nodes.add_row(flags=tskit.NODE_IS_SAMPLE, time=0)
        ts = tables.tree_sequence()

        result = ts.individuals_nodes
        expected = np.array([], dtype=np.int32).reshape(0, 0)
        assert result.shape == (0, 0)
        assert result.dtype == np.int32
        assert_array_equal(result, expected)

    def test_no_nodes_with_individuals(self):
        tables = tskit.TableCollection(sequence_length=100)
        tables.individuals.add_row(flags=0, location=(0, 0), metadata=b"")
        # Node without individual reference
        tables.nodes.add_row(flags=tskit.NODE_IS_SAMPLE, time=0)
        ts = tables.tree_sequence()

        result = ts.individuals_nodes
        # One individual and a maximum ploidy of zero: a single empty row.
        expected = np.empty((1, 0), dtype=np.int32)
        assert result.shape == (1, 0)
        assert result.dtype == np.int32
        assert_array_equal(result, expected)

    def test_individual_with_no_nodes(self):
        tables = tskit.TableCollection(sequence_length=100)
        tables.individuals.add_row(flags=0, location=(0, 0), metadata=b"")
        tables.individuals.add_row(flags=0, location=(0, 0), metadata=b"")
        # Only add nodes for first individual
        tables.nodes.add_row(flags=tskit.NODE_IS_SAMPLE, time=0, individual=0)
        ts = tables.tree_sequence()

        result = ts.individuals_nodes
        expected = np.array([[0], [-1]])
        assert result.shape == (2, 1)
        assert result.dtype == np.int32
        assert_array_equal(result, expected)

    def test_mixed_sample_status(self):
        # Nodes are listed for an individual whether or not they are samples.
        tables = tskit.TableCollection(sequence_length=100)
        tables.individuals.add_row(flags=0, location=(0, 0), metadata=b"")
        tables.nodes.add_row(flags=tskit.NODE_IS_SAMPLE, time=0, individual=0)
        tables.nodes.add_row(flags=0, time=0, individual=0)
        ts = tables.tree_sequence()

        result = ts.individuals_nodes
        expected = np.array([[0, 1]])
        assert result.shape == (1, 2)
        assert result.dtype == np.int32
        assert_array_equal(result, expected)

python/tests/test_lowlevel.py

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1949,6 +1949,31 @@ def test_array_lifetime(self, name, ts_fixture):
19491949
a2[:] = 0
19501950
assert a3 is not a2
19511951

1952+
def test_individuals_nodes(self, ts_fixture):
    # Exercise the low-level get_individuals_nodes() call: each call must
    # hand back a fresh, self-owned array that stays valid after the tree
    # sequence reference is dropped.
    ll_ts = ts_fixture.ll_tree_sequence

    # Array properties: aligned, C-contiguous and owning its own buffer.
    first = ll_ts.get_individuals_nodes()
    assert first.flags.aligned
    assert first.flags.c_contiguous
    assert first.flags.owndata
    # A second call returns a distinct object with equal contents.
    second = ll_ts.get_individuals_nodes()
    assert first is not second
    assert np.all(first == second)

    # Lifetime: keep an array and a copy of it, then drop the local
    # references to the tree sequence.
    kept = ll_ts.get_individuals_nodes()
    snapshot = kept.copy()
    assert kept is not snapshot
    del ts_fixture, ll_ts
    # Allocate a large array to churn memory before re-reading ``kept``.
    scratch = np.ones(10**6)
    assert np.all(kept == snapshot)
    del kept
    # Write to the copy just to touch its memory.
    snapshot[:] = 0
    assert scratch is not snapshot
1976+
19521977

19531978
class StatsInterfaceMixin:
19541979
"""

python/tskit/trees.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4123,6 +4123,7 @@ def __init__(self, ll_tree_sequence):
41234123
self._individuals_time = None
41244124
self._individuals_population = None
41254125
self._individuals_location = None
4126+
self._individuals_nodes = None
41264127
# NOTE: when we've implemented read-only access via the underlying
41274128
# tables we can replace these arrays with reference to the read-only
41284129
# tables here (and remove the low-level boilerplate).
@@ -5814,6 +5815,20 @@ def individuals_metadata(self):
58145815
self._individuals_metadata
58155816
)
58165817

5818+
@property
def individuals_nodes(self):
    """
    Return an array of node IDs for each individual in the tree sequence.

    The array is obtained from the low-level tree sequence on first access
    and stored on this object; later accesses return that same stored array.

    :return: Array of shape (num_individuals, max_ploidy) containing node IDs.
        Values of -1 indicate unused slots for individuals with ploidy
        less than the maximum.
    :rtype: numpy.ndarray (dtype=np.int32)
    """
    nodes = self._individuals_nodes
    if nodes is None:
        # First access: fetch from the low-level object and remember it.
        nodes = self._ll_tree_sequence.get_individuals_nodes()
        self._individuals_nodes = nodes
    return nodes
5831+
58175832
@property
58185833
def nodes_metadata(self):
58195834
"""

0 commit comments

Comments
 (0)