Skip to content

Commit f77a799

Browse files
committed
Add individual nodes method
1 parent b6d7eab commit f77a799

2 files changed

Lines changed: 127 additions & 0 deletions

File tree

python/tests/test_highlevel.py

Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5491,3 +5491,91 @@ def test_error_if_no_schema(self, table_name):
54915491
ts = msprime.simulate(10)
54925492
with pytest.raises(NotImplementedError):
54935493
getattr(ts, f"{table_name}_metadata")
5494+
5495+
5496+
class TestIndividualNodes:
    """
    Tests for the ``individual_nodes`` method of tree sequences: the
    shape and contents of the returned array, and each of the
    ``ValueError`` conditions it reports.
    """

    def test_basic_individual_nodes(self):
        # Two diploid individuals, nodes added in individual order.
        tables = tskit.TableCollection(sequence_length=100)
        tables.individuals.add_row(flags=0, location=(0, 0), metadata=b"")
        tables.individuals.add_row(flags=0, location=(0, 0), metadata=b"")
        tables.nodes.add_row(flags=tskit.NODE_IS_SAMPLE, time=0, individual=0)
        tables.nodes.add_row(flags=tskit.NODE_IS_SAMPLE, time=0, individual=0)
        tables.nodes.add_row(flags=tskit.NODE_IS_SAMPLE, time=0, individual=1)
        tables.nodes.add_row(flags=tskit.NODE_IS_SAMPLE, time=0, individual=1)
        ts = tables.tree_sequence()

        result = ts.individual_nodes()
        assert result.shape == (2, 2)
        assert result.dtype == np.int32
        assert np.array_equal(result, [[0, 1], [2, 3]])

    def test_variable_ploidy(self):
        # Rows shorter than the maximum ploidy must be padded with -1.
        tables = tskit.TableCollection(sequence_length=100)
        tables.individuals.add_row(flags=0, location=(0, 0), metadata=b"")  # Diploid
        tables.individuals.add_row(flags=0, location=(0, 0), metadata=b"")  # Haploid
        tables.individuals.add_row(flags=0, location=(0, 0), metadata=b"")  # Triploid

        # Diploid individual
        tables.nodes.add_row(flags=tskit.NODE_IS_SAMPLE, time=0, individual=0)
        tables.nodes.add_row(flags=tskit.NODE_IS_SAMPLE, time=0, individual=0)

        # Haploid individual
        tables.nodes.add_row(flags=tskit.NODE_IS_SAMPLE, time=0, individual=1)

        # Triploid individual
        tables.nodes.add_row(flags=tskit.NODE_IS_SAMPLE, time=0, individual=2)
        tables.nodes.add_row(flags=tskit.NODE_IS_SAMPLE, time=0, individual=2)
        tables.nodes.add_row(flags=tskit.NODE_IS_SAMPLE, time=0, individual=2)

        ts = tables.tree_sequence()

        result = ts.individual_nodes()

        assert result.shape == (3, 3)

        expected = np.array(
            [
                [0, 1, -1],  # Diploid
                [2, -1, -1],  # Haploid
                [3, 4, 5],  # Triploid
            ]
        )
        assert np.array_equal(result, expected)

    def test_no_individuals(self):
        # An empty individual table is rejected outright.
        tables = tskit.TableCollection(sequence_length=100)
        tables.nodes.add_row(flags=tskit.NODE_IS_SAMPLE, time=0)
        ts = tables.tree_sequence()

        with pytest.raises(ValueError, match="Tree sequence has no individuals"):
            ts.individual_nodes()

    def test_no_samples_with_individuals(self):
        # There is an individual, but the only node does not reference it,
        # so no node refers to any individual at all.
        tables = tskit.TableCollection(sequence_length=100)
        tables.individuals.add_row(flags=0, location=(0, 0), metadata=b"")
        # Node without individual reference
        tables.nodes.add_row(flags=tskit.NODE_IS_SAMPLE, time=0)
        ts = tables.tree_sequence()

        with pytest.raises(ValueError, match="No nodes refer to individuals"):
            ts.individual_nodes()

    def test_individual_with_no_nodes(self):
        # Some individuals have nodes, but individual 1 has none.
        tables = tskit.TableCollection(sequence_length=100)
        tables.individuals.add_row(flags=0, location=(0, 0), metadata=b"")
        tables.individuals.add_row(flags=0, location=(0, 0), metadata=b"")
        # Only add nodes for first individual
        tables.nodes.add_row(flags=tskit.NODE_IS_SAMPLE, time=0, individual=0)
        ts = tables.tree_sequence()

        with pytest.raises(
            ValueError, match="Individual 1 not associated with any nodes"
        ):
            ts.individual_nodes()

    def test_mixed_sample_status(self):
        # One sample node and one non-sample node on the same individual.
        tables = tskit.TableCollection(sequence_length=100)
        tables.individuals.add_row(flags=0, location=(0, 0), metadata=b"")
        tables.nodes.add_row(flags=tskit.NODE_IS_SAMPLE, time=0, individual=0)
        tables.nodes.add_row(flags=0, time=0, individual=0)
        ts = tables.tree_sequence()
        with pytest.raises(
            ValueError, match="has nodes that are sample and non-samples"
        ):
            ts.individual_nodes()

python/tskit/trees.py

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10476,6 +10476,45 @@ def ld_matrix(
1047610476
mode=mode,
1047710477
)
1047810478

10479+
def individual_nodes(self):
    """
    Return an array of node IDs for each individual in the tree sequence.

    Row ``i`` of the result holds the node IDs of individual ``i`` in the
    order they appear in that individual's ``nodes``. Rows for individuals
    with fewer nodes than the maximum are padded on the right with -1.

    :return: Array of shape (num_individuals, max_ploidy) containing node IDs.
        Values of -1 indicate unused slots for individuals with ploidy
        less than the maximum.
    :rtype: numpy.ndarray (dtype=np.int32)
    :raises ValueError: If the tree sequence has no individuals, if no
        node refers to any individual, if an individual is not associated
        with any nodes, or if an individual's nodes are a mixture of
        samples and non-samples.
    """
    num_individuals = self.num_individuals
    if num_individuals == 0:
        raise ValueError("Tree sequence has no individuals")

    # Fetch each individual's node list once; it is needed both to size
    # the output array and to fill it.
    nodes_by_individual = [self.individual(i).nodes for i in range(num_individuals)]
    max_ploidy = max(len(nodes) for nodes in nodes_by_individual)
    if max_ploidy == 0:
        raise ValueError("No nodes refer to individuals")

    # Initialize output array with -1 (indicating no node)
    result = np.full((num_individuals, max_ploidy), -1, dtype=np.int32)

    for i, nodes in enumerate(nodes_by_individual):
        if len(nodes) == 0:
            raise ValueError(f"Individual {i} not associated with any nodes")
        # A set of the per-node sample flags has exactly one element when
        # all of the individual's nodes agree on sample status.
        sample_status = {self.node(u).is_sample() for u in nodes}
        if len(sample_status) != 1:
            raise ValueError(
                f"Individual {i} has nodes that are sample and non-samples"
            )
        result[i, : len(nodes)] = nodes

    return result
10517+
1047910518
############################################
1048010519
#
1048110520
# Deprecated APIs. These are either already unsupported, or will be unsupported in a

0 commit comments

Comments
 (0)