Skip to content

Commit 5b915eb

Browse files
committed
Fix up docs
1 parent a60764c commit 5b915eb

2 files changed

Lines changed: 35 additions & 16 deletions

File tree

python/tests/test_highlevel.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5685,6 +5685,12 @@ def test_no_individuals_default_ploidy(self):
56855685
for i in range(4):
56865686
assert result.individuals_name[i] == f"tsk_{i}"
56875687

5688+
with pytest.raises(
5689+
ValueError,
5690+
match="Cannot include non-sample nodes when individuals are not present",
5691+
):
5692+
ts.map_to_vcf_model(include_non_sample_nodes=True)
5693+
56885694
def test_no_individuals_custom_ploidy(self):
56895695
ts = tskit.Tree.generate_balanced(6).tree_sequence
56905696
assert ts.num_individuals == 0

python/tskit/trees.py

Lines changed: 29 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -6432,20 +6432,21 @@ def write_vcf(
64326432
:ref:`sec_export_vcf_constructing_gt` section for more details
64336433
and examples.
64346434
6435-
If individuals that are associated with sample nodes are defined in the
6435+
If individuals are defined in the
64366436
data model (see :ref:`sec_individual_table_definition`), the genotypes
6437-
for each of the individual's samples are combined into a phased
6438-
multiploid values at each site. By default, all individuals associated
6439-
with only sample nodes are included in increasing order of individual ID.
6437+
for each of the individual's nodes are combined into phased
6438+
multiploid values at each site. By default, all individuals are
6439+
included with their sample nodes; individuals with no nodes are
6440+
omitted. The ``include_non_sample_nodes`` argument can be used to
6441+
include non-sample nodes in the output VCF.
64406442
64416443
Subsets or permutations of the sample individuals may be specified
6442-
using the ``individuals`` argument. It is an error to specify any
6443-
individuals that are not associated with any nodes.
6444+
using the ``individuals`` argument.
64446445
64456446
Mixed-sample individuals (e.g., those associated with one node
64466447
that is a sample and another that is not) in the data model will
6447-
be ignored by default. However, such individuals can be
6448-
excluded using the ``individuals`` argument.
6448+
only have the sample nodes output by default. However, non-sample
6449+
nodes can be included using the ``include_non_sample_nodes`` argument.
64496450
64506451
If there are no individuals in the tree sequence,
64516452
synthetic individuals are created by combining adjacent samples, and
@@ -6546,6 +6547,8 @@ def write_vcf(
65466547
output to the VCF, otherwise if one is present an error will be raised.
65476548
The VCF spec does not allow for sites at position 0. However, in practice
65486549
many tools will be fine with this. Default: False.
6550+
:param bool include_non_sample_nodes: If True, include non-sample nodes
6551+
in the output VCF. By default, only sample nodes are included.
65496552
"""
65506553
if allow_position_zero is None:
65516554
allow_position_zero = False
@@ -10545,20 +10548,21 @@ def map_to_vcf_model(
1054510548
mapping as a 2D array of (individuals, nodes) and the individual names. The
1054610549
mapping is created by first checking if the tree sequence contains individuals.
1054710550
If it does, the mapping is created using the individuals in the tree sequence.
10548-
If it does not, the mapping is created using the sample nodes and the
10549-
specified ploidy.
10551+
By default, only the sample nodes of the individuals are included in the mapping,
10552+
unless `include_non_sample_nodes` is set to True, in which case all nodes
10553+
belonging to the individuals are included. Any individuals without any nodes
10554+
are ignored and not included in the mapping.
10555+
If no individuals are present, the mapping is created using only the sample nodes
10556+
and the specified ploidy.
1055010557
1055110558
If neither `name_metadata_key` nor `individual_names` is specified, the
1055210559
individual names are set to "tsk_{individual_id}" for each individual.
1055310560
10554-
Warnings are emmitted if any sample nodes do not have an individual ID, or if
10555-
individuals are not specified and the tree sequence contains individuals
10556-
that have no nodes associated with them, or individuals have a mix of sample
10557-
and non-sample nodes.
10561+
Warnings are emitted if any sample nodes do not have an individual ID.
1055810562
1055910563
:param list individuals: Specific individual IDs to include in the VCF. If not
10560-
specified and the tree sequence contains individuals, all individuals whose
10561-
nodes all have the flag NODE_IS_SAMPLE set are included.
10564+
specified and the tree sequence contains individuals, all individuals with
10565+
at least one node are included.
1056210566
:param int ploidy: The ploidy, or number of nodes per individual. Only used when
1056310567
the tree sequence does not contain individuals. Cannot be used if the tree
1056410568
sequence contains individuals. Defaults to 1 if not specified.
@@ -10567,6 +10571,9 @@ def map_to_vcf_model(
1056710571
individual_names.
1056810572
:param list individual_names: The names to use for each individual. Cannot
1056910573
be specified simultaneously with name_metadata_key.
10574+
:param bool include_non_sample_nodes: If True, include all nodes belonging to
10575+
the individuals in the mapping. If False, only include sample nodes.
10576+
Defaults to False.
1057010577
:return: A VcfModelMapping containing the node-to-individual mapping and
1057110578
individual names.
1057210579
:raises ValueError: If both name_metadata_key and individual_names are specified,
@@ -10587,6 +10594,12 @@ def map_to_vcf_model(
1058710594
"Cannot specify ploidy when individuals are present in the tree sequence"
1058810595
)
1058910596

10597+
if self.num_individuals == 0 and include_non_sample_nodes:
10598+
raise ValueError(
10599+
"Cannot include non-sample nodes when individuals are not present in "
10600+
"the tree sequence"
10601+
)
10602+
1059010603
if self.num_individuals > 0 and np.any(
1059110604
np.logical_and(
1059210605
self.nodes_individual == tskit.NULL,

0 commit comments

Comments
 (0)