@@ -6432,20 +6432,21 @@ def write_vcf(
64326432 :ref:`sec_export_vcf_constructing_gt` section for more details
64336433 and examples.
64346434
6435- If individuals that are associated with sample nodes are defined in the
6435+ If individuals are defined in the
64366436 data model (see :ref:`sec_individual_table_definition`), the genotypes
6437- for each of the individual's samples are combined into a phased
6438- multiploid values at each site. By default, all individuals associated
6439- with only sample nodes are included in increasing order of individual ID.
6437+ for each of the individual's nodes are combined into phased
6438+ multiploid values at each site. By default, all individuals are
6439+ included with their sample nodes; individuals with no nodes are
6440+ omitted. The ``include_non_sample_nodes`` argument can be used to
6441+ include non-sample nodes in the output VCF.
64406442
64416443 Subsets or permutations of the sample individuals may be specified
6442- using the ``individuals`` argument. It is an error to specify any
6443- individuals that are not associated with any nodes.
6444+ using the ``individuals`` argument.
64446445
64456446 Mixed-sample individuals (e.g., those associated with one node
64466447 that is a sample and another that is not) in the data model will
6447- be ignored by default. However, such individuals can be
6448- excluded using the ``individuals `` argument.
6448+ only have the sample nodes output by default. However, non-sample
6449+ nodes can be included using the ``include_non_sample_nodes`` argument.
64496450
64506451 If there are no individuals in the tree sequence,
64516452 synthetic individuals are created by combining adjacent samples, and
@@ -6546,6 +6547,8 @@ def write_vcf(
65466547 output to the VCF, otherwise if one is present an error will be raised.
65476548 The VCF spec does not allow for sites at position 0. However, in practice
65486549 many tools will be fine with this. Default: False.
6550+ :param bool include_non_sample_nodes: If True, include non-sample nodes
6551+ in the output VCF. By default, only sample nodes are included.
65496552 """
65506553 if allow_position_zero is None :
65516554 allow_position_zero = False
@@ -10545,20 +10548,21 @@ def map_to_vcf_model(
1054510548 mapping as a 2D array of (individuals, nodes) and the individual names. The
1054610549 mapping is created by first checking if the tree sequence contains individuals.
1054710550 If it does, the mapping is created using the individuals in the tree sequence.
10548- If it does not, the mapping is created using the sample nodes and the
10549- specified ploidy.
10551+ By default, only the sample nodes of the individuals are included in the mapping,
10552+ unless `include_non_sample_nodes` is set to True, in which case all nodes
10553+ belonging to the individuals are included. Any individuals without any nodes
10554+ are ignored and not included in the mapping.
10555+ If no individuals are present, the mapping is created using only the sample nodes
10556+ and the specified ploidy.
1055010557
1055110558 If neither `name_metadata_key` nor `individual_names` is specified, the
1055210559 individual names are set to "tsk_{individual_id}" for each individual.
1055310560
10554- Warnings are emmitted if any sample nodes do not have an individual ID, or if
10555- individuals are not specified and the tree sequence contains individuals
10556- that have no nodes associated with them, or individuals have a mix of sample
10557- and non-sample nodes.
10561+ Warnings are emitted if any sample nodes do not have an individual ID.
1055810562
1055910563 :param list individuals: Specific individual IDs to include in the VCF. If not
10560- specified and the tree sequence contains individuals, all individuals whose
10561- nodes all have the flag NODE_IS_SAMPLE set are included .
10564+ specified and the tree sequence contains individuals, all individuals
10565+ with at least one node are included.
1056210566 :param int ploidy: The ploidy, or number of nodes per individual. Only used when
1056310567 the tree sequence does not contain individuals. Cannot be used if the tree
1056410568 sequence contains individuals. Defaults to 1 if not specified.
@@ -10567,6 +10571,9 @@ def map_to_vcf_model(
1056710571 individual_names.
1056810572 :param list individual_names: The names to use for each individual. Cannot
1056910573 be specified simultaneously with name_metadata_key.
10574+ :param bool include_non_sample_nodes: If True, include all nodes belonging to
10575+ the individuals in the mapping. If False, only include sample nodes.
10576+ Defaults to False.
1057010577 :return: A VcfModelMapping containing the node-to-individual mapping and
1057110578 individual names.
1057210579 :raises ValueError: If both name_metadata_key and individual_names are specified,
@@ -10587,6 +10594,12 @@ def map_to_vcf_model(
1058710594 "Cannot specify ploidy when individuals are present in the tree sequence"
1058810595 )
1058910596
10597+ if self .num_individuals == 0 and include_non_sample_nodes :
10598+ raise ValueError (
10599+ "Cannot include non-sample nodes when individuals are not present in "
10600+ "the tree sequence"
10601+ )
10602+
1059010603 if self .num_individuals > 0 and np .any (
1059110604 np .logical_and (
1059210605 self .nodes_individual == tskit .NULL ,
0 commit comments