@@ -5668,3 +5668,245 @@ def test_different_node_flags(self):
56685668 result = ts .sample_nodes_by_ploidy (2 )
56695669 assert result .shape == (1 , 2 )
56705670 assert_array_equal (result , np .array ([[0 , 2 ]]))
5671+
5672+
5673+ class TestMapToVcfModel :
5674+ def test_no_individuals_default_ploidy (self ):
5675+ ts = tskit .Tree .generate_balanced (4 ).tree_sequence
5676+ assert ts .num_individuals == 0
5677+
5678+ # Default ploidy should be 1
5679+ result = ts .map_to_vcf_model ()
5680+ assert isinstance (result , tskit .VcfModelMapping )
5681+ assert result .individuals_nodes .shape == (4 , 1 )
5682+ for i in range (4 ):
5683+ assert result .individuals_nodes [i , 0 ] == i
5684+ assert result .individuals_name .shape == (4 ,)
5685+ for i in range (4 ):
5686+ assert result .individuals_name [i ] == f"tsk_{ i } "
5687+
5688+ with pytest .raises (
5689+ ValueError ,
5690+ match = "Cannot include non-sample nodes when individuals are not present" ,
5691+ ):
5692+ ts .map_to_vcf_model (include_non_sample_nodes = True )
5693+
5694+ def test_no_individuals_custom_ploidy (self ):
5695+ ts = tskit .Tree .generate_balanced (6 ).tree_sequence
5696+ assert ts .num_individuals == 0
5697+
5698+ # Use ploidy = 2
5699+ result = ts .map_to_vcf_model (ploidy = 2 )
5700+ assert isinstance (result , tskit .VcfModelMapping )
5701+ assert result .individuals_nodes .shape == (3 , 2 )
5702+ for i in range (3 ):
5703+ assert result .individuals_nodes [i , 0 ] == i * 2
5704+ assert result .individuals_nodes [i , 1 ] == i * 2 + 1
5705+ assert result .individuals_name .shape == (3 ,)
5706+ for i in range (3 ):
5707+ assert result .individuals_name [i ] == f"tsk_{ i } "
5708+
5709+ def test_no_individuals_uneven_ploidy (self ):
5710+ ts = tskit .Tree .generate_balanced (5 ).tree_sequence
5711+ # This tree sequence has no individuals
5712+ assert ts .num_individuals == 0
5713+
5714+ # 5 samples cannot be evenly divided into ploidy=2
5715+ with pytest .raises (ValueError , match = "not a multiple" ):
5716+ ts .map_to_vcf_model (ploidy = 2 )
5717+
5718+ def test_with_individuals (self ):
5719+ ts = msprime .sim_ancestry (
5720+ 5 ,
5721+ random_seed = 42 ,
5722+ )
5723+ result = ts .map_to_vcf_model ()
5724+ assert isinstance (result , tskit .VcfModelMapping )
5725+ assert result .individuals_nodes .shape == (5 , 2 )
5726+ assert np .array_equal (
5727+ result .individuals_nodes ,
5728+ np .array ([[0 , 1 ], [2 , 3 ], [4 , 5 ], [6 , 7 ], [8 , 9 ]]),
5729+ )
5730+ assert result .individuals_name .shape == (5 ,)
5731+ for i in range (5 ):
5732+ assert result .individuals_name [i ] == f"tsk_{ i } "
5733+
5734+ def test_with_individuals_and_ploidy_error (self ):
5735+ tables = tskit .TableCollection (1.0 )
5736+ tables .individuals .add_row ()
5737+ tables .nodes .add_row (flags = tskit .NODE_IS_SAMPLE , time = 0 , individual = 0 )
5738+ ts = tables .tree_sequence ()
5739+
5740+ with pytest .raises (ValueError , match = "Cannot specify ploidy when individuals" ):
5741+ ts .map_to_vcf_model (ploidy = 2 )
5742+
5743+ def test_specific_individuals (self ):
5744+ tables = tskit .TableCollection (1.0 )
5745+ # Create 5 individuals with varying ploidy
5746+ for i in range (5 ):
5747+ tables .individuals .add_row ()
5748+ # Individuals have ploidy i+1
5749+ for _ in range (i + 1 ):
5750+ tables .nodes .add_row (flags = tskit .NODE_IS_SAMPLE , time = 0 , individual = i )
5751+ ts = tables .tree_sequence ()
5752+
5753+ result = ts .map_to_vcf_model (individuals = [1 , 3 ])
5754+ assert isinstance (result , tskit .VcfModelMapping )
5755+ # Individual 1 has ploidy 2, individual 3 has ploidy 4
5756+ assert result .individuals_nodes .shape == (2 , 5 )
5757+ assert np .array_equal (result .individuals_nodes [0 ], [1 , 2 , - 1 , - 1 , - 1 ])
5758+ assert np .array_equal (result .individuals_nodes [1 ], [6 , 7 , 8 , 9 , - 1 ])
5759+
5760+ assert result .individuals_name .shape == (2 ,)
5761+ assert result .individuals_name [0 ] == "tsk_1"
5762+ assert result .individuals_name [1 ] == "tsk_3"
5763+
5764+ def test_individual_with_no_nodes (self ):
5765+ tables = tskit .TableCollection (1.0 )
5766+ # Individual with no nodes
5767+ tables .individuals .add_row ()
5768+ # Individual with nodes
5769+ tables .individuals .add_row ()
5770+ tables .nodes .add_row (flags = tskit .NODE_IS_SAMPLE , time = 0 , individual = 1 )
5771+ ts = tables .tree_sequence ()
5772+
5773+ result = ts .map_to_vcf_model ()
5774+ assert result .individuals_nodes .shape == (2 , 1 )
5775+ assert np .array_equal (result .individuals_nodes , [[- 1 ], [0 ]])
5776+
5777+ def test_individual_with_no_nodes_only (self ):
5778+ tables = tskit .TableCollection (1.0 )
5779+ # Individual with no nodes
5780+ tables .individuals .add_row ()
5781+ # Individual with nodes
5782+ tables .individuals .add_row ()
5783+ tables .nodes .add_row (flags = tskit .NODE_IS_SAMPLE , time = 0 , individual = 1 )
5784+ ts = tables .tree_sequence ()
5785+
5786+ result = ts .map_to_vcf_model (individuals = [0 ])
5787+ assert result .individuals_nodes .shape == (1 , 1 )
5788+ assert np .array_equal (result .individuals_nodes , [[- 1 ]])
5789+
5790+ def test_invalid_individual_id (self ):
5791+ tables = tskit .TableCollection (1.0 )
5792+ tables .individuals .add_row ()
5793+ tables .nodes .add_row (flags = tskit .NODE_IS_SAMPLE , time = 0 , individual = 0 )
5794+ ts = tables .tree_sequence ()
5795+
5796+ with pytest .raises (ValueError , match = "Invalid individual ID" ):
5797+ ts .map_to_vcf_model (individuals = [- 1 ])
5798+
5799+ with pytest .raises (ValueError , match = "Invalid individual ID" ):
5800+ ts .map_to_vcf_model (individuals = [1 ])
5801+
5802+ def test_mixed_sample_non_sample_ordering (self ):
5803+ tables = tskit .TableCollection (1.0 )
5804+ tables .individuals .add_row ()
5805+ tables .nodes .add_row (flags = tskit .NODE_IS_SAMPLE , time = 0 , individual = 0 )
5806+ tables .nodes .add_row (flags = 0 , time = 0 , individual = 0 ) # Non-sample node
5807+ tables .nodes .add_row (flags = tskit .NODE_IS_SAMPLE , time = 0 , individual = 0 )
5808+ tables .nodes .add_row (flags = 0 , time = 0 , individual = 0 ) # Non-sample node
5809+ tables .individuals .add_row ()
5810+ tables .nodes .add_row (flags = 0 , time = 0 , individual = 1 ) # Non-sample node
5811+ tables .nodes .add_row (flags = tskit .NODE_IS_SAMPLE , time = 0 , individual = 1 )
5812+ ts = tables .tree_sequence ()
5813+
5814+ result = ts .map_to_vcf_model ()
5815+ assert result .individuals_nodes .shape == (2 , 4 )
5816+ assert np .array_equal (
5817+ result .individuals_nodes ,
5818+ np .array ([[0 , 2 , - 1 , - 1 ], [5 , - 1 , - 1 , - 1 ]]),
5819+ )
5820+
5821+ result = ts .map_to_vcf_model (include_non_sample_nodes = True )
5822+ assert result .individuals_nodes .shape == (2 , 4 )
5823+ assert np .array_equal (
5824+ result .individuals_nodes ,
5825+ np .array ([[0 , 1 , 2 , 3 ], [4 , 5 , - 1 , - 1 ]]),
5826+ )
5827+
5828+ def test_samples_without_individuals_warning (self ):
5829+ tables = tskit .TableCollection (1.0 )
5830+ tables .individuals .add_row ()
5831+ # Node with individual
5832+ tables .nodes .add_row (flags = tskit .NODE_IS_SAMPLE , time = 0 , individual = 0 )
5833+ # Node without individual
5834+ tables .nodes .add_row (flags = tskit .NODE_IS_SAMPLE , time = 0 , individual = tskit .NULL )
5835+ ts = tables .tree_sequence ()
5836+
5837+ with warnings .catch_warnings (record = True ) as w :
5838+ ts .map_to_vcf_model ()
5839+ assert len (w ) == 1
5840+ assert "At least one sample node does not have an individual ID" in str (
5841+ w [0 ].message
5842+ )
5843+
5844+ def test_metadata_key_for_names (self ):
5845+ tables = tskit .TableCollection (1.0 )
5846+
5847+ # Add individuals with metadata
5848+ tables .individuals .metadata_schema = tskit .MetadataSchema (
5849+ {
5850+ "codec" : "json" ,
5851+ "type" : "object" ,
5852+ "properties" : {"name" : {"type" : "string" }},
5853+ }
5854+ )
5855+ tables .individuals .add_row (metadata = {"name" : "ind1" })
5856+ tables .individuals .add_row (metadata = {"name" : "ind2" })
5857+
5858+ # Add nodes
5859+ tables .nodes .add_row (flags = tskit .NODE_IS_SAMPLE , time = 0 , individual = 0 )
5860+ tables .nodes .add_row (flags = tskit .NODE_IS_SAMPLE , time = 0 , individual = 1 )
5861+ ts = tables .tree_sequence ()
5862+
5863+ result = ts .map_to_vcf_model (name_metadata_key = "name" )
5864+ assert result .individuals_name .shape == (2 ,)
5865+ assert result .individuals_name [0 ] == "ind1"
5866+ assert result .individuals_name [1 ] == "ind2"
5867+
5868+ def test_custom_individual_names (self ):
5869+ tables = tskit .TableCollection (1.0 )
5870+ tables .individuals .add_row ()
5871+ tables .individuals .add_row ()
5872+ tables .nodes .add_row (flags = tskit .NODE_IS_SAMPLE , time = 0 , individual = 0 )
5873+ tables .nodes .add_row (flags = tskit .NODE_IS_SAMPLE , time = 0 , individual = 1 )
5874+ ts = tables .tree_sequence ()
5875+
5876+ custom_names = ["individual_A" , "individual_B" ]
5877+ result = ts .map_to_vcf_model (individual_names = custom_names )
5878+ assert result .individuals_name .shape == (2 ,)
5879+ assert result .individuals_name [0 ] == "individual_A"
5880+ assert result .individuals_name [1 ] == "individual_B"
5881+
5882+ def test_name_conflict_error (self ):
5883+ tables = tskit .TableCollection (1.0 )
5884+ ts = tables .tree_sequence ()
5885+ with pytest .raises (
5886+ ValueError ,
5887+ match = "Cannot specify both name_metadata_key and individual_names" ,
5888+ ):
5889+ ts .map_to_vcf_model (
5890+ name_metadata_key = "name" , individual_names = ["custom_name" ]
5891+ )
5892+
5893+ def test_name_count_mismatch_error (self ):
5894+ tables = tskit .TableCollection (1.0 )
5895+ tables .individuals .add_row ()
5896+ tables .individuals .add_row ()
5897+ tables .nodes .add_row (flags = tskit .NODE_IS_SAMPLE , time = 0 , individual = 0 )
5898+ tables .nodes .add_row (flags = tskit .NODE_IS_SAMPLE , time = 0 , individual = 1 )
5899+ ts = tables .tree_sequence ()
5900+
5901+ with pytest .raises (
5902+ ValueError , match = "number of individuals does not match the number of names"
5903+ ):
5904+ ts .map_to_vcf_model (individual_names = ["only_one_name" ])
5905+
5906+ def test_all_individuals_no_nodes (self ):
5907+ tables = tskit .TableCollection (1.0 )
5908+ tables .individuals .add_row ()
5909+ tables .individuals .add_row ()
5910+ ts = tables .tree_sequence ()
5911+ result = ts .map_to_vcf_model ()
5912+ assert result .individuals_nodes .shape == (2 , 0 )
0 commit comments