@@ -5668,3 +5668,233 @@ def test_different_node_flags(self):
56685668 result = ts .sample_nodes_by_ploidy (2 )
56695669 assert result .shape == (1 , 2 )
56705670 assert_array_equal (result , np .array ([[0 , 2 ]]))
5671+
5672+
class TestMapToVcfModel:
    """
    Tests for ``TreeSequence.map_to_vcf_model``.

    The tests check how sample nodes are grouped into VCF individuals
    (``individuals_nodes``), how those individuals are named
    (``individuals_name``), and which argument combinations raise
    ``ValueError`` or emit warnings.
    """

    @staticmethod
    def _default_names(individual_ids):
        """Return the ``tsk_<id>`` names expected for the given individual IDs."""
        return [f"tsk_{j}" for j in individual_ids]

    @staticmethod
    def _map_recording_warnings(ts, **kwargs):
        """
        Call ``ts.map_to_vcf_model(**kwargs)`` and return ``(result, caught)``,
        where ``caught`` is the list of warnings emitted during the call.

        The "always" filter is installed inside the recording context so the
        warning counts asserted by the tests do not depend on whichever
        filters (``error``, ``ignore``, once-per-location, ...) happen to be
        active in the surrounding test run.
        """
        with warnings.catch_warnings(record=True) as caught:
            warnings.simplefilter("always")
            result = ts.map_to_vcf_model(**kwargs)
        return result, caught

    def test_no_individuals_default_ploidy(self):
        """With no individuals, every sample is expected to be its own row."""
        ts = tskit.Tree.generate_balanced(4).tree_sequence
        assert ts.num_individuals == 0

        # Default ploidy should be 1
        result = ts.map_to_vcf_model()
        assert isinstance(result, tskit.VcfModelMapping)
        assert result.individuals_nodes.shape == (4, 1)
        assert_array_equal(result.individuals_nodes, np.arange(4).reshape(4, 1))
        assert result.individuals_name.shape == (4,)
        assert list(result.individuals_name) == self._default_names(range(4))

    def test_no_individuals_custom_ploidy(self):
        """With ploidy=2, consecutive sample pairs are expected to share a row."""
        ts = tskit.Tree.generate_balanced(6).tree_sequence
        assert ts.num_individuals == 0

        # Use ploidy = 2
        result = ts.map_to_vcf_model(ploidy=2)
        assert isinstance(result, tskit.VcfModelMapping)
        assert result.individuals_nodes.shape == (3, 2)
        # Row i must hold nodes (2i, 2i + 1).
        assert_array_equal(result.individuals_nodes, np.arange(6).reshape(3, 2))
        assert result.individuals_name.shape == (3,)
        assert list(result.individuals_name) == self._default_names(range(3))

    def test_no_individuals_uneven_ploidy(self):
        """A sample count that is not a multiple of the ploidy must be rejected."""
        ts = tskit.Tree.generate_balanced(5).tree_sequence
        # This tree sequence has no individuals
        assert ts.num_individuals == 0

        # 5 samples cannot be evenly divided into ploidy=2
        with pytest.raises(ValueError, match="not a multiple"):
            ts.map_to_vcf_model(ploidy=2)

    def test_with_individuals(self):
        """Individuals present in the tables define the rows of the mapping."""
        ts = msprime.sim_ancestry(
            5,
            random_seed=42,
        )
        result = ts.map_to_vcf_model()
        assert isinstance(result, tskit.VcfModelMapping)
        assert result.individuals_nodes.shape == (5, 2)
        assert_array_equal(
            result.individuals_nodes,
            np.array([[0, 1], [2, 3], [4, 5], [6, 7], [8, 9]]),
        )
        assert result.individuals_name.shape == (5,)
        assert list(result.individuals_name) == self._default_names(range(5))

    def test_with_individuals_and_ploidy_error(self):
        """Passing ``ploidy`` when individuals exist must raise ``ValueError``."""
        tables = tskit.TableCollection(1.0)
        tables.individuals.add_row()
        tables.nodes.add_row(flags=tskit.NODE_IS_SAMPLE, time=0, individual=0)
        ts = tables.tree_sequence()

        with pytest.raises(ValueError, match="Cannot specify ploidy when individuals"):
            ts.map_to_vcf_model(ploidy=2)

    def test_specific_individuals(self):
        """Selecting a subset of individuals with different ploidies."""
        tables = tskit.TableCollection(1.0)
        # Create 5 individuals with varying ploidy: individual i gets i + 1
        # sample nodes, so node IDs are 0 | 1,2 | 3,4,5 | 6,7,8,9 | 10..14.
        for i in range(5):
            tables.individuals.add_row()
            for _ in range(i + 1):
                tables.nodes.add_row(flags=tskit.NODE_IS_SAMPLE, time=0, individual=i)
        ts = tables.tree_sequence()

        result = ts.map_to_vcf_model(individuals=[1, 3])
        assert isinstance(result, tskit.VcfModelMapping)
        # Individual 1 has ploidy 2, individual 3 has ploidy 4
        assert result.individuals_nodes.shape == (2, 4)

        # The shorter row is expected to be padded with -1.
        assert_array_equal(
            result.individuals_nodes,
            np.array([[1, 2, -1, -1], [6, 7, 8, 9]]),
        )

        assert result.individuals_name.shape == (2,)
        assert list(result.individuals_name) == self._default_names([1, 3])

    def test_individual_with_no_nodes_warning(self):
        """An individual without nodes is skipped with exactly one warning."""
        tables = tskit.TableCollection(1.0)
        # Individual with no nodes
        tables.individuals.add_row()
        # Individual with nodes
        tables.individuals.add_row()
        tables.nodes.add_row(flags=tskit.NODE_IS_SAMPLE, time=0, individual=1)
        ts = tables.tree_sequence()

        result, caught = self._map_recording_warnings(ts)
        assert len(caught) == 1
        assert "Individual 0 has no nodes" in str(caught[0].message)

        # Should only include individual 1
        assert result.individuals_nodes.shape == (1, 1)
        assert result.individuals_nodes[0, 0] == 0
        assert result.individuals_name.shape == (1,)
        assert result.individuals_name[0] == "tsk_1"

    def test_individual_with_no_nodes_error(self):
        """Explicitly requesting an individual without nodes must raise."""
        tables = tskit.TableCollection(1.0)
        # Individual with no nodes
        tables.individuals.add_row()
        # Individual with nodes
        tables.individuals.add_row()
        tables.nodes.add_row(flags=tskit.NODE_IS_SAMPLE, time=0, individual=1)
        ts = tables.tree_sequence()

        with pytest.raises(ValueError, match="Individual 0 has no nodes"):
            ts.map_to_vcf_model(individuals=[0])

    def test_invalid_individual_id(self):
        """Negative and out-of-range individual IDs must both be rejected."""
        tables = tskit.TableCollection(1.0)
        tables.individuals.add_row()
        tables.nodes.add_row(flags=tskit.NODE_IS_SAMPLE, time=0, individual=0)
        ts = tables.tree_sequence()

        # -1 is below the valid range; 1 is past the single individual (ID 0).
        for bad_id in (-1, 1):
            with pytest.raises(ValueError, match="Invalid individual ID"):
                ts.map_to_vcf_model(individuals=[bad_id])

    def test_mixed_sample_non_sample_warning(self):
        """An individual mixing sample and non-sample nodes triggers one warning."""
        tables = tskit.TableCollection(1.0)
        tables.individuals.add_row()
        tables.nodes.add_row(flags=tskit.NODE_IS_SAMPLE, time=0, individual=0)
        tables.nodes.add_row(flags=0, time=0, individual=0)  # Non-sample node
        ts = tables.tree_sequence()

        _, caught = self._map_recording_warnings(ts)
        assert len(caught) == 1
        assert "Individual 0 has both sample and non-sample nodes" in str(
            caught[0].message
        )

    def test_samples_without_individuals_warning(self):
        """A sample node lacking an individual triggers exactly one warning."""
        tables = tskit.TableCollection(1.0)
        tables.individuals.add_row()
        # Node with individual
        tables.nodes.add_row(flags=tskit.NODE_IS_SAMPLE, time=0, individual=0)
        # Node without individual
        tables.nodes.add_row(flags=tskit.NODE_IS_SAMPLE, time=0, individual=tskit.NULL)
        ts = tables.tree_sequence()

        _, caught = self._map_recording_warnings(ts)
        assert len(caught) == 1
        assert "At least one sample node does not have an individual ID" in str(
            caught[0].message
        )

    def test_metadata_key_for_names(self):
        """``name_metadata_key`` takes names from each individual's metadata."""
        tables = tskit.TableCollection(1.0)

        # Add individuals with metadata
        tables.individuals.metadata_schema = tskit.MetadataSchema(
            {
                "codec": "json",
                "type": "object",
                "properties": {"name": {"type": "string"}},
            }
        )
        tables.individuals.add_row(metadata={"name": "ind1"})
        tables.individuals.add_row(metadata={"name": "ind2"})

        # Add nodes
        tables.nodes.add_row(flags=tskit.NODE_IS_SAMPLE, time=0, individual=0)
        tables.nodes.add_row(flags=tskit.NODE_IS_SAMPLE, time=0, individual=1)
        ts = tables.tree_sequence()

        result = ts.map_to_vcf_model(name_metadata_key="name")
        assert result.individuals_name.shape == (2,)
        assert list(result.individuals_name) == ["ind1", "ind2"]

    def test_custom_individual_names(self):
        """``individual_names`` supplies the output names directly."""
        tables = tskit.TableCollection(1.0)
        tables.individuals.add_row()
        tables.individuals.add_row()
        tables.nodes.add_row(flags=tskit.NODE_IS_SAMPLE, time=0, individual=0)
        tables.nodes.add_row(flags=tskit.NODE_IS_SAMPLE, time=0, individual=1)
        ts = tables.tree_sequence()

        custom_names = ["individual_A", "individual_B"]
        result = ts.map_to_vcf_model(individual_names=custom_names)
        assert result.individuals_name.shape == (2,)
        assert list(result.individuals_name) == custom_names

    def test_name_conflict_error(self):
        """``name_metadata_key`` and ``individual_names`` are mutually exclusive."""
        tables = tskit.TableCollection(1.0)
        ts = tables.tree_sequence()
        with pytest.raises(
            ValueError,
            match="Cannot specify both name_metadata_key and individual_names",
        ):
            ts.map_to_vcf_model(
                name_metadata_key="name", individual_names=["custom_name"]
            )

    def test_name_count_mismatch_error(self):
        """The number of custom names must match the number of individuals."""
        tables = tskit.TableCollection(1.0)
        tables.individuals.add_row()
        tables.individuals.add_row()
        tables.nodes.add_row(flags=tskit.NODE_IS_SAMPLE, time=0, individual=0)
        tables.nodes.add_row(flags=tskit.NODE_IS_SAMPLE, time=0, individual=1)
        ts = tables.tree_sequence()

        with pytest.raises(
            ValueError, match="number of individuals does not match the number of names"
        ):
            ts.map_to_vcf_model(individual_names=["only_one_name"])
0 commit comments