Skip to content

Commit addbb4a

Browse files
committed
Remove unused imports and obsolete test cases related to HDF5 metadata and upgrade functionality
1 parent feeecb5 commit addbb4a

6 files changed

Lines changed: 5 additions & 957 deletions

File tree

python/tests/test_cli.py

Lines changed: 1 addition & 67 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,6 @@
11
# MIT License
22
#
3-
# Copyright (c) 2018-2024 Tskit Developers
3+
# Copyright (c) 2018-2025 Tskit Developers
44
# Copyright (c) 2017 University of Oxford
55
#
66
# Permission is hereby granted, free of charge, to any person obtaining a copy
@@ -30,7 +30,6 @@
3030
import unittest
3131
from unittest import mock
3232

33-
import h5py
3433
import msprime
3534
import pytest
3635

@@ -655,68 +654,3 @@ def test_migrations(self):
655654

656655
def test_provenances(self):
657656
self.verify("provenances")
658-
659-
660-
class TestUpgrade(TestCli):
661-
"""
662-
Tests the results of the upgrade operation to ensure they are
663-
correct.
664-
"""
665-
666-
def setUp(self):
667-
fd, self.legacy_file_name = tempfile.mkstemp(prefix="msp_cli", suffix=".trees")
668-
os.close(fd)
669-
fd, self.current_file_name = tempfile.mkstemp(prefix="msp_cli", suffix=".trees")
670-
os.close(fd)
671-
672-
def tearDown(self):
673-
os.unlink(self.legacy_file_name)
674-
os.unlink(self.current_file_name)
675-
676-
def test_conversion(self):
677-
ts1 = msprime.simulate(10)
678-
for version in [2, 3]:
679-
tskit.dump_legacy(ts1, self.legacy_file_name, version=version)
680-
stdout, stderr = capture_output(
681-
cli.tskit_main,
682-
["upgrade", self.legacy_file_name, self.current_file_name],
683-
)
684-
ts2 = tskit.load(self.current_file_name)
685-
assert stdout == ""
686-
assert stderr == ""
687-
# Quick checks to ensure we have the right tree sequence.
688-
# More thorough checks are done elsewhere.
689-
assert ts1.get_sample_size() == ts2.get_sample_size()
690-
assert ts1.num_edges == ts2.num_edges
691-
assert ts1.get_num_trees() == ts2.get_num_trees()
692-
693-
def test_duplicate_positions(self):
694-
ts = msprime.simulate(10, mutation_rate=10)
695-
for version in [2, 3]:
696-
tskit.dump_legacy(ts, self.legacy_file_name, version=version)
697-
root = h5py.File(self.legacy_file_name, "r+")
698-
root["mutations/position"][:] = 0
699-
root.close()
700-
stdout, stderr = capture_output(
701-
cli.tskit_main,
702-
["upgrade", "-d", self.legacy_file_name, self.current_file_name],
703-
)
704-
assert stdout == ""
705-
tsp = tskit.load(self.current_file_name)
706-
assert tsp.sample_size == ts.sample_size
707-
assert tsp.num_sites == 1
708-
709-
def test_duplicate_positions_error(self):
710-
ts = msprime.simulate(10, mutation_rate=10)
711-
for version in [2, 3]:
712-
tskit.dump_legacy(ts, self.legacy_file_name, version=version)
713-
root = h5py.File(self.legacy_file_name, "r+")
714-
root["mutations/position"][:] = 0
715-
root.close()
716-
with mock.patch("sys.exit", side_effect=TestException) as mocked_exit:
717-
with pytest.raises(TestException):
718-
capture_output(
719-
cli.tskit_main,
720-
["upgrade", self.legacy_file_name, self.current_file_name],
721-
)
722-
assert mocked_exit.call_count == 1

python/tests/test_file_format.py

Lines changed: 1 addition & 236 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
# MIT License
22
#
3-
# Copyright (c) 2018-2023 Tskit Developers
3+
# Copyright (c) 2018-2025 Tskit Developers
44
# Copyright (c) 2016-2018 University of Oxford
55
#
66
# Permission is hereby granted, free of charge, to any person obtaining a copy
@@ -23,13 +23,10 @@
2323
"""
2424
Test cases for tskit's file format.
2525
"""
26-
import json
2726
import os
28-
import sys
2927
import tempfile
3028
import unittest
3129
import uuid as _uuid
32-
from unittest import mock
3330

3431
import h5py
3532
import kastore
@@ -274,249 +271,17 @@ def test_format_too_old_raised_for_hdf5(self):
274271
):
275272
tskit.TableCollection.load(path)
276273

277-
def test_msprime_v_0_5_0(self):
278-
path = os.path.join(test_data_dir, "hdf5-formats", "msprime-0.5.0_v10.0.hdf5")
279-
ts = tskit.load_legacy(path)
280-
self.verify_tree_sequence(ts)
281-
282-
def test_msprime_v_0_4_0(self):
283-
path = os.path.join(test_data_dir, "hdf5-formats", "msprime-0.4.0_v3.1.hdf5")
284-
ts = tskit.load_legacy(path)
285-
self.verify_tree_sequence(ts)
286-
287-
def test_msprime_v_0_3_0(self):
288-
path = os.path.join(test_data_dir, "hdf5-formats", "msprime-0.3.0_v2.0.hdf5")
289-
ts = tskit.load_legacy(path)
290-
self.verify_tree_sequence(ts)
291-
292274
def test_tskit_v_0_3_3(self):
293275
path = os.path.join(test_data_dir, "old-formats", "tskit-0.3.3.trees")
294276
ts = tskit.load(path)
295277
self.verify_tree_sequence(ts)
296278

297279

298-
class TestRoundTrip(TestFileFormat):
299-
"""
300-
Tests if we can round trip convert a tree sequence in memory
301-
through a V2 file format and a V3 format.
302-
"""
303-
304-
def verify_tree_sequences_equal(self, ts, tsp, simplify=True):
305-
assert ts.sequence_length == tsp.sequence_length
306-
t1 = ts.dump_tables()
307-
# We need to sort and squash the edges in the new format because it
308-
# has gone through an edgesets representation. Simplest way to do this
309-
# is to call simplify.
310-
if simplify:
311-
t2 = tsp.simplify().tables
312-
else:
313-
t2 = tsp.tables
314-
assert t1.nodes == t2.nodes
315-
assert t1.edges == t2.edges
316-
assert t1.sites == t2.sites
317-
# The old formats can't represent mutation times so null them out.
318-
t1.mutations.time = np.full_like(t1.mutations.time, tskit.UNKNOWN_TIME)
319-
assert t1.mutations == t2.mutations
320-
321-
def verify_round_trip(self, ts, version):
322-
tskit.dump_legacy(ts, self.temp_file, version=version)
323-
tsp = tskit.load_legacy(self.temp_file)
324-
simplify = version < 10
325-
self.verify_tree_sequences_equal(ts, tsp, simplify=simplify)
326-
tsp.dump(self.temp_file)
327-
tsp = tskit.load(self.temp_file)
328-
self.verify_tree_sequences_equal(ts, tsp, simplify=simplify)
329-
for provenance in tsp.provenances():
330-
tskit.validate_provenance(json.loads(provenance.record))
331-
332-
def verify_round_trip_no_legacy(self, ts):
333-
ts.dump(self.temp_file)
334-
tsp = tskit.load(self.temp_file)
335-
self.verify_tree_sequences_equal(ts, tsp, simplify=False)
336-
for provenance in tsp.provenances():
337-
tskit.validate_provenance(json.loads(provenance.record))
338-
339-
def verify_malformed_json_v2(self, ts, group_name, attr, bad_json):
340-
tskit.dump_legacy(ts, self.temp_file, 2)
341-
# Write some bad JSON to the provenance string.
342-
root = h5py.File(self.temp_file, "r+")
343-
group = root[group_name]
344-
group.attrs[attr] = bad_json
345-
root.close()
346-
tsp = tskit.load_legacy(self.temp_file)
347-
self.verify_tree_sequences_equal(ts, tsp)
348-
349-
def test_malformed_json_v2(self):
350-
ts = multi_locus_with_mutation_example()
351-
for group_name in ["trees", "mutations"]:
352-
for attr in ["environment", "parameters"]:
353-
for bad_json in ["", "{", "{},"]:
354-
self.verify_malformed_json_v2(ts, group_name, attr, bad_json)
355-
356-
def test_single_locus_no_mutation(self):
357-
self.verify_round_trip(single_locus_no_mutation_example(), 2)
358-
self.verify_round_trip(single_locus_no_mutation_example(), 3)
359-
self.verify_round_trip(single_locus_no_mutation_example(), 10)
360-
361-
def test_single_locus_with_mutation(self):
362-
self.verify_round_trip(single_locus_with_mutation_example(), 2)
363-
self.verify_round_trip(single_locus_with_mutation_example(), 3)
364-
self.verify_round_trip(single_locus_with_mutation_example(), 10)
365-
366-
def test_multi_locus_with_mutation(self):
367-
self.verify_round_trip(multi_locus_with_mutation_example(), 2)
368-
self.verify_round_trip(multi_locus_with_mutation_example(), 3)
369-
self.verify_round_trip(multi_locus_with_mutation_example(), 10)
370-
371-
def test_migration_example(self):
372-
self.verify_round_trip(migration_example(), 2)
373-
self.verify_round_trip(migration_example(), 3)
374-
self.verify_round_trip(migration_example(), 10)
375-
376-
def test_bottleneck_example(self):
377-
self.verify_round_trip(migration_example(), 3)
378-
self.verify_round_trip(migration_example(), 10)
379-
380-
def test_no_provenance(self):
381-
self.verify_round_trip(no_provenance_example(), 10)
382-
383-
def test_provenance_timestamp_only(self):
384-
self.verify_round_trip(provenance_timestamp_only_example(), 10)
385-
386-
def test_recurrent_mutation_example(self):
387-
ts = recurrent_mutation_example()
388-
for version in [2, 3]:
389-
with pytest.raises(ValueError):
390-
tskit.dump_legacy(ts, self.temp_file, version)
391-
self.verify_round_trip(ts, 10)
392-
393-
def test_general_mutation_example(self):
394-
ts = general_mutation_example()
395-
for version in [2, 3]:
396-
with pytest.raises(ValueError):
397-
tskit.dump_legacy(ts, self.temp_file, version)
398-
self.verify_round_trip(ts, 10)
399-
400-
def test_node_metadata_example(self):
401-
self.verify_round_trip(node_metadata_example(), 10)
402-
403-
def test_site_metadata_example(self):
404-
self.verify_round_trip(site_metadata_example(), 10)
405-
406-
def test_mutation_metadata_example(self):
407-
self.verify_round_trip(mutation_metadata_example(), 10)
408-
409-
def test_migration_metadata_example(self):
410-
self.verify_round_trip(migration_metadata_example(), 10)
411-
412-
def test_edge_metadata_example(self):
413-
# metadata for edges was introduced
414-
self.verify_round_trip_no_legacy(edge_metadata_example())
415-
416-
def test_multichar_mutation_example(self):
417-
self.verify_round_trip(multichar_mutation_example(), 10)
418-
419-
def test_empty_file(self):
420-
tables = tskit.TableCollection(sequence_length=3)
421-
self.verify_round_trip(tables.tree_sequence(), 10)
422-
423-
def test_zero_edges(self):
424-
tables = tskit.TableCollection(sequence_length=3)
425-
tables.nodes.add_row(time=0)
426-
self.verify_round_trip(tables.tree_sequence(), 10)
427-
428-
def test_v2_no_samples(self):
429-
ts = multi_locus_with_mutation_example()
430-
tskit.dump_legacy(ts, self.temp_file, version=2)
431-
root = h5py.File(self.temp_file, "r+")
432-
del root["samples"]
433-
root.close()
434-
tsp = tskit.load_legacy(self.temp_file)
435-
self.verify_tree_sequences_equal(ts, tsp)
436-
437-
def test_duplicate_mutation_positions_single_value(self):
438-
ts = multi_locus_with_mutation_example()
439-
for version in [2, 3]:
440-
tskit.dump_legacy(ts, self.temp_file, version=version)
441-
root = h5py.File(self.temp_file, "r+")
442-
root["mutations/position"][:] = 0
443-
root.close()
444-
with pytest.raises(tskit.DuplicatePositionsError):
445-
tskit.load_legacy(self.temp_file)
446-
tsp = tskit.load_legacy(self.temp_file, remove_duplicate_positions=True)
447-
assert tsp.num_sites == 1
448-
sites = list(tsp.sites())
449-
assert sites[0].position == 0
450-
451-
def test_duplicate_mutation_positions(self):
452-
ts = multi_locus_with_mutation_example()
453-
for version in [2, 3]:
454-
tskit.dump_legacy(ts, self.temp_file, version=version)
455-
root = h5py.File(self.temp_file, "r+")
456-
position = np.array(root["mutations/position"])
457-
position[0] = position[1]
458-
root["mutations/position"][:] = position
459-
root.close()
460-
with pytest.raises(tskit.DuplicatePositionsError):
461-
tskit.load_legacy(self.temp_file)
462-
tsp = tskit.load_legacy(self.temp_file, remove_duplicate_positions=True)
463-
assert tsp.num_sites == position.shape[0] - 1
464-
position_after = list(s.position for s in tsp.sites())
465-
assert list(position[1:]) == position_after
466-
467-
468280
class TestErrors(TestFileFormat):
469281
"""
470282
Test various API errors.
471283
"""
472284

473-
def test_v2_non_binary_records(self):
474-
demographic_events = [
475-
msprime.SimpleBottleneck(time=0.01, population=0, proportion=1)
476-
]
477-
ts = msprime.simulate(
478-
sample_size=10, demographic_events=demographic_events, random_seed=1
479-
)
480-
with pytest.raises(ValueError):
481-
tskit.dump_legacy(ts, self.temp_file, 2)
482-
483-
def test_unsupported_version(self):
484-
ts = msprime.simulate(10)
485-
with pytest.raises(ValueError):
486-
tskit.dump_legacy(ts, self.temp_file, version=4)
487-
# Cannot read current files.
488-
ts.dump(self.temp_file)
489-
# Catch Exception here because h5py throws different exceptions on py2 and py3
490-
with pytest.raises(Exception): # noqa B017
491-
tskit.load_legacy(self.temp_file)
492-
493-
def test_no_version_number(self):
494-
root = h5py.File(self.temp_file, "w")
495-
root.attrs["x"] = 0
496-
root.close()
497-
with pytest.raises(ValueError):
498-
tskit.load_legacy(self.temp_file)
499-
500-
def test_unknown_legacy_version(self):
501-
root = h5py.File(self.temp_file, "w")
502-
root.attrs["format_version"] = (1024, 0) # Arbitrary unknown version
503-
root.close()
504-
with pytest.raises(ValueError):
505-
tskit.load_legacy(self.temp_file)
506-
507-
def test_no_h5py(self):
508-
ts = msprime.simulate(10)
509-
path = os.path.join(test_data_dir, "hdf5-formats", "msprime-0.3.0_v2.0.hdf5")
510-
msg = (
511-
"Legacy formats require h5py. Install via `pip install h5py` or"
512-
" `conda install h5py`"
513-
)
514-
with mock.patch.dict(sys.modules, {"h5py": None}):
515-
with pytest.raises(ImportError, match=msg):
516-
tskit.load_legacy(path)
517-
with pytest.raises(ImportError, match=msg):
518-
tskit.dump_legacy(ts, path)
519-
520285
def test_tszip_file(self):
521286
ts = msprime.simulate(5)
522287
tszip.compress(ts, self.temp_file)

0 commit comments

Comments
 (0)