|
1 | 1 | # MIT License |
2 | 2 | # |
3 | | -# Copyright (c) 2018-2023 Tskit Developers |
| 3 | +# Copyright (c) 2018-2025 Tskit Developers |
4 | 4 | # Copyright (c) 2016-2018 University of Oxford |
5 | 5 | # |
6 | 6 | # Permission is hereby granted, free of charge, to any person obtaining a copy |
|
23 | 23 | """ |
24 | 24 | Test cases for tskit's file format. |
25 | 25 | """ |
26 | | -import json |
27 | 26 | import os |
28 | | -import sys |
29 | 27 | import tempfile |
30 | 28 | import unittest |
31 | 29 | import uuid as _uuid |
32 | | -from unittest import mock |
33 | 30 |
|
34 | 31 | import h5py |
35 | 32 | import kastore |
@@ -274,249 +271,17 @@ def test_format_too_old_raised_for_hdf5(self): |
274 | 271 | ): |
275 | 272 | tskit.TableCollection.load(path) |
276 | 273 |
|
277 | | - def test_msprime_v_0_5_0(self): |
278 | | - path = os.path.join(test_data_dir, "hdf5-formats", "msprime-0.5.0_v10.0.hdf5") |
279 | | - ts = tskit.load_legacy(path) |
280 | | - self.verify_tree_sequence(ts) |
281 | | - |
282 | | - def test_msprime_v_0_4_0(self): |
283 | | - path = os.path.join(test_data_dir, "hdf5-formats", "msprime-0.4.0_v3.1.hdf5") |
284 | | - ts = tskit.load_legacy(path) |
285 | | - self.verify_tree_sequence(ts) |
286 | | - |
287 | | - def test_msprime_v_0_3_0(self): |
288 | | - path = os.path.join(test_data_dir, "hdf5-formats", "msprime-0.3.0_v2.0.hdf5") |
289 | | - ts = tskit.load_legacy(path) |
290 | | - self.verify_tree_sequence(ts) |
291 | | - |
292 | 274 | def test_tskit_v_0_3_3(self): |
293 | 275 | path = os.path.join(test_data_dir, "old-formats", "tskit-0.3.3.trees") |
294 | 276 | ts = tskit.load(path) |
295 | 277 | self.verify_tree_sequence(ts) |
296 | 278 |
|
297 | 279 |
|
298 | | -class TestRoundTrip(TestFileFormat): |
299 | | - """ |
300 | | - Tests if we can round trip convert a tree sequence in memory |
301 | | - through a V2 file format and a V3 format. |
302 | | - """ |
303 | | - |
304 | | - def verify_tree_sequences_equal(self, ts, tsp, simplify=True): |
305 | | - assert ts.sequence_length == tsp.sequence_length |
306 | | - t1 = ts.dump_tables() |
307 | | - # We need to sort and squash the edges in the new format because it |
308 | | - # has gone through an edgesets representation. Simplest way to do this |
309 | | - # is to call simplify. |
310 | | - if simplify: |
311 | | - t2 = tsp.simplify().tables |
312 | | - else: |
313 | | - t2 = tsp.tables |
314 | | - assert t1.nodes == t2.nodes |
315 | | - assert t1.edges == t2.edges |
316 | | - assert t1.sites == t2.sites |
317 | | - # The old formats can't represent mutation times so null them out. |
318 | | - t1.mutations.time = np.full_like(t1.mutations.time, tskit.UNKNOWN_TIME) |
319 | | - assert t1.mutations == t2.mutations |
320 | | - |
321 | | - def verify_round_trip(self, ts, version): |
322 | | - tskit.dump_legacy(ts, self.temp_file, version=version) |
323 | | - tsp = tskit.load_legacy(self.temp_file) |
324 | | - simplify = version < 10 |
325 | | - self.verify_tree_sequences_equal(ts, tsp, simplify=simplify) |
326 | | - tsp.dump(self.temp_file) |
327 | | - tsp = tskit.load(self.temp_file) |
328 | | - self.verify_tree_sequences_equal(ts, tsp, simplify=simplify) |
329 | | - for provenance in tsp.provenances(): |
330 | | - tskit.validate_provenance(json.loads(provenance.record)) |
331 | | - |
332 | | - def verify_round_trip_no_legacy(self, ts): |
333 | | - ts.dump(self.temp_file) |
334 | | - tsp = tskit.load(self.temp_file) |
335 | | - self.verify_tree_sequences_equal(ts, tsp, simplify=False) |
336 | | - for provenance in tsp.provenances(): |
337 | | - tskit.validate_provenance(json.loads(provenance.record)) |
338 | | - |
339 | | - def verify_malformed_json_v2(self, ts, group_name, attr, bad_json): |
340 | | - tskit.dump_legacy(ts, self.temp_file, 2) |
341 | | - # Write some bad JSON to the provenance string. |
342 | | - root = h5py.File(self.temp_file, "r+") |
343 | | - group = root[group_name] |
344 | | - group.attrs[attr] = bad_json |
345 | | - root.close() |
346 | | - tsp = tskit.load_legacy(self.temp_file) |
347 | | - self.verify_tree_sequences_equal(ts, tsp) |
348 | | - |
349 | | - def test_malformed_json_v2(self): |
350 | | - ts = multi_locus_with_mutation_example() |
351 | | - for group_name in ["trees", "mutations"]: |
352 | | - for attr in ["environment", "parameters"]: |
353 | | - for bad_json in ["", "{", "{},"]: |
354 | | - self.verify_malformed_json_v2(ts, group_name, attr, bad_json) |
355 | | - |
356 | | - def test_single_locus_no_mutation(self): |
357 | | - self.verify_round_trip(single_locus_no_mutation_example(), 2) |
358 | | - self.verify_round_trip(single_locus_no_mutation_example(), 3) |
359 | | - self.verify_round_trip(single_locus_no_mutation_example(), 10) |
360 | | - |
361 | | - def test_single_locus_with_mutation(self): |
362 | | - self.verify_round_trip(single_locus_with_mutation_example(), 2) |
363 | | - self.verify_round_trip(single_locus_with_mutation_example(), 3) |
364 | | - self.verify_round_trip(single_locus_with_mutation_example(), 10) |
365 | | - |
366 | | - def test_multi_locus_with_mutation(self): |
367 | | - self.verify_round_trip(multi_locus_with_mutation_example(), 2) |
368 | | - self.verify_round_trip(multi_locus_with_mutation_example(), 3) |
369 | | - self.verify_round_trip(multi_locus_with_mutation_example(), 10) |
370 | | - |
371 | | - def test_migration_example(self): |
372 | | - self.verify_round_trip(migration_example(), 2) |
373 | | - self.verify_round_trip(migration_example(), 3) |
374 | | - self.verify_round_trip(migration_example(), 10) |
375 | | - |
376 | | - def test_bottleneck_example(self): |
377 | | - self.verify_round_trip(migration_example(), 3) |
378 | | - self.verify_round_trip(migration_example(), 10) |
379 | | - |
380 | | - def test_no_provenance(self): |
381 | | - self.verify_round_trip(no_provenance_example(), 10) |
382 | | - |
383 | | - def test_provenance_timestamp_only(self): |
384 | | - self.verify_round_trip(provenance_timestamp_only_example(), 10) |
385 | | - |
386 | | - def test_recurrent_mutation_example(self): |
387 | | - ts = recurrent_mutation_example() |
388 | | - for version in [2, 3]: |
389 | | - with pytest.raises(ValueError): |
390 | | - tskit.dump_legacy(ts, self.temp_file, version) |
391 | | - self.verify_round_trip(ts, 10) |
392 | | - |
393 | | - def test_general_mutation_example(self): |
394 | | - ts = general_mutation_example() |
395 | | - for version in [2, 3]: |
396 | | - with pytest.raises(ValueError): |
397 | | - tskit.dump_legacy(ts, self.temp_file, version) |
398 | | - self.verify_round_trip(ts, 10) |
399 | | - |
400 | | - def test_node_metadata_example(self): |
401 | | - self.verify_round_trip(node_metadata_example(), 10) |
402 | | - |
403 | | - def test_site_metadata_example(self): |
404 | | - self.verify_round_trip(site_metadata_example(), 10) |
405 | | - |
406 | | - def test_mutation_metadata_example(self): |
407 | | - self.verify_round_trip(mutation_metadata_example(), 10) |
408 | | - |
409 | | - def test_migration_metadata_example(self): |
410 | | - self.verify_round_trip(migration_metadata_example(), 10) |
411 | | - |
412 | | - def test_edge_metadata_example(self): |
413 | | - # metadata for edges was introduced |
414 | | - self.verify_round_trip_no_legacy(edge_metadata_example()) |
415 | | - |
416 | | - def test_multichar_mutation_example(self): |
417 | | - self.verify_round_trip(multichar_mutation_example(), 10) |
418 | | - |
419 | | - def test_empty_file(self): |
420 | | - tables = tskit.TableCollection(sequence_length=3) |
421 | | - self.verify_round_trip(tables.tree_sequence(), 10) |
422 | | - |
423 | | - def test_zero_edges(self): |
424 | | - tables = tskit.TableCollection(sequence_length=3) |
425 | | - tables.nodes.add_row(time=0) |
426 | | - self.verify_round_trip(tables.tree_sequence(), 10) |
427 | | - |
428 | | - def test_v2_no_samples(self): |
429 | | - ts = multi_locus_with_mutation_example() |
430 | | - tskit.dump_legacy(ts, self.temp_file, version=2) |
431 | | - root = h5py.File(self.temp_file, "r+") |
432 | | - del root["samples"] |
433 | | - root.close() |
434 | | - tsp = tskit.load_legacy(self.temp_file) |
435 | | - self.verify_tree_sequences_equal(ts, tsp) |
436 | | - |
437 | | - def test_duplicate_mutation_positions_single_value(self): |
438 | | - ts = multi_locus_with_mutation_example() |
439 | | - for version in [2, 3]: |
440 | | - tskit.dump_legacy(ts, self.temp_file, version=version) |
441 | | - root = h5py.File(self.temp_file, "r+") |
442 | | - root["mutations/position"][:] = 0 |
443 | | - root.close() |
444 | | - with pytest.raises(tskit.DuplicatePositionsError): |
445 | | - tskit.load_legacy(self.temp_file) |
446 | | - tsp = tskit.load_legacy(self.temp_file, remove_duplicate_positions=True) |
447 | | - assert tsp.num_sites == 1 |
448 | | - sites = list(tsp.sites()) |
449 | | - assert sites[0].position == 0 |
450 | | - |
451 | | - def test_duplicate_mutation_positions(self): |
452 | | - ts = multi_locus_with_mutation_example() |
453 | | - for version in [2, 3]: |
454 | | - tskit.dump_legacy(ts, self.temp_file, version=version) |
455 | | - root = h5py.File(self.temp_file, "r+") |
456 | | - position = np.array(root["mutations/position"]) |
457 | | - position[0] = position[1] |
458 | | - root["mutations/position"][:] = position |
459 | | - root.close() |
460 | | - with pytest.raises(tskit.DuplicatePositionsError): |
461 | | - tskit.load_legacy(self.temp_file) |
462 | | - tsp = tskit.load_legacy(self.temp_file, remove_duplicate_positions=True) |
463 | | - assert tsp.num_sites == position.shape[0] - 1 |
464 | | - position_after = list(s.position for s in tsp.sites()) |
465 | | - assert list(position[1:]) == position_after |
466 | | - |
467 | | - |
468 | 280 | class TestErrors(TestFileFormat): |
469 | 281 | """ |
470 | 282 | Test various API errors. |
471 | 283 | """ |
472 | 284 |
|
473 | | - def test_v2_non_binary_records(self): |
474 | | - demographic_events = [ |
475 | | - msprime.SimpleBottleneck(time=0.01, population=0, proportion=1) |
476 | | - ] |
477 | | - ts = msprime.simulate( |
478 | | - sample_size=10, demographic_events=demographic_events, random_seed=1 |
479 | | - ) |
480 | | - with pytest.raises(ValueError): |
481 | | - tskit.dump_legacy(ts, self.temp_file, 2) |
482 | | - |
483 | | - def test_unsupported_version(self): |
484 | | - ts = msprime.simulate(10) |
485 | | - with pytest.raises(ValueError): |
486 | | - tskit.dump_legacy(ts, self.temp_file, version=4) |
487 | | - # Cannot read current files. |
488 | | - ts.dump(self.temp_file) |
489 | | - # Catch Exception here because h5py throws different exceptions on py2 and py3 |
490 | | - with pytest.raises(Exception): # noqa B017 |
491 | | - tskit.load_legacy(self.temp_file) |
492 | | - |
493 | | - def test_no_version_number(self): |
494 | | - root = h5py.File(self.temp_file, "w") |
495 | | - root.attrs["x"] = 0 |
496 | | - root.close() |
497 | | - with pytest.raises(ValueError): |
498 | | - tskit.load_legacy(self.temp_file) |
499 | | - |
500 | | - def test_unknown_legacy_version(self): |
501 | | - root = h5py.File(self.temp_file, "w") |
502 | | - root.attrs["format_version"] = (1024, 0) # Arbitrary unknown version |
503 | | - root.close() |
504 | | - with pytest.raises(ValueError): |
505 | | - tskit.load_legacy(self.temp_file) |
506 | | - |
507 | | - def test_no_h5py(self): |
508 | | - ts = msprime.simulate(10) |
509 | | - path = os.path.join(test_data_dir, "hdf5-formats", "msprime-0.3.0_v2.0.hdf5") |
510 | | - msg = ( |
511 | | - "Legacy formats require h5py. Install via `pip install h5py` or" |
512 | | - " `conda install h5py`" |
513 | | - ) |
514 | | - with mock.patch.dict(sys.modules, {"h5py": None}): |
515 | | - with pytest.raises(ImportError, match=msg): |
516 | | - tskit.load_legacy(path) |
517 | | - with pytest.raises(ImportError, match=msg): |
518 | | - tskit.dump_legacy(ts, path) |
519 | | - |
520 | 285 | def test_tszip_file(self): |
521 | 286 | ts = msprime.simulate(5) |
522 | 287 | tszip.compress(ts, self.temp_file) |
|
0 commit comments