diff --git a/.gitignore b/.gitignore index a93006ef94..626e358c0d 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,6 @@ build-gcc .DS_Store -python/benchmark/*.trees +python/benchmark/*.arg python/benchmark/*.json python/benchmark/*.html .venv diff --git a/c/examples/cpp_sorting_example.cpp b/c/examples/cpp_sorting_example.cpp index c9e5bf48b5..e85ffa75c6 100644 --- a/c/examples/cpp_sorting_example.cpp +++ b/c/examples/cpp_sorting_example.cpp @@ -73,7 +73,7 @@ int main(int argc, char **argv) { if (argc != 3) { - std::cerr << "Usage: " << argv[0] << " input.trees output.trees\n"; + std::cerr << "Usage: " << argv[0] << " input.arg output.arg\n"; std::exit(0); } const char *infile = argv[1]; diff --git a/c/tskit/tables.h b/c/tskit/tables.h index 204e39e2db..8676c63f15 100644 --- a/c/tskit/tables.h +++ b/c/tskit/tables.h @@ -4001,7 +4001,7 @@ Options can be specified by providing one or more of the following bitwise int ret; tsk_table_collection_t tables; - ret = tsk_table_collection_load(&tables, "data.trees", 0); + ret = tsk_table_collection_load(&tables, "data.arg", 0); if (ret != 0) { fprintf(stderr, "Load error:%s\n", tsk_strerror(ret)); exit(EXIT_FAILURE); @@ -4096,7 +4096,7 @@ and well formed files will be written. error_check(ret); tables.sequence_length = 1.0; // Write out the empty tree sequence - ret = tsk_table_collection_dump(&tables, "empty.trees", 0); + ret = tsk_table_collection_dump(&tables, "empty.arg", 0); error_check(ret); @endrst diff --git a/c/tskit/trees.h b/c/tskit/trees.h index bef944fff3..3af77e779d 100644 --- a/c/tskit/trees.h +++ b/c/tskit/trees.h @@ -338,7 +338,7 @@ that function's documentation for details and options. 
int ret; tsk_treeseq_t ts; - ret = tsk_treeseq_load(&ts, "data.trees", 0); + ret = tsk_treeseq_load(&ts, "data.arg", 0); if (ret != 0) { fprintf(stderr, "Load error:%s\n", tsk_strerror(ret)); exit(EXIT_FAILURE); diff --git a/docs/c-api.rst b/docs/c-api.rst index 83f19ca9c1..374886266c 100644 --- a/docs/c-api.rst +++ b/docs/c-api.rst @@ -23,7 +23,7 @@ Do I need the C API? The ``tskit`` C API is generally useful in the following situations: - You want to use the ``tskit`` API in a larger C/C++ application (e.g., - in order to output data in the ``.trees`` format); + in order to output data in the ``.arg`` format); - You need to perform lots of tree traversals/loops etc. to analyse some data that is in tree sequence form. @@ -909,15 +909,15 @@ normally. Running this program on some tree sequence files we might get:: - $ cat tmp1.trees tmp2.trees | ./build/streaming > no_mutations.trees + $ cat tmp1.arg tmp2.arg | ./build/streaming > no_mutations.arg Tree sequence 0 had 38 mutations Tree sequence 1 had 132 mutations Then, running this program again on the output of the previous command, we see that we now have two tree sequences with their mutations removed -stored in the file ``no_mutations.trees``:: +stored in the file ``no_mutations.arg``:: - $ ./build/streaming < no_mutations.trees > /dev/null + $ ./build/streaming < no_mutations.arg > /dev/null Tree sequence 0 had 0 mutations Tree sequence 1 had 0 mutations diff --git a/docs/data/basic_tree_seq.trees b/docs/data/basic_tree_seq.arg similarity index 100% rename from docs/data/basic_tree_seq.trees rename to docs/data/basic_tree_seq.arg diff --git a/docs/export.md b/docs/export.md index 5616dee055..ea08d9ca1c 100644 --- a/docs/export.md +++ b/docs/export.md @@ -41,7 +41,7 @@ If we have a tree sequence file the convenient way to convert to VCF: :::{code-block} bash -$ tskit vcf example.trees > example.vcf +$ tskit vcf example.arg > example.vcf ::: See the {ref}`sec_export_vcf_compression` section for information @@
-137,14 +137,14 @@ The simplest way to compress the VCF output is to use the and pipe the output to `bgzip`: :::{code-block} bash -$ tskit vcf example.trees | bgzip -c > example.vcf.gz +$ tskit vcf example.arg | bgzip -c > example.vcf.gz ::: A general way to convert VCF data to various formats is to pipe the text produced by ``tskit`` into ``bcftools`` using the command line interface: :::{code-block} bash -$ tskit vcf example.trees | bcftools view -O b > example.bcf +$ tskit vcf example.arg | bcftools view -O b > example.bcf ::: If you need more control over the form of the output (or want to work diff --git a/docs/file-formats.md b/docs/file-formats.md index c17fbbb150..8b20f8f05b 100644 --- a/docs/file-formats.md +++ b/docs/file-formats.md @@ -29,8 +29,8 @@ data on file in a columnar, binary format. The format is based on the key-value store for numerical data. There is a one-to-one correspondence between the tables described above and the arrays stored in these files. -By convention, these files are given the `.trees` suffix (although this -is not enforced in any way), and we will sometimes refer to them as ".trees" +By convention, these files are given the `.arg` suffix (although this +is not enforced in any way), and we will sometimes refer to them as ".arg" files. We also refer to them as "tree sequence files". :::{todo} diff --git a/docs/provenance.md b/docs/provenance.md index d07da38b11..e506b1bf3e 100644 --- a/docs/provenance.md +++ b/docs/provenance.md @@ -30,7 +30,7 @@ This documentation serves two distinct purposes: 1. For developers using `tskit` in their own applications, it provides normative documentation for how provenance information should be stored. 2. For end-users of `tskit`, it provides documentation to allows them to inspect and interpret - the provenance information stored in `.trees` files. + the provenance information stored in `.arg` files. Provenance information is encoded using [JSON](https://www.json.org/). 
To standardise the provenance information produced by different software and improve @@ -167,7 +167,7 @@ here even though it was automatically generated. Consider the following invocation of a hypothetical command line program: ```bash -$ supersim --sample-size=10 --do-some-stuff -O out.trees +$ supersim --sample-size=10 --do-some-stuff -O out.arg ``` We recommend encoding the parameters provenance as follows (other fields omitted @@ -177,7 +177,7 @@ for clarity): { "parameters": { "command": "supersim", - "args": ["--sample-size=10", "--do-some-stuff", "-O", "out.trees"], + "args": ["--sample-size=10", "--do-some-stuff", "-O", "out.arg"], "random_seed": 56789 } } @@ -245,7 +245,7 @@ account for resource usage across pipelines of tools. ## Full schema This schema is formally defined using [JSON Schema](http://json-schema.org/) and -given in full here. Developers writing provenance information to `.trees` files +given in full here. Developers writing provenance information to `.arg` files should validate the output JSON against this schema. ```{eval-rst} diff --git a/docs/quickstart.md b/docs/quickstart.md index 20697b9645..dba11df044 100644 --- a/docs/quickstart.md +++ b/docs/quickstart.md @@ -27,7 +27,7 @@ def basic_sim(): recombination_rate=1e-7, random_seed=665) ts = msprime.sim_mutations(ts, rate=2e-7, random_seed=123) - ts.dump("data/basic_tree_seq.trees") + ts.dump("data/basic_tree_seq.arg") def create_notebook_data(): basic_sim() @@ -53,7 +53,7 @@ sites. ```{code-cell} import tskit -ts = tskit.load("data/basic_tree_seq.trees") # Or generate using e.g. msprime.sim_ancestry() +ts = tskit.load("data/basic_tree_seq.arg") # Or generate using e.g. msprime.sim_ancestry() ts # In a Jupyter notebook this displays a summary table. 
Otherwise use print(ts) ``` diff --git a/python/benchmark/config.yaml b/python/benchmark/config.yaml index 659238b97b..d956a3bd7d 100644 --- a/python/benchmark/config.yaml +++ b/python/benchmark/config.yaml @@ -5,8 +5,8 @@ benchmarks: - code: ts = tskit.load("{filename}") parameters: filename: &files - - "tiny.trees" - - "bench.trees" + - "tiny.arg" + - "bench.arg" - code: ts.dump("/dev/null");"{filename}" setup: | @@ -18,7 +18,7 @@ benchmarks: #, site_mask=site_mask, sample_mask=sample_mask) setup: | import numpy - ts = tskit.load("bench.trees") + ts = tskit.load("bench.arg") tables = ts.tables tables.migrations.clear() ts = tables.tree_sequence() @@ -37,7 +37,7 @@ benchmarks: tree.seek(pos) pos = 0 if pos == 500_000 else 500_000 setup: | - ts = tskit.load("bench.trees") + ts = tskit.load("bench.arg") tree = ts.first() pos = 500_000 @@ -48,7 +48,7 @@ benchmarks: - code: tree.{array} setup: | - ts = tskit.load("bench.trees") + ts = tskit.load("bench.arg") tree = ts.first() parameters: array: &tree_arrays @@ -62,7 +62,7 @@ benchmarks: - code: tree.{array}(42); setup: | - ts = tskit.load("bench.trees") + ts = tskit.load("bench.arg") tree = ts.first() parameters: array: @@ -76,7 +76,7 @@ benchmarks: - code: tree.{traversal_order}() setup: | - ts = tskit.load("bench.trees") + ts = tskit.load("bench.arg") tree = ts.first() parameters: traversal_order: &traversal_orders @@ -102,7 +102,7 @@ benchmarks: filename: *files - code: "for row in ts.{table}(): pass" - setup: ts = tskit.load("bench.trees") + setup: ts = tskit.load("bench.arg") parameters: table: &tables - nodes @@ -128,7 +128,7 @@ benchmarks: - code: ts.{table}(1) setup: | - ts = tskit.load("bench.trees") + ts = tskit.load("bench.arg") parameters: table: - node @@ -141,18 +141,18 @@ benchmarks: - provenance - code: ts.tables - setup: ts = tskit.load("bench.trees") + setup: ts = tskit.load("bench.arg") - code: tables.{table} setup: | - ts = tskit.load("bench.trees") + ts = tskit.load("bench.arg") tables = ts.tables 
parameters: table: *tables - code: x = {table}.{column} setup: | - ts = tskit.load("bench.trees") + ts = tskit.load("bench.arg") tables = ts.tables {table} = tables.{table} parameters: &table_columns diff --git a/python/benchmark/run.py b/python/benchmark/run.py index bb6f7219c8..7c6465a9b8 100644 --- a/python/benchmark/run.py +++ b/python/benchmark/run.py @@ -41,7 +41,7 @@ def system_info(): def make_file(): - benchmark_trees = tskit_dir / "benchmark" / "bench.trees" + benchmark_trees = tskit_dir / "benchmark" / "bench.arg" if not os.path.exists(benchmark_trees): print("Generating benchmark trees...") demography = msprime.Demography() @@ -70,7 +70,7 @@ def make_file(): record_provenance=True, ) ts = msprime.sim_mutations(ts, rate=0.001, random_seed=42) - ts.dump(tskit_dir / "benchmark" / "tiny.trees") + ts.dump(tskit_dir / "benchmark" / "tiny.arg") def autotime(setup, code): diff --git a/python/tests/data/SLiM/minimal-example.trees b/python/tests/data/SLiM/minimal-example.arg similarity index 100% rename from python/tests/data/SLiM/minimal-example.trees rename to python/tests/data/SLiM/minimal-example.arg diff --git a/python/tests/data/SLiM/minimal-example.txt b/python/tests/data/SLiM/minimal-example.txt index d867d40926..97edb7920d 100644 --- a/python/tests/data/SLiM/minimal-example.txt +++ b/python/tests/data/SLiM/minimal-example.txt @@ -10,6 +10,6 @@ initialize() { sim.addSubpop("p1", 5); } 3 { - sim.treeSeqOutput("tests/data/SLiM/minimal-example.trees"); + sim.treeSeqOutput("tests/data/SLiM/minimal-example.arg"); sim.simulationFinished(); } diff --git a/python/tests/data/SLiM/single-locus-example.trees b/python/tests/data/SLiM/single-locus-example.arg similarity index 100% rename from python/tests/data/SLiM/single-locus-example.trees rename to python/tests/data/SLiM/single-locus-example.arg diff --git a/python/tests/data/SLiM/single-locus-example.txt b/python/tests/data/SLiM/single-locus-example.txt index 15e7e9757b..444d47e19e 100644 --- 
a/python/tests/data/SLiM/single-locus-example.txt +++ b/python/tests/data/SLiM/single-locus-example.txt @@ -10,6 +10,6 @@ initialize() { sim.addSubpop("p1", 5); } 3 { - sim.treeSeqOutput("tests/data/SLiM/single-locus-example.trees"); + sim.treeSeqOutput("tests/data/SLiM/single-locus-example.arg"); sim.simulationFinished(); } diff --git a/python/tests/data/old-formats/tskit-0.3.3.trees b/python/tests/data/old-formats/tskit-0.3.3.arg similarity index 100% rename from python/tests/data/old-formats/tskit-0.3.3.trees rename to python/tests/data/old-formats/tskit-0.3.3.arg diff --git a/python/tests/ibd.py b/python/tests/ibd.py index 6db35201b7..b931ff151c 100644 --- a/python/tests/ibd.py +++ b/python/tests/ibd.py @@ -327,7 +327,7 @@ def passes_filters(self, a, b, left, right): """ A simple CLI for running IBDFinder on a command line from the `python` subdirectory. Basic usage: - > python3 ./tests/ibd.py --infile test.trees + > python3 ./tests/ibd.py --infile test.arg """ parser = argparse.ArgumentParser( diff --git a/python/tests/test_cli.py b/python/tests/test_cli.py index cca5045123..6c06a33bc2 100644 --- a/python/tests/test_cli.py +++ b/python/tests/test_cli.py @@ -94,7 +94,7 @@ class TestTskitArgumentParser: def test_individuals_default_values(self): parser = cli.get_tskit_parser() cmd = "individuals" - tree_sequence = "test.trees" + tree_sequence = "test.arg" args = parser.parse_args([cmd, tree_sequence]) assert args.tree_sequence == tree_sequence assert args.precision == 6 @@ -102,7 +102,7 @@ def test_individuals_default_values(self): def test_individuals_short_args(self): parser = cli.get_tskit_parser() cmd = "individuals" - tree_sequence = "test.trees" + tree_sequence = "test.arg" args = parser.parse_args([cmd, tree_sequence, "-p", "8"]) assert args.tree_sequence == tree_sequence assert args.precision == 8 @@ -110,7 +110,7 @@ def test_individuals_short_args(self): def test_individuals_long_args(self): parser = cli.get_tskit_parser() cmd = "individuals" - 
tree_sequence = "test.trees" + tree_sequence = "test.arg" args = parser.parse_args([cmd, tree_sequence, "--precision", "5"]) assert args.tree_sequence == tree_sequence assert args.precision == 5 @@ -118,7 +118,7 @@ def test_individuals_long_args(self): def test_nodes_default_values(self): parser = cli.get_tskit_parser() cmd = "nodes" - tree_sequence = "test.trees" + tree_sequence = "test.arg" args = parser.parse_args([cmd, tree_sequence]) assert args.tree_sequence == tree_sequence assert args.precision == 6 @@ -126,7 +126,7 @@ def test_nodes_default_values(self): def test_nodes_short_args(self): parser = cli.get_tskit_parser() cmd = "nodes" - tree_sequence = "test.trees" + tree_sequence = "test.arg" args = parser.parse_args([cmd, tree_sequence, "-p", "8"]) assert args.tree_sequence == tree_sequence assert args.precision == 8 @@ -134,7 +134,7 @@ def test_nodes_short_args(self): def test_nodes_long_args(self): parser = cli.get_tskit_parser() cmd = "nodes" - tree_sequence = "test.trees" + tree_sequence = "test.arg" args = parser.parse_args([cmd, tree_sequence, "--precision", "5"]) assert args.tree_sequence == tree_sequence assert args.precision == 5 @@ -142,7 +142,7 @@ def test_nodes_long_args(self): def test_edges_default_values(self): parser = cli.get_tskit_parser() cmd = "edges" - tree_sequence = "test.trees" + tree_sequence = "test.arg" args = parser.parse_args([cmd, tree_sequence]) assert args.tree_sequence == tree_sequence assert args.precision == 6 @@ -150,7 +150,7 @@ def test_edges_default_values(self): def test_edges_short_args(self): parser = cli.get_tskit_parser() cmd = "edges" - tree_sequence = "test.trees" + tree_sequence = "test.arg" args = parser.parse_args([cmd, tree_sequence, "-p", "8"]) assert args.tree_sequence == tree_sequence assert args.precision == 8 @@ -158,7 +158,7 @@ def test_edges_short_args(self): def test_edges_long_args(self): parser = cli.get_tskit_parser() cmd = "edges" - tree_sequence = "test.trees" + tree_sequence = "test.arg" args = 
parser.parse_args([cmd, tree_sequence, "--precision", "5"]) assert args.tree_sequence == tree_sequence assert args.precision == 5 @@ -166,7 +166,7 @@ def test_edges_long_args(self): def test_sites_default_values(self): parser = cli.get_tskit_parser() cmd = "sites" - tree_sequence = "test.trees" + tree_sequence = "test.arg" args = parser.parse_args([cmd, tree_sequence]) assert args.tree_sequence == tree_sequence assert args.precision == 6 @@ -174,7 +174,7 @@ def test_sites_default_values(self): def test_sites_short_args(self): parser = cli.get_tskit_parser() cmd = "sites" - tree_sequence = "test.trees" + tree_sequence = "test.arg" args = parser.parse_args([cmd, tree_sequence, "-p", "8"]) assert args.tree_sequence == tree_sequence assert args.precision == 8 @@ -182,7 +182,7 @@ def test_sites_short_args(self): def test_sites_long_args(self): parser = cli.get_tskit_parser() cmd = "sites" - tree_sequence = "test.trees" + tree_sequence = "test.arg" args = parser.parse_args([cmd, tree_sequence, "--precision", "5"]) assert args.tree_sequence == tree_sequence assert args.precision == 5 @@ -190,7 +190,7 @@ def test_sites_long_args(self): def test_mutations_default_values(self): parser = cli.get_tskit_parser() cmd = "mutations" - tree_sequence = "test.trees" + tree_sequence = "test.arg" args = parser.parse_args([cmd, tree_sequence]) assert args.tree_sequence == tree_sequence assert args.precision == 6 @@ -198,7 +198,7 @@ def test_mutations_default_values(self): def test_mutations_short_args(self): parser = cli.get_tskit_parser() cmd = "mutations" - tree_sequence = "test.trees" + tree_sequence = "test.arg" args = parser.parse_args([cmd, tree_sequence, "-p", "4"]) assert args.tree_sequence == tree_sequence assert args.precision == 4 @@ -206,7 +206,7 @@ def test_mutations_short_args(self): def test_mutations_long_args(self): parser = cli.get_tskit_parser() cmd = "mutations" - tree_sequence = "test.trees" + tree_sequence = "test.arg" args = parser.parse_args([cmd, tree_sequence, 
"--precision", "9"]) assert args.tree_sequence == tree_sequence assert args.precision == 9 @@ -214,7 +214,7 @@ def test_mutations_long_args(self): def test_provenances_default_values(self): parser = cli.get_tskit_parser() cmd = "provenances" - tree_sequence = "test.trees" + tree_sequence = "test.arg" args = parser.parse_args([cmd, tree_sequence]) assert args.tree_sequence == tree_sequence assert not args.human @@ -222,7 +222,7 @@ def test_provenances_default_values(self): def test_provenances_short_args(self): parser = cli.get_tskit_parser() cmd = "provenances" - tree_sequence = "test.trees" + tree_sequence = "test.arg" args = parser.parse_args([cmd, tree_sequence, "-H"]) assert args.tree_sequence == tree_sequence assert args.human @@ -230,7 +230,7 @@ def test_provenances_short_args(self): def test_provenances_long_args(self): parser = cli.get_tskit_parser() cmd = "provenances" - tree_sequence = "test.trees" + tree_sequence = "test.arg" args = parser.parse_args([cmd, tree_sequence, "--human"]) assert args.tree_sequence == tree_sequence assert args.human @@ -239,7 +239,7 @@ def test_provenances_long_args(self): def test_fasta_default_values(self): parser = cli.get_tskit_parser() cmd = "fasta" - tree_sequence = "test.trees" + tree_sequence = "test.arg" args = parser.parse_args([cmd, tree_sequence]) assert args.tree_sequence == tree_sequence assert args.wrap == 60 @@ -248,7 +248,7 @@ def test_fasta_default_values(self): def test_fasta_short_args(self): parser = cli.get_tskit_parser() cmd = "fasta" - tree_sequence = "test.trees" + tree_sequence = "test.arg" args = parser.parse_args([cmd, tree_sequence, "-w", "100"]) assert args.tree_sequence == tree_sequence assert args.wrap == 100 @@ -257,7 +257,7 @@ def test_fasta_short_args(self): def test_fasta_long_args(self): parser = cli.get_tskit_parser() cmd = "fasta" - tree_sequence = "test.trees" + tree_sequence = "test.arg" args = parser.parse_args([cmd, tree_sequence, "--wrap", "50"]) assert args.tree_sequence == 
tree_sequence assert args.wrap == 50 @@ -273,7 +273,7 @@ def test_fasta_long_args(self): def test_vcf_ploidy(self, flags, expected): parser = cli.get_tskit_parser() cmd = "vcf" - tree_sequence = "test.trees" + tree_sequence = "test.arg" args = parser.parse_args([cmd, tree_sequence, *flags]) assert args.tree_sequence == tree_sequence assert args.ploidy == expected @@ -289,7 +289,7 @@ def test_vcf_ploidy(self, flags, expected): def test_vcf_contig_id(self, flags, expected): parser = cli.get_tskit_parser() cmd = "vcf" - tree_sequence = "test.trees" + tree_sequence = "test.arg" args = parser.parse_args([cmd, tree_sequence, *flags]) assert args.tree_sequence == tree_sequence assert args.contig_id == expected @@ -305,7 +305,7 @@ def test_vcf_contig_id(self, flags, expected): def test_vcf_allow_position_zero(self, flags, expected): parser = cli.get_tskit_parser() cmd = "vcf" - tree_sequence = "test.trees" + tree_sequence = "test.arg" args = parser.parse_args([cmd, tree_sequence, *flags]) assert args.tree_sequence == tree_sequence assert args.allow_position_zero == expected @@ -313,28 +313,28 @@ def test_vcf_allow_position_zero(self, flags, expected): def test_info_default_values(self): parser = cli.get_tskit_parser() cmd = "info" - tree_sequence = "test.trees" + tree_sequence = "test.arg" args = parser.parse_args([cmd, tree_sequence]) assert args.tree_sequence == tree_sequence def test_populations_default_values(self): parser = cli.get_tskit_parser() cmd = "populations" - tree_sequence = "test.trees" + tree_sequence = "test.arg" args = parser.parse_args([cmd, tree_sequence]) assert args.tree_sequence == tree_sequence def test_migrations_default_values(self): parser = cli.get_tskit_parser() cmd = "migrations" - tree_sequence = "test.trees" + tree_sequence = "test.arg" args = parser.parse_args([cmd, tree_sequence]) assert args.tree_sequence == tree_sequence def test_migrations_short_args(self): parser = cli.get_tskit_parser() cmd = "migrations" - tree_sequence = 
"test.trees" + tree_sequence = "test.arg" args = parser.parse_args([cmd, tree_sequence, "-p", "2"]) assert args.tree_sequence == tree_sequence assert args.precision == 2 @@ -342,7 +342,7 @@ def test_migrations_short_args(self): def test_migrations_long_args(self): parser = cli.get_tskit_parser() cmd = "migrations" - tree_sequence = "test.trees" + tree_sequence = "test.arg" args = parser.parse_args([cmd, tree_sequence, "--precision", "5"]) assert args.tree_sequence == tree_sequence assert args.precision == 5 @@ -350,7 +350,7 @@ def test_migrations_long_args(self): def test_trees_default_values(self): parser = cli.get_tskit_parser() cmd = "trees" - tree_sequence = "test.trees" + tree_sequence = "test.arg" args = parser.parse_args([cmd, tree_sequence]) assert args.tree_sequence == tree_sequence assert args.precision == 6 @@ -359,7 +359,7 @@ def test_trees_default_values(self): def test_trees_short_args(self): parser = cli.get_tskit_parser() cmd = "trees" - tree_sequence = "test.trees" + tree_sequence = "test.arg" args = parser.parse_args([cmd, tree_sequence, "-d", "-p", "8"]) assert args.tree_sequence == tree_sequence assert args.precision == 8 @@ -368,7 +368,7 @@ def test_trees_short_args(self): def test_trees_long_args(self): parser = cli.get_tskit_parser() cmd = "trees" - tree_sequence = "test.trees" + tree_sequence = "test.arg" args = parser.parse_args([cmd, tree_sequence, "--precision", "5", "--draw"]) assert args.tree_sequence == tree_sequence assert args.precision == 5 @@ -397,9 +397,7 @@ def setUpClass(cls): cls._tree_sequence = tsutil.insert_random_ploidy_individuals( ts, samples_only=True ) - fd, cls._tree_sequence_file = tempfile.mkstemp( - prefix="tsk_cli", suffix=".trees" - ) + fd, cls._tree_sequence_file = tempfile.mkstemp(prefix="tsk_cli", suffix=".arg") os.close(fd) cls._tree_sequence.dump(cls._tree_sequence_file) @@ -596,11 +594,11 @@ class TestVCFZeroPosition: def test_zero_position(self, tmp_path): ts = msprime.simulate(10, mutation_rate=1, 
random_seed=1) - ts.dump(tmp_path / "test.trees") + ts.dump(tmp_path / "test.arg") with pytest.raises(ValueError): - capture_output(cli.tskit_main, ["vcf", str(tmp_path / "test.trees")]) + capture_output(cli.tskit_main, ["vcf", str(tmp_path / "test.arg")]) stdout, stderr = capture_output( - cli.tskit_main, ["vcf", "-0", str(tmp_path / "test.trees")] + cli.tskit_main, ["vcf", "-0", str(tmp_path / "test.arg")] ) assert len(stderr) == 0 diff --git a/python/tests/test_file_format.py b/python/tests/test_file_format.py index ff35dc90bc..317b75dc59 100644 --- a/python/tests/test_file_format.py +++ b/python/tests/test_file_format.py @@ -271,7 +271,7 @@ def test_format_too_old_raised_for_hdf5(self): tskit.TableCollection.load(path) def test_tskit_v_0_3_3(self): - path = os.path.join(test_data_dir, "old-formats", "tskit-0.3.3.trees") + path = os.path.join(test_data_dir, "old-formats", "tskit-0.3.3.arg") ts = tskit.load(path) self.verify_tree_sequence(ts) @@ -671,14 +671,14 @@ def test_fixture_has_reference_sequence(self, ts_fixture): def test_round_trip(self, ts_fixture, tmp_path): ts1 = ts_fixture - temp_file = tmp_path / "tmp.trees" + temp_file = tmp_path / "tmp.arg" ts1.dump(temp_file) ts2 = tskit.load(temp_file) ts1.tables.assert_equals(ts2.tables) def test_no_reference_sequence(self, ts_fixture, tmp_path): ts1 = ts_fixture - temp_file = tmp_path / "tmp.trees" + temp_file = tmp_path / "tmp.arg" ts1.dump(temp_file) with kastore.load(temp_file) as store: all_data = dict(store) @@ -698,7 +698,7 @@ def test_no_reference_sequence(self, ts_fixture, tmp_path): @pytest.mark.parametrize("attr", ["data", "url"]) def test_missing_attr(self, ts_fixture, tmp_path, attr): ts1 = ts_fixture - temp_file = tmp_path / "tmp.trees" + temp_file = tmp_path / "tmp.arg" ts1.dump(temp_file) with kastore.load(temp_file) as store: all_data = dict(store) @@ -710,7 +710,7 @@ def test_missing_attr(self, ts_fixture, tmp_path, attr): def test_missing_metadata(self, ts_fixture, tmp_path): ts1 = 
ts_fixture - temp_file = tmp_path / "tmp.trees" + temp_file = tmp_path / "tmp.arg" ts1.dump(temp_file) with kastore.load(temp_file) as store: all_data = dict(store) @@ -722,7 +722,7 @@ def test_missing_metadata(self, ts_fixture, tmp_path): def test_missing_metadata_schema(self, ts_fixture, tmp_path): ts1 = ts_fixture - temp_file = tmp_path / "tmp.trees" + temp_file = tmp_path / "tmp.arg" ts1.dump(temp_file) with kastore.load(temp_file) as store: all_data = dict(store) @@ -943,7 +943,7 @@ def test_ts_read_path_interface(self, tmp_path, ts_fixture): # Check the fixture has metadata and a schema assert ts_fixture.metadata_schema is not None assert len(ts_fixture.metadata) > 0 - save_path = tmp_path / "tmp.trees" + save_path = tmp_path / "tmp.arg" ts_fixture.dump(save_path) ts_no_tables = tskit.load(save_path, skip_tables=True) assert not ts_no_tables.equals(ts_fixture) @@ -951,7 +951,7 @@ def test_ts_read_path_interface(self, tmp_path, ts_fixture): assert_tables_empty(ts_no_tables.tables) def test_ts_read_one_stream(self, tmp_path, ts_fixture): - save_path = tmp_path / "tmp.trees" + save_path = tmp_path / "tmp.arg" ts_fixture.dump(save_path) with open(save_path, "rb") as f: ts_no_tables = tskit.load(f, skip_tables=True) @@ -960,7 +960,7 @@ def test_ts_read_one_stream(self, tmp_path, ts_fixture): assert_tables_empty(ts_no_tables.tables) def test_ts_twofile_stream_noskip(self, tmp_path, ts_fixture): - save_path = tmp_path / "tmp.trees" + save_path = tmp_path / "tmp.arg" with open(save_path, "wb") as f: ts_fixture.dump(f) ts_fixture.dump(f) @@ -972,7 +972,7 @@ def test_ts_twofile_stream_noskip(self, tmp_path, ts_fixture): def test_ts_twofile_stream_fails(self, tmp_path, ts_fixture): # We can't skip_tables while reading from a stream - save_path = tmp_path / "tmp.trees" + save_path = tmp_path / "tmp.arg" with open(save_path, "wb") as f: ts_fixture.dump(f) ts_fixture.dump(f) @@ -982,7 +982,7 @@ def test_ts_twofile_stream_fails(self, tmp_path, ts_fixture): tskit.load(f) def 
test_table_collection_load_path(self, tmp_path, ts_fixture): - save_path = tmp_path / "tmp.trees" + save_path = tmp_path / "tmp.arg" ts_fixture.dump(save_path) tables_skipped = tskit.TableCollection.load(save_path, skip_tables=True) tables = ts_fixture.tables @@ -991,7 +991,7 @@ def test_table_collection_load_path(self, tmp_path, ts_fixture): assert_tables_empty(tables_skipped) def test_table_collection_load_stream(self, tmp_path, ts_fixture): - save_path = tmp_path / "tmp.trees" + save_path = tmp_path / "tmp.arg" ts_fixture.dump(save_path) with open(save_path, "rb") as f: tables_skipped = tskit.TableCollection.load(f, skip_tables=True) @@ -1009,7 +1009,7 @@ class TestSkipReferenceSequence: def test_ts_load_path(self, tmp_path, ts_fixture): assert ts_fixture.has_reference_sequence() - save_path = tmp_path / "tmp.trees" + save_path = tmp_path / "tmp.arg" ts_fixture.dump(save_path) ts_no_refseq = tskit.load(save_path, skip_reference_sequence=True) assert not ts_no_refseq.equals(ts_fixture) @@ -1017,7 +1017,7 @@ def test_ts_load_path(self, tmp_path, ts_fixture): assert not ts_no_refseq.has_reference_sequence() def test_ts_load_stream(self, tmp_path, ts_fixture): - save_path = tmp_path / "tmp.trees" + save_path = tmp_path / "tmp.arg" ts_fixture.dump(save_path) with open(save_path, "rb") as f: ts_no_refseq = tskit.load(f, skip_reference_sequence=True) @@ -1027,7 +1027,7 @@ def test_ts_load_stream(self, tmp_path, ts_fixture): def test_ts_twofile_stream_fails(self, tmp_path, ts_fixture): # We can't skip_reference_sequence while reading from a stream - save_path = tmp_path / "tmp.trees" + save_path = tmp_path / "tmp.arg" with open(save_path, "wb") as f: ts_fixture.dump(f) ts_fixture.dump(f) @@ -1037,7 +1037,7 @@ def test_ts_twofile_stream_fails(self, tmp_path, ts_fixture): tskit.load(f) def test_table_collection_load_path(self, tmp_path, ts_fixture): - save_path = tmp_path / "tmp.trees" + save_path = tmp_path / "tmp.arg" ts_fixture.dump(save_path) tables_no_refseq = 
tskit.TableCollection.load( save_path, skip_reference_sequence=True @@ -1048,7 +1048,7 @@ def test_table_collection_load_path(self, tmp_path, ts_fixture): assert not tables_no_refseq.has_reference_sequence() def test_table_collection_load_stream(self, tmp_path, ts_fixture): - save_path = tmp_path / "tmp.trees" + save_path = tmp_path / "tmp.arg" ts_fixture.dump(save_path) with open(save_path, "rb") as f: tables_no_refseq = tskit.TableCollection.load( diff --git a/python/tests/test_highlevel.py b/python/tests/test_highlevel.py index 00b8bca3e0..ff7cb48368 100644 --- a/python/tests/test_highlevel.py +++ b/python/tests/test_highlevel.py @@ -1979,7 +1979,7 @@ def test_removed_methods(self): ts.to_nexus() def test_dump_pathlib(self, ts_fixture, tmp_path): - path = tmp_path / "tmp.trees" + path = tmp_path / "tmp.arg" assert path.exists assert path.is_file ts_fixture.dump(path) @@ -1991,7 +1991,7 @@ def test_dump_load_errors(self): ts = msprime.simulate(5, random_seed=1) # Try to dump/load files we don't have access to or don't exist. 
for func in [ts.dump, tskit.load]: - for f in ["/", "/test.trees", "/dir_does_not_exist/x.trees"]: + for f in ["/", "/test.arg", "/dir_does_not_exist/x.arg"]: with pytest.raises(OSError): func(f) try: @@ -2012,7 +2012,7 @@ def test_dump_load_errors(self): func(bad_filename) def test_zlib_compression_warning(self, ts_fixture, tmp_path): - temp_file = tmp_path / "tmp.trees" + temp_file = tmp_path / "tmp.arg" with warnings.catch_warnings(record=True) as w: ts_fixture.dump(temp_file, zlib_compression=True) assert len(w) == 1 @@ -3260,7 +3260,7 @@ class TestFileUuid(HighLevelTestCase): def validate(self, ts): with tempfile.TemporaryDirectory() as tempdir: - temp_file = pathlib.Path(tempdir) / "tmp.trees" + temp_file = pathlib.Path(tempdir) / "tmp.arg" assert ts.file_uuid is None ts.dump(temp_file) other_ts = tskit.load(temp_file) @@ -4590,7 +4590,7 @@ def verify_random_permutation(self, ts): assert j == ts.get_num_trees() # Verify we can dump this new tree sequence OK. with tempfile.TemporaryDirectory() as tempdir: - temp_file = pathlib.Path(tempdir) / "tmp.trees" + temp_file = pathlib.Path(tempdir) / "tmp.arg" other_ts.dump(temp_file) ts3 = tskit.load(temp_file) self.verify_tree_sequences_equal(other_ts, ts3) diff --git a/python/tests/test_lowlevel.py b/python/tests/test_lowlevel.py index 7d0efae1c1..119a84ccef 100644 --- a/python/tests/test_lowlevel.py +++ b/python/tests/test_lowlevel.py @@ -170,18 +170,18 @@ class TestTableCollection(LowLevelTestCase): def test_skip_tables(self, tmp_path): tc = _tskit.TableCollection(1) self.get_example_tree_sequence().dump_tables(tc) - with open(tmp_path / "tmp.trees", "wb") as f: + with open(tmp_path / "tmp.arg", "wb") as f: tc.dump(f) for good_bool in [1, True]: - with open(tmp_path / "tmp.trees", "rb") as f: + with open(tmp_path / "tmp.arg", "rb") as f: tc_skip = _tskit.TableCollection() tc_skip.load(f, skip_tables=good_bool) assert not tc.equals(tc_skip) assert tc.equals(tc_skip, ignore_tables=True) for bad_bool in ["x", 0.5, 
{}]: - with open(tmp_path / "tmp.trees", "rb") as f: + with open(tmp_path / "tmp.arg", "rb") as f: tc_skip = _tskit.TableCollection() with pytest.raises(TypeError): tc_skip.load(f, skip_tables=bad_bool) @@ -190,18 +190,18 @@ def test_skip_reference_sequence(self, tmp_path): tc = _tskit.TableCollection(1) self.get_example_tree_sequence().dump_tables(tc) tc.reference_sequence.data = "ACGT" - with open(tmp_path / "tmp.trees", "wb") as f: + with open(tmp_path / "tmp.arg", "wb") as f: tc.dump(f) for good_bool in [1, True]: - with open(tmp_path / "tmp.trees", "rb") as f: + with open(tmp_path / "tmp.arg", "rb") as f: tc_skip = _tskit.TableCollection() tc_skip.load(f, skip_reference_sequence=good_bool) assert not tc.equals(tc_skip) assert tc.equals(tc_skip, ignore_reference_sequence=True) for bad_bool in ["x", 0.5, {}]: - with open(tmp_path / "tmp.trees", "rb") as f: + with open(tmp_path / "tmp.arg", "rb") as f: tc_skip = _tskit.TableCollection() with pytest.raises(TypeError): tc_skip.load(f, skip_reference_sequence=bad_bool) @@ -222,9 +222,9 @@ def loader(*args): func(bad_type) def test_file_format_eof_error(self, tmp_path): - with open(tmp_path / "tmp.trees", "wb") as f: + with open(tmp_path / "tmp.arg", "wb") as f: f.write(b"") - with open(tmp_path / "tmp.trees", "rb") as f: + with open(tmp_path / "tmp.arg", "rb") as f: tc2 = _tskit.TableCollection() with pytest.raises(EOFError): tc2.load(f) @@ -232,9 +232,9 @@ def test_file_format_eof_error(self, tmp_path): def test_file_format_kas_error(self, tmp_path): tc1 = _tskit.TableCollection(1) self.get_example_tree_sequence().dump_tables(tc1) - with open(tmp_path / "tmp.trees", "wb") as f: + with open(tmp_path / "tmp.arg", "wb") as f: tc1.dump(f) - with open(tmp_path / "tmp.trees", "rb") as f: + with open(tmp_path / "tmp.arg", "rb") as f: f.seek(1) tc2 = _tskit.TableCollection() with pytest.raises(_tskit.FileFormatError): @@ -244,9 +244,9 @@ def test_dump_equality(self, tmp_path): for ts in self.get_example_tree_sequences(): 
tc = _tskit.TableCollection(sequence_length=ts.get_sequence_length()) ts.dump_tables(tc) - with open(tmp_path / "tmp.trees", "wb") as f: + with open(tmp_path / "tmp.arg", "wb") as f: tc.dump(f) - with open(tmp_path / "tmp.trees", "rb") as f: + with open(tmp_path / "tmp.arg", "rb") as f: tc2 = _tskit.TableCollection() tc2.load(f) assert tc.equals(tc2) @@ -1224,13 +1224,13 @@ def tearDown(self): def test_skip_tables(self, tmp_path): ts = self.get_example_tree_sequence() - with open(tmp_path / "tmp.trees", "wb") as f: + with open(tmp_path / "tmp.arg", "wb") as f: ts.dump(f) tc = _tskit.TableCollection(1) ts.dump_tables(tc) for good_bool in [1, True]: - with open(tmp_path / "tmp.trees", "rb") as f: + with open(tmp_path / "tmp.arg", "rb") as f: ts_skip = _tskit.TreeSequence() ts_skip.load(f, skip_tables=good_bool) tc_skip = _tskit.TableCollection() @@ -1239,7 +1239,7 @@ def test_skip_tables(self, tmp_path): assert tc.equals(tc_skip, ignore_tables=True) for bad_bool in ["x", 0.5, {}]: - with open(tmp_path / "tmp.trees", "rb") as f: + with open(tmp_path / "tmp.arg", "rb") as f: ts_skip = _tskit.TreeSequence() with pytest.raises(TypeError): ts_skip.load(f, skip_tables=bad_bool) @@ -1250,11 +1250,11 @@ def test_skip_reference_sequence(self, tmp_path): tc.reference_sequence.data = "ACGT" ts = _tskit.TreeSequence() ts.load_tables(tc, build_indexes=True) - with open(tmp_path / "tmp.trees", "wb") as f: + with open(tmp_path / "tmp.arg", "wb") as f: ts.dump(f) for good_bool in [1, True]: - with open(tmp_path / "tmp.trees", "rb") as f: + with open(tmp_path / "tmp.arg", "rb") as f: ts_skip = _tskit.TreeSequence() ts_skip.load(f, skip_reference_sequence=good_bool) tc_skip = _tskit.TableCollection() @@ -1263,7 +1263,7 @@ def test_skip_reference_sequence(self, tmp_path): assert tc.equals(tc_skip, ignore_reference_sequence=True) for bad_bool in ["x", 0.5, {}]: - with open(tmp_path / "tmp.trees", "rb") as f: + with open(tmp_path / "tmp.arg", "rb") as f: ts_skip = _tskit.TreeSequence() 
with pytest.raises(TypeError): ts_skip.load(f, skip_reference_sequence=bad_bool) @@ -1319,9 +1319,9 @@ def test_dump_equality(self, tmp_path): tables.compute_mutation_times() ts = _tskit.TreeSequence() ts.load_tables(tables) - with open(tmp_path / "temp.trees", "wb") as f: + with open(tmp_path / "temp.arg", "wb") as f: ts.dump(f) - with open(tmp_path / "temp.trees", "rb") as f: + with open(tmp_path / "temp.arg", "rb") as f: ts2 = _tskit.TreeSequence() ts2.load(f) tc = _tskit.TableCollection(ts.get_sequence_length()) diff --git a/python/tests/test_tables.py b/python/tests/test_tables.py index 3b1bd50bad..6c34ac90f2 100644 --- a/python/tests/test_tables.py +++ b/python/tests/test_tables.py @@ -3932,7 +3932,7 @@ def check_concordance(d1, d2): check_concordance(lwt.asdict(), tables.asdict()) def test_dump_pathlib(self, ts_fixture, tmp_path): - path = pathlib.Path(tmp_path) / "tmp.trees" + path = pathlib.Path(tmp_path) / "tmp.arg" assert path.exists assert path.is_file tc = ts_fixture.dump_tables() @@ -3945,7 +3945,7 @@ def test_dump_load_errors(self, ts_fixture): tc = ts_fixture.dump_tables() # Try to dump/load files we don't have access to or don't exist. for func in [tc.dump, tskit.TableCollection.load]: - for f in ["/", "/test.trees", "/dir_does_not_exist/x.trees"]: + for f in ["/", "/test.arg", "/dir_does_not_exist/x.arg"]: with pytest.raises(OSError): func(f) try: diff --git a/python/tskit/trees.py b/python/tskit/trees.py index ca03d22d3c..d3a8a74099 100644 --- a/python/tskit/trees.py +++ b/python/tskit/trees.py @@ -3406,9 +3406,9 @@ def load(file, *, skip_tables=False, skip_reference_sequence=False): (e.g. a socket or STDIN) of multiple tree sequences using consecutive calls to :meth:`tskit.load`. - :param str file: The file object or path of the ``.trees`` file containing the + :param str file: The file object or path of the ``.arg`` file containing the tree sequence we wish to load. 
- :param bool skip_tables: If True, no tables are read from the ``.trees`` + :param bool skip_tables: If True, no tables are read from the ``.arg`` file and only the top-level information is populated in the tree sequence object. :param bool skip_reference_sequence: If True, the tree sequence is read