sgkit-dev
diff --git a/tests/test_append.py b/tests/test_append.py
Lines changed: 8 additions & 139 deletions
diff --git a/tests/test_normalise.py b/tests/test_normalise.py
Lines changed: 1 addition & 44 deletions
diff --git a/tests/test_remove.py b/tests/test_remove.py
Lines changed: 1 addition & 110 deletions
@@ -1,28 +1,3 @@
-# Create the VCF files, one with samples NA00001 and NA00002 and the other with NA00003
-
-# bcftools view -s NA00001,NA00002 --no-update -O z tests/data/vcf/sample.vcf.gz \
-#  > tests/data/vcf/sample-part1.vcf.gz
-# bcftools view -s NA00003 --no-update -O z tests/data/vcf/sample.vcf.gz \
-#  > tests/data/vcf/sample-part2.vcf.gz
-# bcftools index -c tests/data/vcf/sample-part1.vcf.gz
-# bcftools index -c tests/data/vcf/sample-part2.vcf.gz
-
-# Similarly for chr22.vcf.gz
-# bcftools view --no-update \
-#  -S <(bcftools query -l tests/data/vcf/chr22.vcf.gz | head -55) \
-#  tests/data/vcf/chr22.vcf.gz --write-index=csi -o tests/data/vcf/chr22-part1.vcf.gz
-# bcftools view --no-update \
-#  -S <(bcftools query -l tests/data/vcf/chr22.vcf.gz | tail -45) \
-#  tests/data/vcf/chr22.vcf.gz --write-index=csi -o tests/data/vcf/chr22-part2.vcf.gz
-
-# Create a variants list VCF with no samples.
-# Note that the header contains FORMAT fields, even though there are no samples,
-# which is necessary for vc2zarr to create empty arrays.
-
-# bin/vcf-drop-samples.sh tests/data/vcf/sample.vcf.gz \
-#  tests/data/vcf/sample-variants.vcf.gz
-
-
 import numpy as np
 import pytest
 import zarr
@@ -31,13 +6,7 @@
 
 from vczstore.append import append
 
-from .utils import (
-    compare_vcf_and_vcz,
-    convert_vcf_to_vcz,
-    convert_vcf_to_vcz_icechunk,
-    make_vcz,
-    run_vcztools,
-)
+from .utils import make_vcz
 
 
 def test_append():
@@ -104,67 +73,21 @@ def test_append():
     )
 
 
-@pytest.mark.parametrize("samples_chunk_size", [1, 2, 4])
-@pytest.mark.parametrize("backend_storage", [None, "obstore"])
-def test_append_compare_vcf(tmp_path, samples_chunk_size, backend_storage):
-    vcz1 = convert_vcf_to_vcz(
-        "sample-part1.vcf.gz", tmp_path, samples_chunk_size=samples_chunk_size
-    )
-    vcz2 = convert_vcf_to_vcz("sample-part2.vcf.gz", tmp_path)
-
-    # check samples query
-    vcztools_out, _ = run_vcztools(f"query -l {vcz1}")
-    assert vcztools_out.strip() == "NA00001\nNA00002"
-
-    append(vcz1, vcz2, backend_storage=backend_storage)
-
-    # check samples query
-    vcztools_out, _ = run_vcztools(f"query -l {vcz1}")
-    assert vcztools_out.strip() == "NA00001\nNA00002\nNA00003"
-
-    # check equivalence with original VCF
-    compare_vcf_and_vcz(
-        tmp_path, "view --no-version", "sample.vcf.gz", "view --no-version", vcz1
-    )
-
-
-def test_append_from_variants_list(tmp_path):
-    vcz0 = convert_vcf_to_vcz(
-        "sample-variants.vcf.gz", tmp_path, ploidy=2, samples_chunk_size=2
-    )
-    vcz1 = convert_vcf_to_vcz("sample-part1.vcf.gz", tmp_path)
-
-    # check samples query
-    vcztools_out, _ = run_vcztools(f"query -l {vcz0}")
-    assert vcztools_out.strip() == ""
-
-    append(vcz0, vcz1)
-
-    # check samples query
-    vcztools_out, _ = run_vcztools(f"query -l {vcz0}")
-    assert vcztools_out.strip() == "NA00001\nNA00002"
-
-    # check equivalence with original VCF
-    compare_vcf_and_vcz(
-        tmp_path, "view --no-version", "sample-part1.vcf.gz", "view --no-version", vcz0
-    )
-
-
-def test_append_fail_num_variants_mismatch(tmp_path):
-    vcz1 = convert_vcf_to_vcz("sample-part1.vcf.gz", tmp_path)
-    vcz2 = convert_vcf_to_vcz("alleles-1.vcf.gz", tmp_path)
+def test_append_fail_num_variants_mismatch():
+    vcz1 = make_vcz([0, 0], [1, 2], [["A", "T"], ["A", "G"]])
+    vcz2 = make_vcz([0], [1], [["A", "C"]])
 
     with pytest.raises(
         ValueError,
         match="Stores being appended must have same number of variants. "
-        "First has 9, second has 2",
+        "First has 2, second has 1",
     ):
         append(vcz1, vcz2)
 
 
-def test_append_fail_alleles_mismatch(tmp_path):
-    vcz1 = convert_vcf_to_vcz("sample-part1.vcf.gz", tmp_path)
-    vcz2 = convert_vcf_to_vcz("sample-part2-alleles-mismatch.vcf.gz", tmp_path)
+def test_append_fail_alleles_mismatch():
+    vcz1 = make_vcz([0], [1], [["A", "T"]])
+    vcz2 = make_vcz([0], [1], [["A", "C"]])
 
     with pytest.raises(
         ValueError,
@@ -419,60 +342,6 @@ def test_append_deletes_stale_destination_chunk_when_source_chunk_is_sparse():
     )
 
 
-def test_append_multiple_chunks(tmp_path):
-    vcz1 = convert_vcf_to_vcz(
-        "chr22-part1.vcf.gz", tmp_path, variants_chunk_size=10, samples_chunk_size=50
-    )
-    vcz2 = convert_vcf_to_vcz(
-        "chr22-part2.vcf.gz", tmp_path, variants_chunk_size=10, samples_chunk_size=50
-    )
-
-    # check samples query
-    vcztools_out, _ = run_vcztools(f"query -l {vcz1}")
-    assert len(vcztools_out.strip().split("\n")) == 55
-
-    append(vcz1, vcz2)
-
-    # check samples query
-    vcztools_out, _ = run_vcztools(f"query -l {vcz1}")
-    assert len(vcztools_out.strip().split("\n")) == 100
-
-    # check equivalence with original VCF
-    compare_vcf_and_vcz(
-        tmp_path, "view --no-version", "chr22.vcf.gz", "view --no-version", vcz1
-    )
-
-
-def test_append_icechunk(tmp_path):
-    pytest.importorskip("icechunk")
-
-    # note that vcz1 is in icechunk, but the dataset being appended, vcz2, needn't be
-    vcz1 = convert_vcf_to_vcz_icechunk("sample-part1.vcf.gz", tmp_path)
-    vcz2 = convert_vcf_to_vcz("sample-part2.vcf.gz", tmp_path, zarr_format=3)
-
-    print(vcz1)
-    print(vcz2)
-
-    # check samples query
-    vcztools_out, _ = run_vcztools(f"query -l {vcz1} --backend-storage icechunk")
-    assert vcztools_out.strip() == "NA00001\nNA00002"
-
-    append(vcz1, vcz2, backend_storage="icechunk")
-
-    # check samples query
-    vcztools_out, _ = run_vcztools(f"query -l {vcz1} --backend-storage icechunk")
-    assert vcztools_out.strip() == "NA00001\nNA00002\nNA00003"
-
-    # check equivalence with original VCF
-    compare_vcf_and_vcz(
-        tmp_path,
-        "view --no-version",
-        "sample.vcf.gz",
-        "view --no-version --backend-storage icechunk",
-        vcz1,
-    )
-
-
 def _make_genotype(num_variants, num_samples):
     values = np.zeros((num_variants, num_samples, 2), dtype=np.int8)
     for variant_index in range(num_variants):
 
@@ -14,12 +14,7 @@
     variant_alleles_are_equivalent,
 )
 
-from .utils import (
-    compare_vcf_and_vcz,
-    convert_vcf_to_vcz,
-    convert_vcf_to_vcz_icechunk,
-    make_vcz,
-)
+from .utils import make_vcz
 
 
 @pytest.mark.parametrize(
@@ -409,41 +404,3 @@ def test_normalise__other_call_fields_not_implemented():
 
     with pytest.raises(NotImplementedError):
         normalise(vcz1, vcz2, vcz2_norm)
-
-
-def test_normalise_and_append(tmp_path):
-    vcz0 = convert_vcf_to_vcz("sample-variants.vcf.gz", tmp_path, ploidy=2)
-    vcz1 = convert_vcf_to_vcz("sample-part1.vcf.gz", tmp_path)
-    vcz1_norm = zarr.storage.MemoryStore()
-
-    normalise(vcz0, vcz1, vcz1_norm)
-
-    append(vcz0, vcz1_norm)
-
-    # check equivalence with original VCF
-    compare_vcf_and_vcz(
-        tmp_path, "view --no-version", "sample-part1.vcf.gz", "view --no-version", vcz0
-    )
-
-
-def test_normalise_and_append_icechunk(tmp_path):
-    pytest.importorskip("icechunk")
-    from vczstore.utils import icechunk_transaction
-
-    # note that vcz0 is in icechunk, but the others needn't be
-    vcz0 = convert_vcf_to_vcz_icechunk("sample-variants.vcf.gz", tmp_path, ploidy=2)
-    vcz1 = convert_vcf_to_vcz("sample-part1.vcf.gz", tmp_path, zarr_format=3)
-    vcz1_norm = zarr.storage.MemoryStore()
-
-    with icechunk_transaction(vcz0, "main", message="append") as store:
-        normalise(store, vcz1, vcz1_norm)
-        append(store, vcz1_norm)
-
-    # check equivalence with original VCF
-    compare_vcf_and_vcz(
-        tmp_path,
-        "view --no-version",
-        "sample-part1.vcf.gz",
-        "view --no-version --backend-storage icechunk",
-        vcz0,
-    )
@@ -5,14 +5,7 @@
 
 from vczstore.remove import remove
 
-from .utils import (
-    check_removed_sample,
-    compare_vcf_and_vcz,
-    convert_vcf_to_vcz,
-    convert_vcf_to_vcz_icechunk,
-    make_vcz,
-    run_vcztools,
-)
+from .utils import make_vcz
 
 
 def test_remove():
@@ -61,108 +54,6 @@ def test_remove():
     )
 
 
-@pytest.mark.parametrize("backend_storage", [None, "obstore"])
-def test_remove_compare_vcf(tmp_path, backend_storage):
-    vcz = convert_vcf_to_vcz("sample.vcf.gz", tmp_path)
-
-    # check samples query
-    vcztools_out, _ = run_vcztools(f"query -l {vcz}")
-    assert vcztools_out.strip() == "NA00001\nNA00002\nNA00003"
-
-    remove(vcz, "NA00002", backend_storage=backend_storage)
-
-    # check samples query
-    vcztools_out, _ = run_vcztools(f"query -l {vcz}")
-    assert vcztools_out.strip() == "NA00001\nNA00003"
-
-    # check equivalence with original VCF (with sample subsetting)
-    compare_vcf_and_vcz(
-        tmp_path,
-        "view --no-version -s NA00001,NA00003 --no-update",
-        "sample.vcf.gz",
-        "view --no-version",
-        vcz,
-    )
-
-    # check sample values are missing
-    check_removed_sample(vcz, "NA00002")
-
-
-def test_remove_multiple_chunks(tmp_path):
-    vcz = convert_vcf_to_vcz("chr22.vcf.gz", tmp_path, variants_chunk_size=10)
-
-    # check samples query
-    vcztools_out, _ = run_vcztools(f"query -l {vcz}")
-    assert len(vcztools_out.strip().split("\n")) == 100
-
-    remove(vcz, "HG00100")
-
-    # check samples query
-    vcztools_out, _ = run_vcztools(f"query -l {vcz}")
-    assert "HG00100" not in vcztools_out
-    assert len(vcztools_out.strip().split("\n")) == 99
-
-    # check equivalence with original VCF (with sample subsetting)
-    reduced_samples = ",".join(vcztools_out.strip().split("\n"))
-    compare_vcf_and_vcz(
-        tmp_path,
-        f"view --no-version -s {reduced_samples} --no-update",
-        "chr22.vcf.gz",
-        "view --no-version",
-        vcz,
-    )
-
-    # check sample values are missing
-    check_removed_sample(vcz, "HG00100")
-
-
-def test_remove_icechunk(tmp_path):
-    pytest.importorskip("icechunk")
-    from icechunk import Repository
-
-    from vczstore.utils import make_icechunk_storage
-
-    vcz = convert_vcf_to_vcz_icechunk("sample.vcf.gz", tmp_path)
-
-    # check samples query
-    vcztools_out, _ = run_vcztools(f"query -l {vcz} --backend-storage icechunk")
-    assert vcztools_out.strip() == "NA00001\nNA00002\nNA00003"
-
-    icechunk_storage = make_icechunk_storage(vcz)
-    repo = Repository.open(icechunk_storage)
-
-    snapshots = [snapshot for snapshot in repo.ancestry(branch="main")]
-    assert len(snapshots) == 2
-    assert snapshots[0].message == "create"
-    assert snapshots[1].message == "Repository initialized"
-
-    remove(vcz, "NA00002", backend_storage="icechunk")
-
-    snapshots = [snapshot for snapshot in repo.ancestry(branch="main")]
-    assert len(snapshots) == 2
-    # note that 'create' has been deleted
-    assert snapshots[0].message == "remove"
-    assert snapshots[1].message == "Repository initialized"
-
-    # check samples query
-    vcztools_out, _ = run_vcztools(f"query -l {vcz} --backend-storage icechunk")
-    assert vcztools_out.strip() == "NA00001\nNA00003"
-
-    # check equivalence with original VCF (with sample subsetting)
-    compare_vcf_and_vcz(
-        tmp_path,
-        "view --no-version -s NA00001,NA00003 --no-update",
-        "sample.vcf.gz",
-        "view --no-version --backend-storage icechunk",
-        vcz,
-    )
-
-    # check sample values are missing
-    session = repo.readonly_session("main")
-    store = session.store
-    check_removed_sample(store, "NA00002")
-
-
 def test_remove_fails_for_misaligned_variant_chunks():
     vcz = make_vcz(
         variant_contig=[0, 0],