Skip to content

Commit 9119c12

Browse files
authored
Enforce sample ID uniqueness on append (#93)
1 parent 7d637f7 commit 9119c12

2 files changed

Lines changed: 22 additions & 3 deletions

File tree

tests/test_append.py

Lines changed: 16 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -476,3 +476,19 @@ def test_require_direct_copy_fails_before_mutating_when_incoming_is_not_aligned(
476476
root = zarr.open_group(store=store1, mode="r")
477477
np.testing.assert_array_equal(root["sample_id"][:], np.array(["S1", "S2"]))
478478
assert root["call_genotype"].shape == (2, 2, 2)
479+
480+
481+
def test_append_fails_when_duplicate_sample_id():
    """Check that append() rejects an incoming store sharing a sample ID.

    Both stores contain the sample ID "S1", so the call is expected to raise
    a ValueError whose message names the duplicated ID.
    """
    existing_store = _create_minimal_append_store(
        ["S1", "S2"], _make_genotype(2, 2), samples_chunk_size=2
    )
    incoming_store = _create_minimal_append_store(
        ["S1", "S3"], _make_genotype(2, 4), samples_chunk_size=2
    )

    # Brackets are escaped because pytest treats `match` as a regex.
    expected_message = r"Duplicate samples found: \['S1'\]"
    with pytest.raises(ValueError, match=expected_message):
        append(existing_store, incoming_store)

vczstore/append.py

Lines changed: 6 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -128,6 +128,12 @@ def append(
128128
)
129129

130130
# check preconditions
131+
sample_id1 = root1["sample_id"]
132+
sample_id2 = root2["sample_id"]
133+
common_samples = np.intersect1d(sample_id1, sample_id2)
134+
if common_samples.shape[0] > 0:
135+
raise ValueError(f"Duplicate samples found: {common_samples}")
136+
131137
n_variants1 = root1["variant_contig"].shape[0]
132138
n_variants2 = root2["variant_contig"].shape[0]
133139
if n_variants1 != n_variants2:
@@ -168,9 +174,6 @@ def append(
168174
)
169175

170176
# append samples
171-
sample_id1 = root1["sample_id"]
172-
sample_id2 = root2["sample_id"]
173-
174177
old_num_samples = sample_id1.shape[0]
175178
incoming_num_samples = sample_id2.shape[0]
176179
new_num_samples = old_num_samples + incoming_num_samples

0 commit comments

Comments
 (0)