Skip to content

Commit f314f28

Browse files
authored
Allow samples chunk size to be set by create command (#107)
1 parent 0603228 commit f314f28

3 files changed

Lines changed: 34 additions & 4 deletions

File tree

tests/test_create.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,21 @@ def test_create__single():
6161
assert root["variant_allele"][0, 1] == "T"
6262

6363

64+
def test_create__override_samples_chunk_size():
    """create() uses an explicit samples_chunk_size over the source chunking."""
    # The source store holds a single sample and is chunked by 1 along samples.
    source = make_vcz(
        [0],
        [100],
        [["A", "T"]],
        sample_id=["S1"],
        call_genotype=[[[0, 0]]],
        samples_chunk_size=1,
    )
    store = zarr.storage.MemoryStore()
    create(store, source, samples_chunk_size=2)
    # Axis 1 of call_genotype must carry the overridden chunk size.
    genotype_chunks = zarr.open(store)["call_genotype"].chunks
    assert genotype_chunks[1] == 2
77+
78+
6479
def test_create__no_match():
6580
# Disjoint alts at same position → 2 output variants
6681
vcz1 = make_vcz([0], [100], [["A", "T"]])

vczstore/cli.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -98,16 +98,23 @@ def append(vcz1, vcz2, verbose, backend_storage, io_concurrency, require_direct_
9898
@click.command()
9999
@click.argument("vcz_out", type=click.Path())
100100
@click.argument("vczs", nargs=-1, type=click.Path())
101+
@click.option(
102+
"--samples-chunk-size",
103+
type=click.IntRange(min=1),
104+
default=None,
105+
help="Chunk size in the samples dimension",
106+
)
101107
@verbose
102108
@progress
103109
@backend_storage
104-
def create(vcz_out, vczs, verbose, progress, backend_storage):
110+
def create(vcz_out, vczs, samples_chunk_size, verbose, progress, backend_storage):
    """Create a new, empty store VCZ_OUT using merged variants from VCZS"""
    setup_logging(verbose)
    # Forward the CLI options as keyword arguments; samples_chunk_size stays
    # None when --samples-chunk-size is not given on the command line.
    create_kwargs = {
        "samples_chunk_size": samples_chunk_size,
        "show_progress": progress,
        "backend_storage": backend_storage,
    }
    call_or_error(create_function, vcz_out, *vczs, **create_kwargs)

vczstore/create.py

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -521,7 +521,9 @@ def _compute_merged_variants(
521521
)
522522

523523

524-
def create(vcz_out, *vczs, show_progress=False, backend_storage=None) -> None:
524+
def create(
525+
vcz_out, *vczs, samples_chunk_size=None, show_progress=False, backend_storage=None
526+
) -> None:
525527
"""Create a new, empty store vcz_out using merged variants from vczs
526528
using -m none semantics with stable variant ordering.
527529
@@ -555,6 +557,8 @@ def create(vcz_out, *vczs, show_progress=False, backend_storage=None) -> None:
555557
root1 = zarr.open(vcz1, mode="r")
556558

557559
if len(vczs) == 1:
560+
n_variants = root1["variant_contig"].shape[0]
561+
558562
out_root = open_zarr(
559563
vcz_out,
560564
mode="w",
@@ -713,13 +717,17 @@ def create(vcz_out, *vczs, show_progress=False, backend_storage=None) -> None:
713717
if var.startswith("call_"):
714718
arr = root1[var]
715719
shape = (n_variants, 0) + arr.shape[2:]
716-
# TODO: should allow sample chunk size to be overridden/enforced here
720+
# Keep the source array's chunking unless the caller asked for a specific
# chunk size along axis 1 (the samples dimension of the call_* arrays).
chunks = arr.chunks
if samples_chunk_size is not None:
    chunks = (arr.chunks[0], samples_chunk_size) + arr.chunks[2:]
717725
create_empty_group_array(
718726
out_root,
719727
var,
720728
shape=shape,
721729
dtype=arr.dtype,
722-
chunks=arr.chunks,
730+
chunks=chunks,
723731
compressor=get_compressor_config(arr),
724732
dimension_names=array_dims(arr),
725733
)

0 commit comments

Comments
 (0)