diff --git a/bio2zarr/vcf.py b/bio2zarr/vcf.py index 31b17bc..9665325 100644 --- a/bio2zarr/vcf.py +++ b/bio2zarr/vcf.py @@ -1068,14 +1068,16 @@ def iter_alleles_and_genotypes(self, start, stop, shape, num_alleles): for variant_length, alleles in zip( variant_lengths, self.iter_alleles(start, stop, num_alleles) ): - yield vcz.VariantData(variant_length, alleles, None, None) + # Stored ICF values are always at least 1D arrays; "rlen" is Number=1 + # so we must extract the scalar to avoid NumPy scalar-conversion issues. + yield vcz.VariantData(variant_length[0], alleles, None, None) else: for variant_length, alleles, (gt, phased) in zip( variant_lengths, self.iter_alleles(start, stop, num_alleles), self.iter_genotypes(shape, start, stop), ): - yield vcz.VariantData(variant_length, alleles, gt, phased) + yield vcz.VariantData(variant_length[0], alleles, gt, phased) def generate_schema( self, variants_chunk_size=None, samples_chunk_size=None, local_alleles=None diff --git a/docs/Makefile b/docs/Makefile index a295590..732920e 100644 --- a/docs/Makefile +++ b/docs/Makefile @@ -9,6 +9,7 @@ B2Z_VERSION:=$(shell PYTHONPATH=${PYPATH} \ CASTS=_static/vcf2zarr_convert.cast\ _static/vcf2zarr_explode.cast +ASCIINEMA_ARGS=-c "env PS1=\\$$\\ bash --noprofile --norc" BUILDDIR = _build @@ -36,7 +37,7 @@ sample.vcf.gz: _static/vcf2zarr_convert.cast: sample.vcf.gz rm -fR sample.vcz - asciinema-automation -d cast_scripts/vcf2zarr_convert.sh $@ + asciinema-automation -aa '$(ASCIINEMA_ARGS)' -d cast_scripts/vcf2zarr_convert.sh $@ cat _static/vcf2zarr_convert.log asciinema play _static/vcf2zarr_convert.cast cp -R sample.vcz vcf2zarr @@ -44,7 +45,7 @@ _static/vcf2zarr_convert.cast: sample.vcf.gz # TODO rename this cast _static/vcf2zarr_explode.cast: sample.vcf.gz rm -Rf sample.icf sample.vcz - asciinema-automation -d cast_scripts/vcf2zarr_explode.sh $@ + asciinema-automation -aa '$(ASCIINEMA_ARGS)' -d cast_scripts/vcf2zarr_explode.sh $@ cat _static/vcf2zarr_explode.log asciinema play _static/vcf2zarr_explode.cast cp -R sample.icf sample.vcz vcf2zarr diff --git a/docs/build.sh b/docs/build.sh index 4165d36..b91b52f 100755 --- a/docs/build.sh +++ b/docs/build.sh @@ -1,4 +1,4 @@ -#/bin/bash +#!/usr/bin/env bash # Jupyter-build doesn't have an option to automatically show the # saved reports, which makes it difficult to debug the reasons for @@ -6,7 +6,7 @@ REPORTDIR=_build/html/reports -jupyter-book build -Wn --keep-going . +jupyter-book build -W -n --keep-going . RETVAL=$? if [ $RETVAL -ne 0 ]; then if [ -e $REPORTDIR ]; then diff --git a/docs/requirements.txt b/docs/requirements.txt index 8a466d7..948df41 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -1,4 +1,4 @@ asciinema-automation bash_kernel -jupyter-book +jupyter-book<2 sphinx-click diff --git a/docs/vcf2zarr/tutorial.md b/docs/vcf2zarr/tutorial.md index 2125096..42817e3 100644 --- a/docs/vcf2zarr/tutorial.md +++ b/docs/vcf2zarr/tutorial.md @@ -62,7 +62,7 @@ of these arrays is then stored hierarchically within these directories: ```{code-cell} -tree sample.vcz +find sample.vcz -maxdepth 2 -type d | sort ``` You can get a better idea of what's being stored and the sizes diff --git a/pyproject.toml b/pyproject.toml index 50e52f2..98d9fd0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -65,14 +65,14 @@ dev = [ "pytest-xdist", "sgkit>=0.8.0", "tqdm", - "tskit>=0.6.4", + "tskit>=0.6.4,<1", "bed_reader", "cyvcf2" ] -tskit = ["tskit>=0.6.4"] +tskit = ["tskit>=0.6.4,<1"] vcf = ["cyvcf2"] all = [ - "tskit>=0.6.4", + "tskit>=0.6.4,<1", "cyvcf2" ]