Skip to content

Commit fdd38ca

Browse files
sharkinsspatial, TomNicholas, pre-commit-ci[bot], maxrjones, chuckwondo
authored
Refactor codebase to a new simplified Parser->ManifestStore model. (#601)
See #601 for details --------- Co-authored-by: Tom Nicholas <tom@earthmover.io> Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Max Jones <14077947+maxrjones@users.noreply.github.com> Co-authored-by: Chuck Daniels <chuck@developmentseed.org> Co-authored-by: Raphael Hagen <norlandrhagen@gmail.com>
1 parent 639e8d4 commit fdd38ca

55 files changed

Lines changed: 3285 additions & 3418 deletions

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

conftest.py

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -256,8 +256,17 @@ def netcdf4_file_with_2d_coords(tmp_path: Path) -> str:
256256
def netcdf4_virtual_dataset(netcdf4_file):
257257
"""Create a virtual dataset from a NetCDF4 file."""
258258
from virtualizarr import open_virtual_dataset
259-
260-
with open_virtual_dataset(netcdf4_file, loadable_variables=[]) as ds:
259+
from virtualizarr.parsers import HDFParser
260+
from virtualizarr.tests.utils import obstore_local
261+
262+
store = obstore_local(file_url=netcdf4_file)
263+
parser = HDFParser()
264+
with open_virtual_dataset(
265+
file_url=netcdf4_file,
266+
object_store=store,
267+
parser=parser,
268+
loadable_variables=[],
269+
) as ds:
261270
yield ds
262271

263272

docs/api.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ User API
1313
Reading
1414
-------
1515

16-
.. currentmodule:: virtualizarr.backend
16+
.. currentmodule:: virtualizarr.xarray
1717
.. autosummary::
1818
:nosignatures:
1919
:toctree: generated/

pyproject.toml

Lines changed: 22 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
1-
21
[project]
32
name = "virtualizarr"
43
description = "Create virtual Zarr stores from archival data using xarray API"
@@ -28,7 +27,8 @@ dependencies = [
2827
"numcodecs>=0.15.1",
2928
"ujson",
3029
"packaging",
31-
"zarr>=3.0.2",
30+
"zarr>=3.0.8",
31+
"obstore>=0.5.1",
3232
]
3333

3434
# Dependency sets under optional-dependencies are available via PyPI
@@ -40,25 +40,16 @@ remote = [
4040
"aiohttp",
4141
"s3fs",
4242
]
43-
obstore = [
44-
"obstore>=0.5.1",
45-
]
46-
# non-kerchunk-based readers
43+
44+
# non-kerchunk-based parsers
4745
hdf = [
48-
"virtualizarr[remote]",
4946
"h5py",
5047
"hdf5plugin",
5148
"imagecodecs",
5249
"imagecodecs-numcodecs==2024.6.1",
53-
"obstore>=0.5.1",
5450
]
5551

56-
# kerchunk-based readers
57-
hdf5 = [
58-
"virtualizarr[remote]",
59-
"kerchunk>=0.2.8",
60-
"h5py",
61-
]
52+
# kerchunk-based parsers
6253
netcdf3 = [
6354
"virtualizarr[remote]",
6455
"kerchunk>=0.2.8",
@@ -69,11 +60,17 @@ fits = [
6960
"kerchunk>=0.2.8",
7061
"astropy",
7162
]
72-
all_readers = [
63+
kerchunk_parquet = [
64+
"virtualizarr[remote]",
65+
"fastparquet",
66+
]
67+
68+
# kerchunk
69+
all_parsers = [
7370
"virtualizarr[hdf]",
74-
"virtualizarr[hdf5]",
7571
"virtualizarr[netcdf3]",
7672
"virtualizarr[fits]",
73+
"virtualizarr[kerchunk_parquet]",
7774
]
7875

7976
# writers
@@ -183,14 +180,14 @@ run-tests-html-cov = { cmd = "pytest -n auto --run-network-tests --verbose --cov
183180

184181
# Define which features and groups to include in different pixi (similar to conda) environments
185182
[tool.pixi.environments]
186-
min-deps = ["dev", "test", "hdf", "hdf5", "hdf5-lib"] # VirtualiZarr/conftest.py using h5py, so the minimum set of dependencies for testing still includes hdf libs
183+
min-deps = ["dev", "test", "hdf", "hdf5-lib"] # VirtualiZarr/conftest.py uses h5py, so the minimum set of dependencies for testing still includes hdf libs
187184
# Inherit from min-deps to get all the test commands, along with optional dependencies
188-
test = ["dev", "test", "remote", "hdf", "hdf5", "netcdf3", "fits", "icechunk", "kerchunk", "hdf5-lib", "obstore"]
189-
test-py311 = ["dev", "test", "remote", "hdf", "hdf5", "netcdf3", "fits", "icechunk", "kerchunk", "hdf5-lib", "obstore", "py311"] # test against python 3.11
190-
test-py312 = ["dev", "test", "remote", "hdf", "hdf5", "netcdf3", "fits", "icechunk", "kerchunk", "hdf5-lib", "obstore", "py312"] # test against python 3.12
191-
minio = ["dev", "remote", "hdf", "hdf5", "netcdf3", "fits", "icechunk", "kerchunk", "hdf5-lib", "obstore", "py312", "minio"]
192-
upstream = ["dev", "test", "hdf", "hdf5", "hdf5-lib", "netcdf3", "upstream", "icechunk-dev"]
193-
all = ["dev", "test", "remote", "hdf", "hdf5", "netcdf3", "fits", "icechunk", "kerchunk", "hdf5-lib", "obstore", "all_readers", "all_writers"]
185+
test = ["dev", "test", "remote", "hdf", "netcdf3", "fits", "icechunk", "kerchunk", "kerchunk_parquet", "hdf5-lib"]
186+
test-py311 = ["dev", "test", "remote", "hdf", "netcdf3", "fits", "icechunk", "kerchunk", "kerchunk_parquet", "hdf5-lib", "py311"] # test against python 3.11
187+
test-py312 = ["dev", "test", "remote", "hdf", "netcdf3", "fits", "icechunk", "kerchunk", "kerchunk_parquet", "hdf5-lib", "py312"] # test against python 3.12
188+
minio = ["dev", "remote", "hdf", "netcdf3", "fits", "icechunk", "kerchunk", "hdf5-lib", "py312", "minio"]
189+
upstream = ["dev", "test", "hdf", "hdf5-lib", "netcdf3", "upstream", "icechunk-dev"]
190+
all = ["dev", "test", "remote", "hdf", "netcdf3", "fits", "icechunk", "kerchunk","kerchunk_parquet", "hdf5-lib", "all_parsers", "all_writers"]
194191
docs = ["docs"]
195192

196193
# Define commands to run within the docs environment
@@ -201,8 +198,8 @@ readthedocs = "rm -rf $READTHEDOCS_OUTPUT/html && cp -r docs/_build/html $READTH
201198

202199
# Define commands to run within the minio environment
203200
[tool.pixi.feature.minio.tasks]
204-
run-tests = { cmd = "pytest virtualizarr/tests/test_manifests/test_store.py virtualizarr/tests/test_readers/test_hdf/test_hdf_manifest_store.py --run-minio-tests --run-network-tests --verbose" }
205-
run-tests-xml-cov = { cmd = "pytest virtualizarr/tests/test_manifests/test_store.py virtualizarr/tests/test_readers/test_hdf/test_hdf_manifest_store.py --run-minio-tests --run-network-tests --verbose --cov-report=xml" }
201+
run-tests = { cmd = "pytest virtualizarr/tests/test_manifests/test_store.py virtualizarr/tests/test_parsers/test_hdf/test_hdf_manifest_store.py --run-minio-tests --run-network-tests --verbose" }
202+
run-tests-xml-cov = { cmd = "pytest virtualizarr/tests/test_manifests/test_store.py virtualizarr/tests/test_parsers/test_hdf/test_hdf_manifest_store.py --run-minio-tests --run-network-tests --verbose --cov-report=xml" }
206203

207204
[tool.setuptools_scm]
208205
fallback_version = "9999"

virtualizarr/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,8 @@
44
VirtualiZarrDatasetAccessor,
55
VirtualiZarrDataTreeAccessor,
66
)
7-
from virtualizarr.backend import open_virtual_dataset, open_virtual_mfdataset
87
from virtualizarr.manifests import ChunkManifest, ManifestArray
8+
from virtualizarr.xarray import open_virtual_dataset, open_virtual_mfdataset
99

1010
try:
1111
__version__ = _version("virtualizarr")

0 commit comments

Comments
 (0)