diff --git a/Cargo.lock b/Cargo.lock index dcad309..c411d1b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -6140,15 +6140,19 @@ dependencies = [ "arrow-array", "arrow-schema", "async-trait", + "chrono", "datafusion", "futures", + "geo", "geoarrow-array", "geoarrow-schema", "geodatafusion", "icechunk", "object_store 0.12.5", + "tempfile", "thiserror 2.0.18", "tokio", + "wkb", "zarrs", "zarrs_filesystem", "zarrs_icechunk", diff --git a/Cargo.toml b/Cargo.toml index f1df2e3..59e7cfe 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -24,5 +24,9 @@ zarrs_object_store = "0.5.0" zarrs_storage = { version = "0.4.0", features = ["async"] } [dev-dependencies] +chrono = "0.4.44" +geo = "0.31" geoarrow-array = "0.7.0" +tempfile = "3" tokio = { version = "1.48", features = ["macros", "rt-multi-thread"] } +wkb = "0.9" diff --git a/data/icechunk/manifests/KC53SCXBBZQR3HR12Z7G b/data/icechunk/manifests/KC53SCXBBZQR3HR12Z7G deleted file mode 100644 index eabe6dc..0000000 Binary files a/data/icechunk/manifests/KC53SCXBBZQR3HR12Z7G and /dev/null differ diff --git a/data/icechunk/manifests/M0HCK3P6N722F5WSDJZG b/data/icechunk/manifests/M0HCK3P6N722F5WSDJZG deleted file mode 100644 index 97215fb..0000000 Binary files a/data/icechunk/manifests/M0HCK3P6N722F5WSDJZG and /dev/null differ diff --git a/data/icechunk/manifests/S9CF7DYNZ4GHPJ1JNTSG b/data/icechunk/manifests/S9CF7DYNZ4GHPJ1JNTSG deleted file mode 100644 index 1eb9b69..0000000 Binary files a/data/icechunk/manifests/S9CF7DYNZ4GHPJ1JNTSG and /dev/null differ diff --git a/data/icechunk/refs/branch.main/ref.json b/data/icechunk/refs/branch.main/ref.json deleted file mode 100644 index 1f3bbd7..0000000 --- a/data/icechunk/refs/branch.main/ref.json +++ /dev/null @@ -1 +0,0 @@ -{"snapshot":"ZKWW9E6BW6YFNNDKESR0"} \ No newline at end of file diff --git a/data/icechunk/snapshots/1CECHNKREP0F1RSTCMT0 b/data/icechunk/snapshots/1CECHNKREP0F1RSTCMT0 deleted file mode 100644 index b5a1023..0000000 Binary files a/data/icechunk/snapshots/1CECHNKREP0F1RSTCMT0 and /dev/null differ diff --git a/data/icechunk/snapshots/ZKWW9E6BW6YFNNDKESR0 b/data/icechunk/snapshots/ZKWW9E6BW6YFNNDKESR0 deleted file mode 100644 index 819aa66..0000000 Binary files a/data/icechunk/snapshots/ZKWW9E6BW6YFNNDKESR0 and /dev/null differ diff --git a/data/icechunk/transactions/ZKWW9E6BW6YFNNDKESR0 b/data/icechunk/transactions/ZKWW9E6BW6YFNNDKESR0 deleted file mode 100644 index 364f8f0..0000000 Binary files a/data/icechunk/transactions/ZKWW9E6BW6YFNNDKESR0 and /dev/null differ diff --git a/data/zarr_store.zarr/meta/bbox/c/0 b/data/zarr_store.zarr/meta/bbox/c/0 deleted file mode 100644 index 989c27c..0000000 Binary files a/data/zarr_store.zarr/meta/bbox/c/0 and /dev/null differ diff --git a/data/zarr_store.zarr/meta/bbox/zarr.json b/data/zarr_store.zarr/meta/bbox/zarr.json deleted file mode 100644 index 1534872..0000000 --- a/data/zarr_store.zarr/meta/bbox/zarr.json +++ /dev/null @@ -1,38 +0,0 @@ -{ - "shape": [ - 3 - ], - "data_type": "variable_length_bytes", - "chunk_grid": { - "name": "regular", - "configuration": { - "chunk_shape": [ - 3 - ] - } - }, - "chunk_key_encoding": { - "name": "default", - "configuration": { - "separator": "/" - } - }, - "fill_value": "", - "codecs": [ - { - "name": "vlen-bytes", - "configuration": {} - }, - { - "name": "zstd", - "configuration": { - "level": 0, - "checksum": false - } - } - ], - "attributes": {}, - "zarr_format": 3, - "node_type": "array", - "storage_transformers": [] -} \ No newline at end of file diff --git a/data/zarr_store.zarr/meta/collection/c/0 b/data/zarr_store.zarr/meta/collection/c/0 deleted file mode 100644 index e41cab4..0000000 Binary files a/data/zarr_store.zarr/meta/collection/c/0 and /dev/null differ diff --git a/data/zarr_store.zarr/meta/collection/zarr.json b/data/zarr_store.zarr/meta/collection/zarr.json deleted file mode 100644 index 359c83f..0000000 --- a/data/zarr_store.zarr/meta/collection/zarr.json +++ /dev/null @@ -1,38 +0,0 @@ -{ - "shape": [ - 3 - ], - "data_type": "string", - "chunk_grid": { - "name": "regular", - "configuration": { - "chunk_shape": [ - 3 - ] - } - }, - "chunk_key_encoding": { - "name": "default", - "configuration": { - "separator": "/" - } - }, - "fill_value": "", - "codecs": [ - { - "name": "vlen-utf8", - "configuration": {} - }, - { - "name": "zstd", - "configuration": { - "level": 0, - "checksum": false - } - } - ], - "attributes": {}, - "zarr_format": 3, - "node_type": "array", - "storage_transformers": [] -} \ No newline at end of file diff --git a/data/zarr_store.zarr/meta/date/c/0 b/data/zarr_store.zarr/meta/date/c/0 deleted file mode 100644 index e335d79..0000000 Binary files a/data/zarr_store.zarr/meta/date/c/0 and /dev/null differ diff --git a/data/zarr_store.zarr/meta/date/zarr.json b/data/zarr_store.zarr/meta/date/zarr.json deleted file mode 100644 index 0a34c05..0000000 --- a/data/zarr_store.zarr/meta/date/zarr.json +++ /dev/null @@ -1,46 +0,0 @@ -{ - "shape": [ - 3 - ], - "data_type": { - "name": "numpy.datetime64", - "configuration": { - "unit": "ms", - "scale_factor": 1 - } - }, - "chunk_grid": { - "name": "regular", - "configuration": { - "chunk_shape": [ - 3 - ] - } - }, - "chunk_key_encoding": { - "name": "default", - "configuration": { - "separator": "/" - } - }, - "fill_value": -9223372036854775808, - "codecs": [ - { - "name": "bytes", - "configuration": { - "endian": "little" - } - }, - { - "name": "zstd", - "configuration": { - "level": 0, - "checksum": false - } - } - ], - "attributes": {}, - "zarr_format": 3, - "node_type": "array", - "storage_transformers": [] -} \ No newline at end of file diff --git a/data/zarr_store.zarr/meta/zarr.json b/data/zarr_store.zarr/meta/zarr.json deleted file mode 100644 index d1f97da..0000000 --- a/data/zarr_store.zarr/meta/zarr.json +++ /dev/null @@ -1,5 +0,0 @@ -{ - "attributes": {}, - "zarr_format": 3, - "node_type": "group" -} \ No newline at end of file diff --git a/data/zarr_store.zarr/zarr.json b/data/zarr_store.zarr/zarr.json deleted file mode 100644 index d1f97da..0000000 --- a/data/zarr_store.zarr/zarr.json +++ /dev/null @@ -1,5 +0,0 @@ -{ - "attributes": {}, - "zarr_format": 3, - "node_type": "group" -} \ No newline at end of file diff --git a/python/pyproject.toml b/python/pyproject.toml index 0e81389..b1fb1f1 100644 --- a/python/pyproject.toml +++ b/python/pyproject.toml @@ -31,3 +31,24 @@ dev-dependencies = [ "pytest", "shapely", ] + + +[tool.ruff] +line-length = 88 +target-version = "py311" + +[tool.ruff.lint] +select = ["E", "F", "I"] +per-file-ignores = {} + +[tool.ruff.format] +quote-style = "double" +indent-style = "space" +skip-magic-trailing-comma = false +line-ending = "auto" + +[tool.pytest.ini_options] +# Directories to search for tests +testpaths = [ + "tests", +] diff --git a/python/python/zarr_datafusion_search/__init__.py b/python/python/zarr_datafusion_search/__init__.py index df4465a..e542958 100644 --- a/python/python/zarr_datafusion_search/__init__.py +++ b/python/python/zarr_datafusion_search/__init__.py @@ -1,7 +1,6 @@ from __future__ import annotations -from ._rust import ZarrTable -from ._rust import ___version +from ._rust import ZarrTable, ___version __version__: str = ___version() diff --git a/python/tests/conftest.py b/python/tests/conftest.py new file mode 100644 index 0000000..86793d7 --- /dev/null +++ b/python/tests/conftest.py @@ -0,0 +1,51 @@ +"""Pytest fixtures for zarr-datafusion tests.""" +import pytest + +from .fixtures import create_test_icechunk_data, create_test_zarr_data + + +@pytest.fixture +def zarr_store(tmp_path): + """Create a temporary zarr store with test data. + + Yields the path to the zarr store, which is automatically cleaned up after the test. + """ + store_path = tmp_path / "zarr_store.zarr" + create_test_zarr_data(store_path) + yield store_path + + +@pytest.fixture +def icechunk_store(tmp_path): + """Create a temporary icechunk store with test data. + + Yields the path to the icechunk store, which is automatically cleaned up + after the test. + """ + store_path = tmp_path / "icechunk" + create_test_icechunk_data(store_path) + yield store_path + + +@pytest.fixture(scope="session") +def session_zarr_store(tmp_path_factory): + """Create a session-scoped zarr store with test data. + + This fixture is created once per test session and shared across all tests. + Useful for read-only tests that don't modify the data. + """ + store_path = tmp_path_factory.mktemp("data") / "zarr_store.zarr" + create_test_zarr_data(store_path) + yield store_path + + +@pytest.fixture(scope="session") +def session_icechunk_store(tmp_path_factory): + """Create a session-scoped icechunk store with test data. + + This fixture is created once per test session and shared across all tests. + Useful for read-only tests that don't modify the data. + """ + store_path = tmp_path_factory.mktemp("data") / "icechunk" + create_test_icechunk_data(store_path) + yield store_path diff --git a/python/tests/fixtures.py b/python/tests/fixtures.py new file mode 100644 index 0000000..2952697 --- /dev/null +++ b/python/tests/fixtures.py @@ -0,0 +1,89 @@ +from pathlib import Path + +import icechunk +import numpy as np +import shapely +import zarr +from zarr.core.group import Group +from zarr.dtype import VariableLengthBytes, VariableLengthUTF8 + + +def generate_test_data(root: Group) -> None: + """Create test zarr data + + Adds to a zarr store: + - a /meta group which contains + - date: datetime64[ms] array with dates [2023-01-01, 2023-01-02, + 2023-01-03] + - collection: variable length UTF8 array with ["collection_a", + "collection_b", "collection_c"] + - bbox: variable length bytes (WKB) array with boxes at (-10,-10,10,10), + (-20,-20,20,20), (-30,-30,30,30) + + Args: + root: The root Group in the target Zarr or Icechunk store. + """ + meta = root.create_group("meta") + + # Create date array + date_data = np.array( + ["2023-01-01", "2023-01-02", "2023-01-03"], dtype="datetime64[ms]" + ) + meta.create_array("date", data=date_data) + + # Create collection array + collection_data = ["collection_a", "collection_b", "collection_c"] + collection_array = meta.create_array( + "collection", + shape=(len(collection_data),), + dtype=VariableLengthUTF8(), + ) + collection_array[:] = collection_data + + # Create bbox array with WKB-encoded geometries + bbox_data = shapely.to_wkb( + [ + shapely.box(-10.0, -10.0, 10.0, 10.0), + shapely.box(-20.0, -20.0, 20.0, 20.0), + shapely.box(-30.0, -30.0, 30.0, 30.0), + ] + ) + + bbox_array = meta.create_array( + "bbox", + shape=(len(bbox_data),), + dtype=VariableLengthBytes(), + ) + bbox_array[:] = bbox_data + + +def create_test_zarr_data(store_path: Path | str) -> None: + """Create test store data at the specified path. + + Args: + store_path: Path where the Zarr store will be created. Can be string + or Path object. + + """ + store_path = Path(store_path) if isinstance(store_path, str) else store_path + root = zarr.open_group(store_path, mode="w", zarr_format=3) + generate_test_data(root=root) + + +def create_test_icechunk_data(store_path: Path | str) -> None: + """Create test icechunk store at the specified path. + + Args: + store_path: Path where the Icechunk repository will be created. + Can be string or Path object. + + """ + store_path = Path(store_path) if isinstance(store_path, str) else store_path + + storage = icechunk.local_filesystem_storage(str(store_path)) + repo = icechunk.Repository.create(storage) + session = repo.writable_session("main") + + root = zarr.open_group(session.store, mode="w", zarr_format=3) + generate_test_data(root=root) + session.commit("Initial test data") diff --git a/python/tests/test_datafusion.py b/python/tests/test_datafusion.py index ce6cc37..fb3fdc1 100644 --- a/python/tests/test_datafusion.py +++ b/python/tests/test_datafusion.py @@ -1,5 +1,3 @@ -from pathlib import Path - import icechunk import pytest from datafusion import SessionContext @@ -7,13 +5,11 @@ from obstore.store import LocalStore from zarr_datafusion_search import ZarrTable -ROOT_DIR = Path(__file__).parent.parent.parent - -def test_zarr_scan(): +def test_zarr_scan(session_zarr_store): + """Test basic zarr scanning with DataFusion.""" ctx = SessionContext() - zarr_path = ROOT_DIR / "data" / "zarr_store.zarr" - zarr_table = ZarrTable(str(zarr_path), "/meta") + zarr_table = ZarrTable(str(session_zarr_store), "/meta") ctx.register_table("zarr_data", zarr_table) @@ -21,23 +17,27 @@ def test_zarr_scan(): df = ctx.sql(sql) df.show() -def test_spatial_functions_registered(): + +def test_spatial_functions_registered(session_zarr_store): + """Test that spatial functions work with zarr data.""" ctx = SessionContext() register_all(ctx) - zarr_path = ROOT_DIR / "data" / "zarr_store.zarr" - zarr_table = ZarrTable(str(zarr_path), "/meta") + zarr_table = ZarrTable(str(session_zarr_store), "/meta") ctx.register_table("zarr_data", zarr_table) sql = ( "SELECT collection FROM zarr_data " - "WHERE ST_Intersects(bbox, ST_GeomFromText('POLYGON((0 0, 0 5, 5 5, 5 0, 0 0))'))" + "WHERE ST_Intersects(bbox, " + "ST_GeomFromText('POLYGON((0 0, 0 5, 5 5, 5 0, 0 0))'))" ) df = ctx.sql(sql) df.show() + @pytest.mark.asyncio -async def test_zarr_scan_from_obstore(): - store = LocalStore(ROOT_DIR / "data" / "zarr_store.zarr") +async def test_zarr_scan_from_obstore(session_zarr_store): + """Test zarr scanning from object store.""" + store = LocalStore(session_zarr_store) zarr_table = await ZarrTable.from_obstore(store, "/meta") ctx = SessionContext() @@ -51,8 +51,9 @@ async def test_zarr_scan_from_obstore(): @pytest.mark.asyncio -async def test_zarr_scan_from_icechunk(): - storage = icechunk.local_filesystem_storage(ROOT_DIR / "data" / "icechunk") +async def test_zarr_scan_from_icechunk(session_icechunk_store): + """Test zarr scanning from icechunk.""" + storage = icechunk.local_filesystem_storage(session_icechunk_store) repo = icechunk.Repository.open(storage) session = repo.readonly_session("main") diff --git a/scripts/generate_data.py b/scripts/generate_data.py deleted file mode 100644 index 3ce1a3c..0000000 --- a/scripts/generate_data.py +++ /dev/null @@ -1,37 +0,0 @@ -import numpy as np -import shapely -import zarr -from zarr.dtype import VariableLengthBytes, VariableLengthUTF8 - - -# Root of the Zarr store -root = zarr.open_group("data/zarr_store.zarr", mode="w", zarr_format=3) - -meta = root.create_group("meta") - -date_data = np.array(["2023-01-01", "2023-01-02", "2023-01-03"], dtype="datetime64[ms]") -meta.create_array("date", data=date_data) - -collection_data = ["collection_a", "collection_b", "collection_c"] -collection_array = meta.create_array( - "collection", - shape=(len(collection_data),), - dtype=VariableLengthUTF8(), -) -collection_array[:] = collection_data - - -bbox_data = shapely.to_wkb( - [ - shapely.box(-10.0, -10.0, 10.0, 10.0), - shapely.box(-20.0, -20.0, 20.0, 20.0), - shapely.box(-30.0, -30.0, 30.0, 30.0), - ] -) - -bbox_array = meta.create_array( - "bbox", - shape=(len(bbox_data),), - dtype=VariableLengthBytes(), -) -bbox_array[:] = bbox_data diff --git a/scripts/generate_icechunk.py b/scripts/generate_icechunk.py deleted file mode 100644 index 5400df8..0000000 --- a/scripts/generate_icechunk.py +++ /dev/null @@ -1,41 +0,0 @@ -import icechunk -import zarr -import numpy as np -import shapely - -from zarr.dtype import VariableLengthBytes, VariableLengthUTF8 - -location = "data/icechunk" -storage = icechunk.local_filesystem_storage(location) -repo = icechunk.Repository.create(storage) -session = repo.writable_session("main") - -root = zarr.open_group(session.store, mode="w", zarr_format=3) -meta = root.create_group("meta") - -date_data = np.array(["2023-01-01", "2023-01-02", "2023-01-03"], dtype="datetime64[ms]") -meta.create_array("date", data=date_data) - -meta.create_array( - "collection", - shape=(3,), - dtype=VariableLengthUTF8(), -) -meta["collection"][...] = ["collection_a", "collection_b", "collection_c"] - -bbox_data = shapely.to_wkb( - [ - shapely.box(-10.0, -10.0, 10.0, 10.0), - shapely.box(-20.0, -20.0, 20.0, 20.0), - shapely.box(-30.0, -30.0, 30.0, 30.0), - ] -) - -meta.create_array( - "bbox", - shape=len(bbox_data), - dtype=VariableLengthBytes(), -) -meta["bbox"][...] = bbox_data - -session.commit("First") diff --git a/src/schema.rs b/src/schema.rs index f34ab2a..2197abb 100644 --- a/src/schema.rs +++ b/src/schema.rs @@ -131,33 +131,39 @@ mod tests { use std::sync::Arc; use super::*; + use crate::testing::utils::get_local_zarr_store; use zarrs_filesystem::FilesystemStore; - #[test] - fn test_schema_from_zarr_group() { - let storage = Arc::new(FilesystemStore::new("data/zarr_store.zarr").unwrap()); - - let group = Group::open(storage.clone(), "/meta").unwrap(); - let schema = group_arrays_schema(&group).unwrap(); - - let geoarrow_metadata = Arc::new(geoarrow_schema::Metadata::new( - Crs::from_authority_code("EPSG:4326".to_string()), - None, - )); - - let expected_fields = vec![ - Arc::new( - Field::new("bbox", DataType::BinaryView, false) - .with_extension_type(WkbType::new(geoarrow_metadata)), - ), - Arc::new(Field::new("collection", DataType::Utf8View, false)), - Arc::new(Field::new( - "date", - DataType::Timestamp(TimeUnit::Millisecond, None), - false, - )), - ]; - let expected_schema = Arc::new(Schema::new(expected_fields)); - assert_eq!(&schema, &expected_schema); + #[tokio::test] + async fn test_schema_from_zarr_group() { + let wrapper = get_local_zarr_store().await; + let path = wrapper.get_store_path(); + + { + let storage = Arc::new(FilesystemStore::new(path).unwrap()); + + let group = Group::open(storage.clone(), "/meta").unwrap(); + let schema = group_arrays_schema(&group).unwrap(); + + let geoarrow_metadata = Arc::new(geoarrow_schema::Metadata::new( + Crs::from_authority_code("EPSG:4326".to_string()), + None, + )); + + let expected_fields = vec![ + Arc::new( + Field::new("bbox", DataType::BinaryView, false) + .with_extension_type(WkbType::new(geoarrow_metadata)), + ), + Arc::new(Field::new("collection", DataType::Utf8View, false)), + Arc::new(Field::new( + "date", + DataType::Timestamp(TimeUnit::Millisecond, None), + false, + )), + ]; + let expected_schema = Arc::new(Schema::new(expected_fields)); + assert_eq!(&schema, &expected_schema); + } } } diff --git a/src/table_provider.rs b/src/table_provider.rs index 3bc10d0..42d01b8 100644 --- a/src/table_provider.rs +++ b/src/table_provider.rs @@ -458,12 +458,17 @@ impl DisplayAs for ZarrExec { #[cfg(test)] mod tests { + use super::*; + use crate::testing::utils::get_local_zarr_store; use datafusion::prelude::SessionContext; #[tokio::test] async fn test_basic_table_provider() { - let provider = ZarrTableProvider::new_filesystem("data/zarr_store.zarr", "/meta").unwrap(); + let wrapper = get_local_zarr_store().await; + let path = wrapper.get_store_path(); + + let provider = ZarrTableProvider::new_filesystem(path, "/meta").unwrap(); // Register with DataFusion let ctx = SessionContext::new(); @@ -484,7 +489,10 @@ mod tests { #[tokio::test] #[ignore = "Projection support"] async fn test_table_provider_with_sql() { - let provider = ZarrTableProvider::new_filesystem("data/zarr_store.zarr", "/meta").unwrap(); + let wrapper = get_local_zarr_store().await; + let path = wrapper.get_store_path(); + + let provider = ZarrTableProvider::new_filesystem(path, "/meta").unwrap(); // Register with DataFusion let ctx = SessionContext::new(); @@ -519,25 +527,27 @@ mod tests { async fn test_st_intersects_selects_matching_record() { use arrow_array::Array; + let wrapper = get_local_zarr_store().await; + let path = wrapper.get_store_path(); + let ctx = SessionContext::new(); register_spatial_functions(&ctx).expect("Failed to register spatial functions"); - let provider = ZarrTableProvider::new_filesystem("data/zarr_store.zarr", "/meta") - .expect("Failed to create table provider"); + let provider = ZarrTableProvider::new_filesystem(path, "/meta").unwrap(); ctx.register_table("zarr_data", Arc::new(provider)) .expect("Failed to register table"); // Query with a polygon that intersects collection_a // The query box (0,0) to (5,5) is within collection_a's bbox (-10,-10) to (10,10) let sql = " - SELECT collection FROM zarr_data - WHERE ST_Intersects( - bbox, - ST_GeomFromText('POLYGON((0 0, 0 5, 5 5, 5 0, 0 0))') - ) - ORDER BY collection - "; + SELECT collection FROM zarr_data + WHERE ST_Intersects( + bbox, + ST_GeomFromText('POLYGON((0 0, 0 5, 5 5, 5 0, 0 0))') + ) + ORDER BY collection + "; let df = ctx.sql(sql).await.expect("Failed to execute query"); let batches = df.collect().await.expect("Failed to collect results"); @@ -571,24 +581,26 @@ mod tests { /// Test that ST_Intersects correctly returns no records when query geometry doesn't intersect. #[tokio::test] async fn test_st_intersects_no_match() { + let wrapper = get_local_zarr_store().await; + let path = wrapper.get_store_path(); + let ctx = SessionContext::new(); register_spatial_functions(&ctx).expect("Failed to register spatial functions"); + let provider = ZarrTableProvider::new_filesystem(path, "/meta").unwrap(); - let provider = ZarrTableProvider::new_filesystem("data/zarr_store.zarr", "/meta") - .expect("Failed to create table provider"); ctx.register_table("zarr_data", Arc::new(provider)) .expect("Failed to register table"); // Query with a polygon that doesn't intersect any of the test data // The query box (100,100) to (110,110) is far from all test bboxes let sql = " - SELECT collection FROM zarr_data - WHERE ST_Intersects( - bbox, - ST_GeomFromText('POLYGON((100 100, 100 110, 110 110, 110 100, 100 100))') - ) - "; + SELECT collection FROM zarr_data + WHERE ST_Intersects( + bbox, + ST_GeomFromText('POLYGON((100 100, 100 110, 110 110, 110 100, 100 100))') + ) + "; let df = ctx.sql(sql).await.expect("Failed to execute query"); let batches = df.collect().await.expect("Failed to collect results"); @@ -601,25 +613,27 @@ mod tests { async fn test_st_intersects_multiple_matches() { use arrow_array::Array; + let wrapper = get_local_zarr_store().await; + let path = wrapper.get_store_path(); + let ctx = SessionContext::new(); register_spatial_functions(&ctx).expect("Failed to register spatial functions"); - let provider = ZarrTableProvider::new_filesystem("data/zarr_store.zarr", "/meta") - .expect("Failed to create table provider"); + let provider = ZarrTableProvider::new_filesystem(path, "/meta").unwrap(); ctx.register_table("zarr_data", Arc::new(provider)) .expect("Failed to register table"); // Query with a polygon that intersects collection_a and collection_b // The query box (-15,-15) to (15,15) overlaps with both a and b but possibly not c let sql = " - SELECT collection FROM zarr_data - WHERE ST_Intersects( - bbox, - ST_GeomFromText('POLYGON((-15 -15, -15 15, 15 15, 15 -15, -15 -15))') - ) - ORDER BY collection - "; + SELECT collection FROM zarr_data + WHERE ST_Intersects( + bbox, + ST_GeomFromText('POLYGON((-15 -15, -15 15, 15 15, 15 -15, -15 -15))') + ) + ORDER BY collection + "; let df = ctx.sql(sql).await.expect("Failed to execute query"); let batches = df.collect().await.expect("Failed to collect results"); diff --git a/src/testing/datafusion.rs b/src/testing/datafusion.rs index c281a04..938357e 100644 --- a/src/testing/datafusion.rs +++ b/src/testing/datafusion.rs @@ -8,9 +8,15 @@ use icechunk::{Repository, RepositoryConfig}; use tokio::runtime::Handle; use crate::table_provider::ZarrTableProvider; +use crate::testing::utils::get_local_icechunk_store; -async fn create_icechunk_table_provider() -> icechunk::session::Session { - let storage = icechunk::new_local_filesystem_storage(Path::new("data/icechunk")) +#[tokio::test] +async fn test_datafusion() { + let ctx = SessionContext::new(); + + let wrapper = get_local_icechunk_store().await; + let path = wrapper.get_store_path(); + let storage = icechunk::new_local_filesystem_storage(Path::new(&path)) .await .unwrap(); let config = RepositoryConfig::default(); @@ -18,15 +24,8 @@ async fn create_icechunk_table_provider() -> icechunk::session::Session { .await .unwrap(); let version_info = VersionInfo::BranchTipRef("main".to_string()); - repo.readonly_session(&version_info).await.unwrap() -} - -#[tokio::test] -async fn test_datafusion() { - let ctx = SessionContext::new(); + let icechunk_session = repo.readonly_session(&version_info).await.unwrap(); - // Add icechunk session - let icechunk_session = create_icechunk_table_provider().await; let table_provider = Arc::new( ZarrTableProvider::new_icechunk(icechunk_session, Handle::current(), "/meta") .await diff --git a/src/testing/iter_group.rs b/src/testing/iter_group.rs index 4d05abc..16fcf1a 100644 --- a/src/testing/iter_group.rs +++ b/src/testing/iter_group.rs @@ -1,3 +1,5 @@ +use crate::testing::utils::get_local_icechunk_store; +use crate::testing::utils::get_local_zarr_store; use icechunk::{Repository, RepositoryConfig, repository::VersionInfo}; use std::collections::HashMap; use std::path::Path; @@ -6,19 +8,26 @@ use zarrs::group::Group; use zarrs_filesystem::FilesystemStore; use zarrs_icechunk::AsyncIcechunkStore; -#[test] -fn test_load_group() { - let storage = Arc::new(FilesystemStore::new("data/zarr_store.zarr").unwrap()); +#[tokio::test] +async fn test_load_group() { + let wrapper = get_local_zarr_store().await; + let path = wrapper.get_store_path(); - let group = Group::open(storage.clone(), "/meta").unwrap(); - dbg!(group.path()); + { + let storage = Arc::new(FilesystemStore::new(path).unwrap()); + let group = Group::open(storage.clone(), "/meta").unwrap(); + dbg!(group.path()); + } } #[tokio::test] async fn test_load_group_icechunk() { - let storage = icechunk::new_local_filesystem_storage(Path::new("data/icechunk")) + let wrapper = get_local_icechunk_store().await; + let path = wrapper.get_store_path(); + let storage = icechunk::new_local_filesystem_storage(Path::new(&path)) .await .unwrap(); + let config = RepositoryConfig::default(); let repo = Repository::open(Some(config), storage, HashMap::new()) .await diff --git a/src/testing/load_into_arrow.rs b/src/testing/load_into_arrow.rs index a28a74a..c8e0a69 100644 --- a/src/testing/load_into_arrow.rs +++ b/src/testing/load_into_arrow.rs @@ -1,3 +1,5 @@ +use crate::testing::utils::get_local_icechunk_store; +use crate::testing::utils::get_local_zarr_store; use arrow_array::{ArrayRef, RecordBatch, StringArray, TimestampMillisecondArray}; use arrow_schema::{DataType, Field, Schema, TimeUnit}; use geoarrow_array::GeoArrowArray; @@ -12,102 +14,108 @@ use zarrs::array_subset::ArraySubset; use zarrs_filesystem::FilesystemStore; use zarrs_icechunk::AsyncIcechunkStore; -#[test] -fn test_load_zarrs_into_arrow_record_batch() { - // Create a filesystem store pointing to the zarr store - let store = Arc::new(FilesystemStore::new("data/zarr_store.zarr").unwrap()); - - // Load collection array (strings) - let collection_array = Array::open(store.clone(), "/meta/collection").unwrap(); - let collection_subset = ArraySubset::new_with_shape(collection_array.shape().to_vec()); - let collection_data: Vec = collection_array - .retrieve_array_subset_elements(&collection_subset) - .unwrap(); - - // Load date array (datetime64[ms]) - let date_array = Array::open(store.clone(), "/meta/date").unwrap(); - let date_subset = ArraySubset::new_with_shape(date_array.shape().to_vec()); - let date_data: Vec = date_array - .retrieve_array_subset_elements(&date_subset) - .unwrap(); - - // Load bbox array (binary data representing WKB geometries) - let bbox_array = Array::open(store.clone(), "/meta/bbox").unwrap(); - let bbox_subset = ArraySubset::new_with_shape(bbox_array.shape().to_vec()); - let bbox_data: Vec> = bbox_array - .retrieve_array_subset_elements(&bbox_subset) +#[tokio::test] +async fn test_load_zarrs_into_arrow_record_batch() { + let wrapper = get_local_zarr_store().await; + let path = wrapper.get_store_path(); + + { + let store = Arc::new(FilesystemStore::new(path).unwrap()); + + // Load collection array (strings) + let collection_array = Array::open(store.clone(), "/meta/collection").unwrap(); + let collection_subset = ArraySubset::new_with_shape(collection_array.shape().to_vec()); + let collection_data: Vec = collection_array + .retrieve_array_subset_elements(&collection_subset) + .unwrap(); + + // Load date array (datetime64[ms]) + let date_array = Array::open(store.clone(), "/meta/date").unwrap(); + let date_subset = ArraySubset::new_with_shape(date_array.shape().to_vec()); + let date_data: Vec = date_array + .retrieve_array_subset_elements(&date_subset) + .unwrap(); + + // Load bbox array (binary data representing WKB geometries) + let bbox_array = Array::open(store.clone(), "/meta/bbox").unwrap(); + let bbox_subset = ArraySubset::new_with_shape(bbox_array.shape().to_vec()); + let bbox_data: Vec> = bbox_array + .retrieve_array_subset_elements(&bbox_subset) + .unwrap(); + + // Create Arrow arrays from the loaded data + let collection_arrow: ArrayRef = Arc::new(StringArray::from(collection_data.clone())); + let date_arrow: ArrayRef = Arc::new(TimestampMillisecondArray::from(date_data.clone())); + let wkb_crs = Crs::from_authority_code("EPSG:4326".to_string()); + let wkb_metadata = Arc::new(geoarrow_schema::Metadata::new(wkb_crs, None)); + let bbox_refs: Vec<&[u8]> = bbox_data.iter().map(|v| v.as_slice()).collect(); + let wkb_arrow = WkbArray::new(bbox_refs.into(), wkb_metadata); + + // Define the schema + let schema = Arc::new(Schema::new(vec![ + Field::new("collection", DataType::Utf8, false), + Field::new( + "date", + DataType::Timestamp(TimeUnit::Millisecond, None), + false, + ), + wkb_arrow.data_type().to_field("bbox", false), + ])); + + // Create the RecordBatch + let record_batch = RecordBatch::try_new( + schema.clone(), + vec![collection_arrow, date_arrow, wkb_arrow.into_array_ref()], + ) .unwrap(); - // Create Arrow arrays from the loaded data - let collection_arrow: ArrayRef = Arc::new(StringArray::from(collection_data.clone())); - let date_arrow: ArrayRef = Arc::new(TimestampMillisecondArray::from(date_data.clone())); - let wkb_crs = Crs::from_authority_code("EPSG:4326".to_string()); - let wkb_metadata = Arc::new(geoarrow_schema::Metadata::new(wkb_crs, None)); - let bbox_refs: Vec<&[u8]> = bbox_data.iter().map(|v| v.as_slice()).collect(); - let wkb_arrow = WkbArray::new(bbox_refs.into(), wkb_metadata); - - // Define the schema - let schema = Arc::new(Schema::new(vec![ - Field::new("collection", DataType::Utf8, false), - Field::new( - "date", - DataType::Timestamp(TimeUnit::Millisecond, None), - false, - ), - wkb_arrow.data_type().to_field("bbox", false), - ])); - - // Create the RecordBatch - let record_batch = RecordBatch::try_new( - schema.clone(), - vec![collection_arrow, date_arrow, wkb_arrow.into_array_ref()], - ) - .unwrap(); - - // Print the record batch - println!("Created Arrow RecordBatch from Zarr arrays:"); - println!(" Schema: {:?}", record_batch.schema()); - println!(" Num rows: {}", record_batch.num_rows()); - println!(" Num columns: {}", record_batch.num_columns()); - println!("\nData:"); - for i in 0..record_batch.num_rows() { - println!( - " Row {}: collection='{}', date={}, bbox len={}", - i, - collection_data[i], - date_data[i], - bbox_data[i].len() - ); + // Print the record batch + println!("Created Arrow RecordBatch from Zarr arrays:"); + println!(" Schema: {:?}", record_batch.schema()); + println!(" Num rows: {}", record_batch.num_rows()); + println!(" Num columns: {}", record_batch.num_columns()); + println!("\nData:"); + for i in 0..record_batch.num_rows() { + println!( + " Row {}: collection='{}', date={}, bbox len={}", + i, + collection_data[i], + date_data[i], + bbox_data[i].len() + ); + } + + // Assertions + assert_eq!(record_batch.num_rows(), 3); + assert_eq!(record_batch.num_columns(), 3); + assert_eq!(record_batch.schema().fields().len(), 3); + + // Verify the data + let collection_col = record_batch + .column(0) + .as_any() + .downcast_ref::() + .unwrap(); + assert_eq!(collection_col.value(0), "collection_a"); + assert_eq!(collection_col.value(1), "collection_b"); + assert_eq!(collection_col.value(2), "collection_c"); + + let date_col = record_batch + .column(1) + .as_any() + .downcast_ref::() + .unwrap(); + assert_eq!(date_col.value(0), 1672531200000); + assert_eq!(date_col.value(1), 1672617600000); + assert_eq!(date_col.value(2), 1672704000000); } - - // Assertions - assert_eq!(record_batch.num_rows(), 3); - assert_eq!(record_batch.num_columns(), 3); - assert_eq!(record_batch.schema().fields().len(), 3); - - // Verify the data - let collection_col = record_batch - .column(0) - .as_any() - .downcast_ref::() - .unwrap(); - assert_eq!(collection_col.value(0), "collection_a"); - assert_eq!(collection_col.value(1), "collection_b"); - assert_eq!(collection_col.value(2), "collection_c"); - - let date_col = record_batch - .column(1) - .as_any() - .downcast_ref::() - .unwrap(); - assert_eq!(date_col.value(0), 1672531200000); - assert_eq!(date_col.value(1), 1672617600000); - assert_eq!(date_col.value(2), 1672704000000); } #[tokio::test] async fn test_load_zarrs_into_arrow_record_batch_icechunk() { - let storage = icechunk::new_local_filesystem_storage(Path::new("data/icechunk")) + let wrapper = get_local_icechunk_store().await; + let path = wrapper.get_store_path(); + let storage = icechunk::new_local_filesystem_storage(Path::new(&path)) .await .unwrap(); let config = RepositoryConfig::default(); diff --git a/src/testing/load_zarrs.rs b/src/testing/load_zarrs.rs index a0b2153..5309f14 100644 --- a/src/testing/load_zarrs.rs +++ b/src/testing/load_zarrs.rs @@ -1,3 +1,5 @@ +use crate::testing::utils::get_local_icechunk_store; +use crate::testing::utils::get_local_zarr_store; use icechunk::{Repository, RepositoryConfig, repository::VersionInfo}; use std::collections::HashMap; use std::path::Path; @@ -7,43 +9,49 @@ use zarrs::array_subset::ArraySubset; use zarrs_filesystem::FilesystemStore; use zarrs_icechunk::AsyncIcechunkStore; -#[test] -fn test_load_collection_array() { - // Create a filesystem store pointing to the zarr store - let store = Arc::new(FilesystemStore::new("data/zarr_store.zarr").unwrap()); - - // Open the collection array from the /meta/collection path - let collection_array = Array::open(store, "/meta/collection").unwrap(); - - // Print array metadata - println!("Array shape: {:?}", collection_array.shape()); - println!("Data type: {:?}", collection_array.data_type()); - - // Create array subset for the entire array (shape is [3]) - let array_subset = ArraySubset::new_with_shape(collection_array.shape().to_vec()); - - // Read the entire array as strings - let data: Vec = collection_array - .retrieve_array_subset_elements(&array_subset) - .unwrap(); - - println!("Collection array contents:"); - for (i, item) in data.iter().enumerate() { - println!(" [{}]: {}", i, item); +#[tokio::test] +async fn test_load_collection_array() { + let wrapper = get_local_zarr_store().await; + let path = wrapper.get_store_path(); + + { + let store = Arc::new(FilesystemStore::new(path).unwrap()); + + // Open the collection array from the /meta/collection path + let collection_array = Array::open(store, "/meta/collection").unwrap(); + + // Print array metadata + println!("Array shape: {:?}", collection_array.shape()); + println!("Data type: {:?}", collection_array.data_type()); + + // Create array subset for the entire array (shape is [3]) + let array_subset = ArraySubset::new_with_shape(collection_array.shape().to_vec()); + + // Read the entire array as strings + let data: Vec = collection_array + .retrieve_array_subset_elements(&array_subset) + .unwrap(); + + println!("Collection array contents:"); + for (i, item) in data.iter().enumerate() { + println!(" [{}]: {}", i, item); + } + + // Basic assertions + assert!(!data.is_empty(), "Collection array should not be empty"); + assert_eq!( + collection_array.shape(), + &[3], + "Collection array should have 3 elements" + ); } - - // Basic assertions - assert!(!data.is_empty(), "Collection array should not be empty"); - assert_eq!( - collection_array.shape(), - &[3], - "Collection array should have 3 elements" - ); } #[tokio::test] async fn test_load_collection_array_icechunk() { - let storage = icechunk::new_local_filesystem_storage(Path::new("data/icechunk")) + let wrapper = get_local_icechunk_store().await; + let path = wrapper.get_store_path(); + let storage = icechunk::new_local_filesystem_storage(Path::new(&path)) .await .unwrap(); let config = RepositoryConfig::default(); @@ -84,10 +92,12 @@ async fn test_load_collection_array_icechunk() { ); } -#[test] -fn test_load_date_array() { - // Create a filesystem store pointing to the zarr store - let store = Arc::new(FilesystemStore::new("data/zarr_store.zarr").unwrap()); +#[tokio::test] +async fn test_load_date_array() { + let wrapper = get_local_zarr_store().await; + let path = wrapper.get_store_path(); + + let store = Arc::new(FilesystemStore::new(path).unwrap()); // Open the date array from the /meta/date path let date_array = Array::open(store, "/meta/date").unwrap(); @@ -120,7 +130,9 @@ fn test_load_date_array() { #[tokio::test] async fn test_load_date_array_icechunk() { - let storage = icechunk::new_local_filesystem_storage(Path::new("data/icechunk")) + let wrapper = get_local_icechunk_store().await; + let path = wrapper.get_store_path(); + let storage = icechunk::new_local_filesystem_storage(Path::new(&path)) .await .unwrap(); let config = RepositoryConfig::default(); @@ -161,48 +173,17 @@ async fn test_load_date_array_icechunk() { ); } -#[test] -fn test_load_bbox_array() { - // Create a filesystem store pointing to the zarr store - let store = Arc::new(FilesystemStore::new("data/zarr_store.zarr").unwrap()); - - let bbox_array = Array::open(store.clone(), "/meta/bbox").unwrap(); - - println!("HII"); - dbg!(bbox_array.data_type()); - - // let array_subset = ArraySubset::new_with_shape(bbox_array.shape().to_vec()); - // let data: Vec> = bbox_array - // .retrieve_array_subset_elements(&array_subset) - // .unwrap(); - // dbg!(data); - - // // Note: The bbox array uses "variable_length_bytes" data type which is not yet - // // fully supported in zarrs 0.21.2. This test demonstrates reading the raw chunk data. - - // println!("Reading bbox array chunk data from meta/bbox/c/0"); - - // // Read the raw chunk data directly from storage - // let chunk_key = StoreKey::new("meta/bbox/c/0").unwrap(); - // let chunk_data = store.get(&chunk_key).unwrap().unwrap(); - // let chunk_bytes: Vec = chunk_data.to_vec(); - - // println!("Bbox chunk data:"); - // println!(" Chunk key: {}", chunk_key); - // println!(" Raw bytes length: {} bytes", chunk_bytes.len()); - // println!( - // " First 64 bytes (or all if less): {:?}", - // &chunk_bytes[..chunk_bytes.len().min(64)] - // ); - - // // Basic assertions - // assert!( - // !chunk_bytes.is_empty(), - // "Bbox chunk data should not be empty" - // ); - - // // The chunk contains compressed data (zstd) for 3 variable-length byte arrays - // println!("\nNote: This is the compressed chunk data. To properly decode:"); - // println!(" 1. Decompress with zstd"); - // println!(" 2. Decode vlen-bytes format (offsets + data)"); +#[tokio::test] +async fn test_load_bbox_array() { + let wrapper = get_local_zarr_store().await; + let path = wrapper.get_store_path(); + + { + let store = Arc::new(FilesystemStore::new(path).unwrap()); + + let bbox_array = Array::open(store.clone(), "/meta/bbox").unwrap(); + + println!("HII"); + dbg!(bbox_array.data_type()); + } } diff --git a/src/testing/mod.rs b/src/testing/mod.rs index 097f020..8c6f96d 100644 --- a/src/testing/mod.rs +++ b/src/testing/mod.rs @@ -2,3 +2,4 @@ pub mod datafusion; pub mod iter_group; pub mod load_into_arrow; pub mod load_zarrs; +pub mod utils; diff --git a/src/testing/utils.rs b/src/testing/utils.rs new file mode 100644 index 0000000..b2122ea --- /dev/null +++ b/src/testing/utils.rs @@ -0,0 +1,198 @@ +use icechunk::{ObjectStorage, Repository}; +use object_store::local::LocalFileSystem; +use std::collections::HashMap; +use std::path::PathBuf; +use std::sync::Arc; +use tempfile::TempDir; +use zarrs::array::{ArrayBuilder, DataType, FillValue}; +use zarrs::array_subset::ArraySubset; +use zarrs::metadata_ext::data_type::NumpyTimeUnit; +use zarrs_icechunk::AsyncIcechunkStore; +use zarrs_object_store::AsyncObjectStore; +use zarrs_storage::AsyncReadableWritableListableStorageTraits; + +pub(crate) struct LocalZarrStoreWrapper { + _temp_dir: TempDir, + store: Arc>, + path: PathBuf, +} + +#[cfg(test)] +impl LocalZarrStoreWrapper { + pub(crate) fn new() -> Self { + let temp_dir = TempDir::new().unwrap(); + let path = temp_dir.path().to_path_buf(); + let store = AsyncObjectStore::new(LocalFileSystem::new_with_prefix(&path).unwrap()); + Self { + _temp_dir: temp_dir, + store: Arc::new(store), + path, + } + } + + pub(crate) fn get_store(&self) -> Arc> { + self.store.clone() + } + + pub(crate) fn get_store_path(&self) -> String { + self.path.as_os_str().to_str().unwrap().into() + } +} + +pub(crate) struct LocalIcechunkStoreWrapper { + _temp_dir: TempDir, + store: Arc, + path: PathBuf, +} + +#[cfg(test)] +impl LocalIcechunkStoreWrapper { + pub(crate) async fn new() -> Self { + let temp_dir = TempDir::new().unwrap(); + let path = temp_dir.path().to_path_buf(); + let repo = Repository::create( + None, + Arc::new(ObjectStorage::new_local_filesystem(&path).await.unwrap()), + HashMap::new(), + ) + .await + .unwrap(); + let session = repo.writable_session("main").await.unwrap(); + Self { + _temp_dir: temp_dir, + store: Arc::new(AsyncIcechunkStore::new(session)), + path, + } + } + + pub(crate) fn get_store(&self) -> Arc { + self.store.clone() + } + + pub(crate) fn get_store_path(&self) -> String { + self.path.to_str().unwrap().into() + } +} + +/// Creates three arrays in /meta group: +/// - date: datetime64[ms] array with dates [2023-01-01, 2023-01-02, 2023-01-03] +/// - collection: variable length UTF8 array with ["collection_a", "collection_b", "collection_c"] +/// - bbox: variable length bytes array with WKB-encoded boxes +#[cfg(test)] +pub(crate) async fn generate_test_data_arrays( + store: Arc, +) -> Result<(), Box> { + use chrono::NaiveDate; + + // Zarrs requires explicit group creation to + // avoid implicit group issues. + let group = zarrs::group::GroupBuilder::new().build(store.clone(), "/")?; + group.async_store_metadata().await?; + + let group = zarrs::group::GroupBuilder::new().build(store.clone(), "/meta")?; + group.async_store_metadata().await?; + + let dates = [ + NaiveDate::from_ymd_opt(2023, 1, 1).unwrap(), + NaiveDate::from_ymd_opt(2023, 1, 2).unwrap(), + NaiveDate::from_ymd_opt(2023, 1, 3).unwrap(), + ]; + + // Convert to milliseconds since Unix epoch + let date_data: Vec = dates + .iter() + .map(|d| d.and_hms_opt(0, 0, 0).unwrap().and_utc().timestamp_millis()) + .collect(); + + let date_array = ArrayBuilder::new( + vec![3], + vec![3], + DataType::NumpyDateTime64 { + unit: NumpyTimeUnit::Millisecond, + scale_factor: 1.try_into().unwrap(), + }, + FillValue::from(0i64), + ) + .build(store.clone(), "/meta/date")?; + + date_array.async_store_metadata().await?; + date_array + .async_store_array_subset_elements(&ArraySubset::new_with_shape(vec![3]), &date_data) + .await?; + + let collection_data = vec!["collection_a", "collection_b", "collection_c"]; + + let collection_array = + ArrayBuilder::new(vec![3], vec![3], DataType::String, FillValue::from("")) + .build(store.clone(), "/meta/collection")?; + + collection_array.async_store_metadata().await?; + collection_array + .async_store_array_subset_elements(&ArraySubset::new_with_shape(vec![3]), &collection_data) + .await?; + + // Create bbox array - variable length bytes (WKB format) + // Boxes: [-10,-10,10,10], [-20,-20,20,20], [-30,-30,30,30] + use geo::Polygon; + use geo::Rect; + use wkb::writer::{WriteOptions, write_polygon}; + + let boxes = vec![ + Rect::new( + geo::coord! { x: -10.0, y: -10.0 }, + geo::coord! { x: 10.0, y: 10.0 }, + ), + Rect::new( + geo::coord! { x: -20.0, y: -20.0 }, + geo::coord! { x: 20.0, y: 20.0 }, + ), + Rect::new( + geo::coord! { x: -30.0, y: -30.0 }, + geo::coord! { x: 30.0, y: 30.0 }, + ), + ]; + + let write_options = WriteOptions::default(); + let mut bbox_data: Vec> = Vec::new(); + for rect in boxes { + let polygon: Polygon = rect.into(); + let mut buffer = Vec::new(); + write_polygon(&mut buffer, &polygon, &write_options)?; + bbox_data.push(buffer); + } + + let bbox_array = ArrayBuilder::new(vec![3], vec![3], DataType::Bytes, FillValue::from(vec![])) + .build(store.clone(), "/meta/bbox")?; + + bbox_array.async_store_metadata().await?; + bbox_array + .async_store_array_subset_elements(&ArraySubset::new_with_shape(vec![3]), &bbox_data) + .await?; + + Ok(()) +} + +pub(crate) async fn get_local_zarr_store() -> LocalZarrStoreWrapper { + let wrapper = LocalZarrStoreWrapper::new(); + let store = wrapper.get_store(); + generate_test_data_arrays(store) + .await + .expect("Failed to generate test data arrays"); + wrapper +} + +pub(crate) async fn get_local_icechunk_store() -> LocalIcechunkStoreWrapper { + let wrapper = LocalIcechunkStoreWrapper::new().await; + let store = wrapper.get_store(); + generate_test_data_arrays(store.clone()) + .await + .expect("Failed to generate test data arrays"); + let _ = store + .session() + .write() + .await + .commit("test data", None) + .await + .unwrap(); + wrapper +}