From a17791bbf3c01d754d2fe695a5c874863b535272 Mon Sep 17 00:00:00 2001 From: Kyle Barron Date: Wed, 22 Oct 2025 17:38:22 -0400 Subject: [PATCH 1/4] feat: Use schema inference code path in Zarr table provider --- src/error.rs | 19 +++--- src/table_provider.rs | 135 +++++++++++++++++++++++++----------------- 2 files changed, 93 insertions(+), 61 deletions(-) diff --git a/src/error.rs b/src/error.rs index 4782282..9327de5 100644 --- a/src/error.rs +++ b/src/error.rs @@ -3,17 +3,22 @@ use thiserror::Error; #[derive(Error, Debug)] pub enum ZarrDataFusionError { - #[error("DataFusion error: {0}")] - DataFusion(#[from] DataFusionError), - - #[error("Zarrs error: {0}")] - Zarrs(#[from] zarrs::array::ArrayError), + // Zarrs errors + #[error("Zarrs array creation error: {0}")] + ArrayCreateError(#[from] zarrs::array::ArrayCreateError), #[error("Zarrs filesystem create error: {0}")] FilesystemStoreCreateError(#[from] zarrs_filesystem::FilesystemStoreCreateError), - #[error("Zarrs array creation error: {0}")] - ArrayCreateError(#[from] zarrs::array::ArrayCreateError), + #[error("Zarrs group create error: {0}")] + GroupCreateError(#[from] zarrs::group::GroupCreateError), + + #[error("Zarrs error: {0}")] + Zarrs(#[from] zarrs::array::ArrayError), + + // Other errors + #[error("DataFusion error: {0}")] + DataFusion(#[from] DataFusionError), #[error("Arrow error: {0}")] Arrow(#[from] arrow::error::ArrowError), diff --git a/src/table_provider.rs b/src/table_provider.rs index e74b73b..634ca66 100644 --- a/src/table_provider.rs +++ b/src/table_provider.rs @@ -1,5 +1,5 @@ -use arrow_array::{ArrayRef, RecordBatch, StringArray, TimestampMillisecondArray}; -use arrow_schema::{DataType, Field, Schema, SchemaRef, TimeUnit}; +use arrow_array::{ArrayRef, RecordBatch, StringViewArray, TimestampMillisecondArray}; +use arrow_schema::SchemaRef; use async_trait::async_trait; use datafusion::catalog::Session; use datafusion::datasource::{TableProvider, TableType}; @@ -14,19 +14,21 @@ use datafusion::physical_plan::{ SendableRecordBatchStream, }; use geoarrow_array::GeoArrowArray; -use geoarrow_array::array::WktArray; -use geoarrow_schema::{Crs, WktType}; +use geoarrow_array::array::WktViewArray; +use geoarrow_schema::Crs; use object_store::ObjectStore; use std::any::Any; use std::fmt::{self, Debug}; use std::sync::Arc; use zarrs::array::{Array, ElementOwned}; use zarrs::array_subset::ArraySubset; +use zarrs::group::Group; use zarrs::storage::{AsyncReadableListableStorageTraits, ReadableListableStorageTraits}; use zarrs_filesystem::{FilesystemStore, FilesystemStoreCreateError}; use zarrs_storage::{MaybeSend, MaybeSync}; use crate::error::ZarrDataFusionResult; +use crate::schema::{group_arrays_schema, group_arrays_schema_async}; /// A simple DataFusion table provider that loads data from a Zarr store #[derive(Debug)] @@ -38,40 +40,27 @@ pub struct ZarrTableProvider { impl ZarrTableProvider { /// Create a new ZarrTableProvider from a Zarr store path pub fn new_filesystem>( - zarr_path: P, - ) -> Result> { - let zarr_backend = ZarrBackend::new_filesystem(zarr_path)?; - let schema = Self::construct_schema(); + base_path: P, + group_path: &str, + ) -> ZarrDataFusionResult { + let zarr_backend = SyncZarrBackend::new_filesystem(base_path)?; + let schema = zarr_backend.infer_group_schema(group_path)?; Ok(Self { schema, - zarr_backend, + zarr_backend: zarr_backend.into(), }) } - pub fn new_object_store(store: T) -> Self { - let zarr_backend = ZarrBackend::new_object_store(store); - let schema = Self::construct_schema(); - Self { + pub async fn new_object_store( + store: T, + group_path: &str, + ) -> ZarrDataFusionResult { + let zarr_backend = AsyncZarrBackend::new_object_store(store); + let schema = zarr_backend.infer_group_schema(group_path).await?; + Ok(Self { schema, - zarr_backend, - } - } - - fn construct_schema() -> SchemaRef { - // Define the schema based on the expected Zarr arrays - let wkt_crs = Crs::from_authority_code("EPSG:4326".to_string()); - let wkt_metadata = Arc::new(geoarrow_schema::Metadata::new(wkt_crs, None)); - - Arc::new(Schema::new(vec![ - Field::new("collection", DataType::Utf8, false), - Field::new( - "date", - DataType::Timestamp(TimeUnit::Millisecond, None), - false, - ), - Field::new("wkt_field", DataType::Utf8, false) - .with_extension_type(WktType::new(wkt_metadata)), - ])) + zarr_backend: zarr_backend.into(), + }) } } @@ -108,17 +97,32 @@ impl TableProvider for ZarrTableProvider { struct SyncZarrBackend(Arc); impl SyncZarrBackend { + fn new_filesystem>( + base_path: P, + ) -> Result { + Ok(SyncZarrBackend(Arc::new(FilesystemStore::new(base_path)?))) + } + fn load_array(&self, path: &str) -> ZarrDataFusionResult> { let array = Array::open(self.0.clone(), path)?; let full_subset = ArraySubset::new_with_shape(array.shape().to_vec()); Ok(array.retrieve_array_subset_elements(&full_subset)?) } + + fn infer_group_schema(&self, group_path: &str) -> ZarrDataFusionResult { + let group = Group::open(self.0.clone(), group_path)?; + group_arrays_schema(&group) + } } #[derive(Clone)] struct AsyncZarrBackend(Arc); impl AsyncZarrBackend { + fn new_object_store(store: T) -> Self { + AsyncZarrBackend(Arc::new(zarrs_object_store::AsyncObjectStore::new(store))) + } + async fn load_array( &self, path: &str, @@ -129,37 +133,52 @@ impl AsyncZarrBackend { .async_retrieve_array_subset_elements(&full_subset) .await?) } + + async fn infer_group_schema(&self, group_path: &str) -> ZarrDataFusionResult { + let group = Group::async_open(self.0.clone(), group_path).await?; + group_arrays_schema_async(&group).await + } } #[derive(Clone)] enum ZarrBackend { - Sync(SyncZarrBackend), Async(AsyncZarrBackend), + Sync(SyncZarrBackend), } impl Debug for ZarrBackend { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { match self { - ZarrBackend::Sync(_) => write!(f, "ZarrBackend::Sync"), ZarrBackend::Async(_) => write!(f, "ZarrBackend::Async"), + ZarrBackend::Sync(_) => write!(f, "ZarrBackend::Sync"), } } } -impl ZarrBackend { - fn new_filesystem>( - base_path: P, - ) -> Result { - Ok(Self::Sync(SyncZarrBackend(Arc::new(FilesystemStore::new( - base_path, - )?)))) +impl From for ZarrBackend { + fn from(async_backend: AsyncZarrBackend) -> Self { + ZarrBackend::Async(async_backend) } +} - fn new_object_store(store: T) -> Self { - Self::Async(AsyncZarrBackend(Arc::new( - zarrs_object_store::AsyncObjectStore::new(store), - ))) +impl From for ZarrBackend { + fn from(sync_backend: SyncZarrBackend) -> Self { + ZarrBackend::Sync(sync_backend) } +} + +impl ZarrBackend { + // fn new_filesystem>( + // base_path: P, + // ) -> Result { + // Ok(Self::Sync(SyncZarrBackend::new_filesystem(base_path)?)) + // } + + // fn new_object_store(store: T) -> Self { + // Self::Async(AsyncZarrBackend(Arc::new( + // zarrs_object_store::AsyncObjectStore::new(store), + // ))) + // } async fn load_array( &self, @@ -177,17 +196,25 @@ impl ZarrBackend { let bbox_data: Vec = self.load_array("/meta/bbox").await?; // Create Arrow arrays from the loaded data - let collection_arrow: ArrayRef = Arc::new(StringArray::from(collection_data)); + let collection_arrow: ArrayRef = Arc::new(StringViewArray::from(collection_data)); let date_arrow: ArrayRef = Arc::new(TimestampMillisecondArray::from(date_data)); let wkt_crs = Crs::from_authority_code("EPSG:4326".to_string()); let wkt_metadata = Arc::new(geoarrow_schema::Metadata::new(wkt_crs, None)); - let wkt_arrow = WktArray::new(bbox_data.into(), wkt_metadata); + let wkt_arrow = WktViewArray::new(bbox_data.into(), wkt_metadata); + + let columns = schema + .fields() + .iter() + .map(|field| match field.name().as_str() { + "collection" => collection_arrow.clone(), + "date" => date_arrow.clone(), + "bbox" => wkt_arrow.clone().into_array_ref(), + _ => panic!("Unexpected field name: {}", field.name()), + }) + .collect(); // Create the RecordBatch - let record_batch = RecordBatch::try_new( - schema, - vec![collection_arrow, date_arrow, wkt_arrow.into_array_ref()], - )?; + let record_batch = RecordBatch::try_new(schema, columns)?; Ok(record_batch) } @@ -279,7 +306,7 @@ mod tests { #[tokio::test] async fn test_basic_table_provider() { - let provider = ZarrTableProvider::new_filesystem("data/zarr_store.zarr").unwrap(); + let provider = ZarrTableProvider::new_filesystem("data/zarr_store.zarr", "/meta").unwrap(); // Register with DataFusion let ctx = SessionContext::new(); @@ -300,7 +327,7 @@ mod tests { #[tokio::test] #[ignore = "Projection support"] async fn test_table_provider_with_sql() { - let provider = ZarrTableProvider::new_filesystem("data/zarr_store.zarr").unwrap(); + let provider = ZarrTableProvider::new_filesystem("data/zarr_store.zarr", "/meta").unwrap(); // Register with DataFusion let ctx = SessionContext::new(); @@ -325,7 +352,7 @@ mod tests { let collection_col = batch .column(0) .as_any() - .downcast_ref::() + .downcast_ref::() .unwrap(); assert_eq!(collection_col.value(0), "collection_a"); } From e8b8fdc7a0851d013707d552895285bcb4c87450 Mon Sep 17 00:00:00 2001 From: Kyle Barron Date: Wed, 22 Oct 2025 17:43:49 -0400 Subject: [PATCH 2/4] fix table construction --- python/src/table.rs | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/python/src/table.rs b/python/src/table.rs index 016ea66..7a3bef9 100644 --- a/python/src/table.rs +++ b/python/src/table.rs @@ -2,6 +2,7 @@ use std::sync::Arc; use datafusion_ffi::table_provider::FFI_TableProvider; use pyo3::prelude::*; +use pyo3::pybacked::PyBackedStr; use pyo3::types::PyCapsule; use zarr_datafusion_search::table_provider::ZarrTableProvider; @@ -11,13 +12,14 @@ pub struct PyZarrTable(Arc); #[pymethods] impl PyZarrTable { #[new] - pub fn new(zarr_path: String) -> PyResult { - let table_provider = ZarrTableProvider::new_filesystem(zarr_path).map_err(|e| { - PyErr::new::(format!( - "Failed to create ZarrTableProvider: {}", - e - )) - })?; + pub fn new(zarr_path: String, group_path: PyBackedStr) -> PyResult { + let table_provider = + ZarrTableProvider::new_filesystem(zarr_path, &group_path).map_err(|e| { + PyErr::new::(format!( + "Failed to create ZarrTableProvider: {}", + e + )) + })?; Ok(PyZarrTable(Arc::new(table_provider))) } From 9f31dc4c5c36e971083a2c5018af418f2955f25f Mon Sep 17 00:00:00 2001 From: Kyle Barron Date: Wed, 22 Oct 2025 18:02:05 -0400 Subject: [PATCH 3/4] fix python test --- python/python/zarr_datafusion_search/_rust.pyi | 2 +- python/tests/test_datafusion.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/python/python/zarr_datafusion_search/_rust.pyi b/python/python/zarr_datafusion_search/_rust.pyi index f76e565..fa146ff 100644 --- a/python/python/zarr_datafusion_search/_rust.pyi +++ b/python/python/zarr_datafusion_search/_rust.pyi @@ -1,3 +1,3 @@ class ZarrTable: - def __init__(self, path: str) -> None: ... + def __init__(self, zarr_path: str, group_path: str) -> None: ... def __datafusion_table_provider__(self) -> object: ... diff --git a/python/tests/test_datafusion.py b/python/tests/test_datafusion.py index b32a0e3..1b550fb 100644 --- a/python/tests/test_datafusion.py +++ b/python/tests/test_datafusion.py @@ -10,7 +10,7 @@ def test_zarr_scan(): ctx = SessionContext() zarr_path = ROOT_DIR / "data" / "zarr_store.zarr" - zarr_table = zarr_datafusion_search.ZarrTable(str(zarr_path)) + zarr_table = zarr_datafusion_search.ZarrTable(str(zarr_path), "/meta") ctx.register_table_provider("zarr_data", zarr_table) From 5a5f24bd4ca6ee7b6d63ccd35bf93e83d927fade Mon Sep 17 00:00:00 2001 From: Kyle Barron Date: Wed, 22 Oct 2025 18:10:22 -0400 Subject: [PATCH 4/4] use patched zarrs-metadata --- python/Cargo.lock | 315 ++++++++++++++++++---------------------------- python/Cargo.toml | 4 + python/src/lib.rs | 3 + 3 files changed, 130 insertions(+), 192 deletions(-) diff --git a/python/Cargo.lock b/python/Cargo.lock index c6e7257..78399d8 100644 --- a/python/Cargo.lock +++ b/python/Cargo.lock @@ -50,15 +50,6 @@ dependencies = [ "core_extensions", ] -[[package]] -name = "addr2line" -version = "0.25.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1b5d307320b3181d6d7954e663bd7c774a838b8220fe0593c86d9fb09f498b4b" -dependencies = [ - "gimli", -] - [[package]] name = "adler2" version = "2.0.1" @@ -73,7 +64,7 @@ checksum = "5a15f179cd60c4584b8a8c596927aadc462e27f2ca70c04e0071964a73ba7a75" dependencies = [ "cfg-if", "const-random", - "getrandom 0.3.3", + "getrandom 0.3.4", "once_cell", "version_check", "zerocopy", @@ -408,7 +399,7 @@ checksum = "ddf3728566eefa873833159754f5732fb0951d3649e6e5b891cc70d56dd41673" dependencies = [ "proc-macro2", "quote", - "syn 2.0.106", + "syn 2.0.107", ] [[package]] @@ -430,7 +421,7 @@ checksum = "9035ad2d096bed7955a320ee7e2230574d28fd3c3a0f186cbea1ff3c7eed5dbb" dependencies = [ "proc-macro2", "quote", - "syn 2.0.106", + "syn 2.0.107", ] [[package]] @@ -450,7 +441,7 @@ checksum = "ffdcb70bdbc4d478427380519163274ac86e52916e10f0a8889adf0f96d3fee7" dependencies = [ "proc-macro2", "quote", - "syn 2.0.106", + "syn 2.0.107", ] [[package]] @@ -459,21 +450,6 @@ version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8" -[[package]] -name = "backtrace" -version = "0.3.76" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bb531853791a215d7c62a30daf0dde835f381ab5de4589cfe7c649d2cbe92bd6" -dependencies = [ - "addr2line", - "cfg-if", - "libc", - "miniz_oxide", - "object", - "rustc-demangle", - "windows-link", -] - [[package]] name = "base64" version = "0.22.1" @@ -482,9 +458,9 @@ checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6" [[package]] name = "bigdecimal" -version = "0.4.8" +version = "0.4.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1a22f228ab7a1b23027ccc6c350b72868017af7ea8356fbdf19f8d991c690013" +checksum = "560f42649de9fa436b73517378a147ec21f6c997a546581df4b4b31677828934" dependencies = [ "autocfg", "libm", @@ -495,9 +471,9 @@ dependencies = [ [[package]] name = "bitflags" -version = "2.9.4" +version = "2.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2261d10cca569e4643e526d8dc2e62e433cc8aba21ab764233731f8d369bf394" +checksum = "812e12b5285cc515a9c72a5c1d3b6d46a19dac5acfef5265968c166106e31dd3" [[package]] name = "blake2" @@ -587,7 +563,7 @@ checksum = "f9abbd1bc6865053c427f7198e6af43bfdedc55ab791faed4fbd361d789575ff" dependencies = [ "proc-macro2", "quote", - "syn 2.0.106", + "syn 2.0.107", ] [[package]] @@ -613,9 +589,9 @@ dependencies = [ [[package]] name = "bzip2" -version = "0.6.0" +version = "0.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bea8dcd42434048e4f7a304411d9273a411f647446c1234a65ce0554923f4cff" +checksum = "f3a53fac24f34a81bc9954b5d6cfce0c21e18ec6959f44f56e8e90e4bb7c346c" dependencies = [ "libbz2-rs-sys", ] @@ -632,9 +608,9 @@ dependencies = [ [[package]] name = "cc" -version = "1.2.40" +version = "1.2.41" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e1d05d92f4b1fd76aad469d46cdd858ca761576082cd37df81416691e50199fb" +checksum = "ac9fe6cdbb24b6ade63616c0a0688e45bb56732262c158df3c0c4bea4ca47cb7" dependencies = [ "find-msvc-tools", "jobserver", @@ -644,9 +620,9 @@ dependencies = [ [[package]] name = "cfg-if" -version = "1.0.3" +version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2fd1289c04a9ea8cb22300a459a72a385d7c73d3259e2ed7dcb2af674838cfa9" +checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801" [[package]] name = "chrono" @@ -826,21 +802,21 @@ dependencies = [ [[package]] name = "csv" -version = "1.3.1" +version = "1.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "acdc4883a9c96732e4733212c01447ebd805833b7275a73ca3ee080fd77afdaf" +checksum = "52cd9d68cf7efc6ddfaaee42e7288d3a99d613d4b50f76ce9827ae0c6e14f938" dependencies = [ "csv-core", "itoa", "ryu", - "serde", + "serde_core", ] [[package]] name = "csv-core" -version = "0.1.12" +version = "0.1.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7d02f3b0da4c6504f86e9cd789d8dbafab48c2321be74e9987593de5a894d93d" +checksum = "704a3c26996a80471189265814dbc2c257598b96b8a7feae2d31ace646bb9782" dependencies = [ "memchr", ] @@ -870,7 +846,7 @@ dependencies = [ "arrow-schema", "async-trait", "bytes", - "bzip2 0.6.0", + "bzip2 0.6.1", "chrono", "datafusion-catalog", "datafusion-catalog-listing", @@ -1009,7 +985,7 @@ dependencies = [ "async-compression", "async-trait", "bytes", - "bzip2 0.6.0", + "bzip2 0.6.1", "chrono", "datafusion-common", "datafusion-common-runtime", @@ -1182,9 +1158,9 @@ dependencies = [ [[package]] name = "datafusion-ffi" -version = "50.1.0" +version = "50.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cda6e7e5f98b9587f2e32db9eb550539441e18edcea90075d6504da811de5960" +checksum = "a4bef25e2b86d9921f7a98b1a86bfb50cebe2fd97f3a9b96c85ce475e9ef78b0" dependencies = [ "abi_stable", "arrow", @@ -1339,7 +1315,7 @@ checksum = "c4868fe261ba01e462033eff141e90453b7630722cad6420fddd81ebb786f6e2" dependencies = [ "datafusion-expr", "quote", - "syn 2.0.106", + "syn 2.0.107", ] [[package]] @@ -1467,9 +1443,9 @@ dependencies = [ [[package]] name = "datafusion-proto" -version = "50.1.0" +version = "50.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9ca714dff69fe3de2901ec64ec3dba8d0623ae583f6fae3c6fa57355d7882017" +checksum = "87c686bfd29ec5362fe229247ef03a0beb063b50e307bf72d0f1a80b9d90f8b8" dependencies = [ "arrow", "chrono", @@ -1483,9 +1459,9 @@ dependencies = [ [[package]] name = "datafusion-proto-common" -version = "50.1.0" +version = "50.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b7b628ba0f7bd1fa9565f80b19a162bcb3cbc082bbc42b29c4619760621f4e32" +checksum = "1972d37680d48d4f6167b535e0a23ea9f814a21e1359d0bd5c30d1345b95aef9" dependencies = [ "arrow", "datafusion-common", @@ -1568,7 +1544,7 @@ checksum = "bda628edc44c4bb645fbe0f758797143e4e07926f7ebf4e9bdfbd3d2ce621df3" dependencies = [ "proc-macro2", "quote", - "syn 2.0.106", + "syn 2.0.107", "unicode-xid", ] @@ -1591,7 +1567,7 @@ checksum = "97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0" dependencies = [ "proc-macro2", "quote", - "syn 2.0.106", + "syn 2.0.107", ] [[package]] @@ -1645,9 +1621,9 @@ checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be" [[package]] name = "find-msvc-tools" -version = "0.1.3" +version = "0.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0399f9d26e5191ce32c498bebd31e7a3ceabc2745f0ac54af3f335126c3f24b3" +checksum = "52051878f80a721bb68ebfbc930e07b65ba72f2da88968ea5c06fd6ca3d3a127" [[package]] name = "fixedbitset" @@ -1688,6 +1664,12 @@ version = "0.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d9c4f5dac5e15c24eb999c26181a6ca40b39fe946cbe4c263c7209467bc83af2" +[[package]] +name = "foldhash" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "77ce24cb58228fbb8aa041425bb1050850ac19177686ea6e0f41a70416f56fdb" + [[package]] name = "form_urlencoded" version = "1.2.2" @@ -1753,7 +1735,7 @@ checksum = "162ee34ebcb7c64a8abebc059ce0fee27c2262618d7b60ed8faf72fef13c3650" dependencies = [ "proc-macro2", "quote", - "syn 2.0.106", + "syn 2.0.107", ] [[package]] @@ -1797,9 +1779,9 @@ dependencies = [ [[package]] name = "generic-array" -version = "0.14.7" +version = "0.14.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "85649ca51fd72272d7821adaf274ad91c288277713d9c18820d8499a7ff69e9a" +checksum = "4bb6743198531e02858aeaea5398fcc883e71851fcbcb5a2f773e2fb6cb1edf2" dependencies = [ "typenum", "version_check", @@ -1863,30 +1845,24 @@ dependencies = [ "cfg-if", "js-sys", "libc", - "wasi 0.11.1+wasi-snapshot-preview1", + "wasi", "wasm-bindgen", ] [[package]] name = "getrandom" -version = "0.3.3" +version = "0.3.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "26145e563e54f2cadc477553f1ec5ee650b00862f0a58bcd12cbdc5f0ea2d2f4" +checksum = "899def5c37c4fd7b2664648c28120ecec138e4d395b459e5ca34f9cce2dd77fd" dependencies = [ "cfg-if", "js-sys", "libc", "r-efi", - "wasi 0.14.7+wasi-0.2.4", + "wasip2", "wasm-bindgen", ] -[[package]] -name = "gimli" -version = "0.32.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e629b9b98ef3dd8afe6ca2bd0f89306cec16d43d907889945bc5d6687f2f13c7" - [[package]] name = "glob" version = "0.3.3" @@ -1895,9 +1871,9 @@ checksum = "0cc23270f6e1808e30a928bdc84dea0b9b4136a8bc82338574f23baf47bbd280" [[package]] name = "half" -version = "2.7.0" +version = "2.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e54c115d4f30f52c67202f079c5f9d8b49db4691f460fdb0b4c2e838261b2ba5" +checksum = "6ea2d84b969582b4b1864a92dc5d27cd2b77b622a8d79306834f1be5ba20d84b" dependencies = [ "bytemuck", "cfg-if", @@ -1922,9 +1898,7 @@ version = "0.15.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9229cfe53dfd69f0609a49f65461bd93001ea1ef889cd5529dd176593f5338a1" dependencies = [ - "allocator-api2", - "equivalent", - "foldhash", + "foldhash 0.1.5", ] [[package]] @@ -1932,6 +1906,11 @@ name = "hashbrown" version = "0.16.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5419bdc4f6a9207fbeba6d11b604d481addf78ecd10c11ad51e76c2f6482748d" +dependencies = [ + "allocator-api2", + "equivalent", + "foldhash 0.2.0", +] [[package]] name = "heck" @@ -2095,9 +2074,9 @@ dependencies = [ [[package]] name = "indexmap" -version = "2.11.4" +version = "2.12.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4b0f83760fb341a774ed326568e19f5a863af4a952def8c39f9ab92fd95b88e5" +checksum = "6717a8d2a5a929a1a2eb43a12812498ed141a0bcfb7e8f7844fbdbe4303bba9f" dependencies = [ "equivalent", "hashbrown 0.16.0", @@ -2105,9 +2084,12 @@ dependencies = [ [[package]] name = "indoc" -version = "2.0.6" +version = "2.0.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f4c7245a08504955605670dbf141fceab975f15ca21570696aebe9d2e71576bd" +checksum = "79cf5c93f93228cf8efb3ba362535fb11199ac548a09ce117c9b1adc3030d706" +dependencies = [ + "rustversion", +] [[package]] name = "integer-encoding" @@ -2124,17 +2106,6 @@ dependencies = [ "rustversion", ] -[[package]] -name = "io-uring" -version = "0.7.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "046fa2d4d00aea763528b4950358d0ead425372445dc8ff86312b3c69ff7727b" -dependencies = [ - "bitflags", - "cfg-if", - "libc", -] - [[package]] name = "itertools" version = "0.14.0" @@ -2156,7 +2127,7 @@ version = "0.1.34" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9afb3de4395d6b3e67a780b6de64b51c978ecf11cb9a462c66be7d4ca9039d33" dependencies = [ - "getrandom 0.3.3", + "getrandom 0.3.4", "libc", ] @@ -2314,11 +2285,11 @@ checksum = "34080505efa8e45a4b816c349525ebe327ceaa8559756f0356cba97ef3bf7432" [[package]] name = "lru" -version = "0.16.1" +version = "0.16.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bfe949189f46fabb938b3a9a0be30fdd93fd8a09260da863399a8cf3db756ec8" +checksum = "96051b46fc183dc9cd4a223960ef37b9af631b55191852a8274bfef064cda20f" dependencies = [ - "hashbrown 0.15.5", + "hashbrown 0.16.0", ] [[package]] @@ -2396,17 +2367,6 @@ dependencies = [ "simd-adler32", ] -[[package]] -name = "mio" -version = "1.0.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "78bed444cc8a2160f01cbcf811ef18cac863ad68ae8ca62092e8db51d51c761c" -dependencies = [ - "libc", - "wasi 0.11.1+wasi-snapshot-preview1", - "windows-sys 0.59.0", -] - [[package]] name = "moka" version = "0.12.11" @@ -2447,7 +2407,7 @@ checksum = "e4db6d5580af57bf992f59068d4ea26fd518574ff48d7639b255a36f9de6e7e9" dependencies = [ "proc-macro2", "quote", - "syn 2.0.106", + "syn 2.0.107", ] [[package]] @@ -2542,9 +2502,9 @@ dependencies = [ [[package]] name = "num_enum" -version = "0.7.4" +version = "0.7.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a973b4e44ce6cad84ce69d797acf9a044532e4184c4f267913d1b546a0727b7a" +checksum = "b1207a7e20ad57b847bbddc6776b968420d38292bbfe2089accff5e19e82454c" dependencies = [ "num_enum_derive", "rustversion", @@ -2552,23 +2512,14 @@ dependencies = [ [[package]] name = "num_enum_derive" -version = "0.7.4" +version = "0.7.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "77e878c846a8abae00dd069496dbe8751b16ac1c3d6bd2a7283a938e8228f90d" +checksum = "ff32365de1b6743cb203b710788263c44a03de03802daf96092f2da4fe6ba4d7" dependencies = [ "proc-macro-crate", "proc-macro2", "quote", - "syn 2.0.106", -] - -[[package]] -name = "object" -version = "0.37.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ff76201f031d8863c38aa7f905eca4f53abbfa15f609db4277d44cd8938f33fe" -dependencies = [ - "memchr", + "syn 2.0.107", ] [[package]] @@ -2796,9 +2747,9 @@ dependencies = [ [[package]] name = "proc-macro2" -version = "1.0.101" +version = "1.0.102" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "89ae43fd86e4158d6db51ad8e2b80f313af9cc74f5c0e03ccb87de09998732de" +checksum = "8e0f6df8eaa422d97d72edcd152e1451618fed47fabbdbd5a8864167b1d4aff7" dependencies = [ "unicode-ident", ] @@ -2823,7 +2774,7 @@ dependencies = [ "itertools", "proc-macro2", "quote", - "syn 2.0.106", + "syn 2.0.107", ] [[package]] @@ -2880,7 +2831,7 @@ dependencies = [ "proc-macro2", "pyo3-macros-backend", "quote", - "syn 2.0.106", + "syn 2.0.107", ] [[package]] @@ -2893,14 +2844,14 @@ dependencies = [ "proc-macro2", "pyo3-build-config", "quote", - "syn 2.0.106", + "syn 2.0.107", ] [[package]] name = "quick_cache" -version = "0.6.17" +version = "0.6.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ba15f5bccfb18c666351668b97bbff66da5093f96757ca15299e4e594fe1316e" +checksum = "7ada44a88ef953a3294f6eb55d2007ba44646015e18613d2f213016379203ef3" dependencies = [ "ahash", "equivalent", @@ -2949,7 +2900,7 @@ version = "0.9.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "99d9a13982dcf210057a8a78572b2217b667c3beacbf3a0d8b454f6f82837d38" dependencies = [ - "getrandom 0.3.3", + "getrandom 0.3.4", ] [[package]] @@ -3004,7 +2955,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "76009fbe0614077fc1a2ce255e3a1881a2e3a3527097d5dc6d8212c585e7e38b" dependencies = [ "quote", - "syn 2.0.106", + "syn 2.0.107", ] [[package]] @@ -3018,9 +2969,9 @@ dependencies = [ [[package]] name = "regex" -version = "1.11.3" +version = "1.12.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8b5288124840bee7b386bc413c487869b360b2b4ec421ea56425128692f2a82c" +checksum = "843bc0191f75f3e22651ae5f1e72939ab2f72a4bc30fa80a066bd66edefc24d4" dependencies = [ "aho-corasick", "memchr", @@ -3030,9 +2981,9 @@ dependencies = [ [[package]] name = "regex-automata" -version = "0.4.11" +version = "0.4.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "833eb9ce86d40ef33cb1306d8accf7bc8ec2bfea4355cbdebb3df68b40925cad" +checksum = "5276caf25ac86c8d810222b3dbb938e512c55c6831a10f3e6ed1c93b84041f1c" dependencies = [ "aho-corasick", "memchr", @@ -3041,9 +2992,9 @@ dependencies = [ [[package]] name = "regex-syntax" -version = "0.8.6" +version = "0.8.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "caf4aa5b0f434c91fe5c7f1ecb6a5ece2130b02ad2a590589dda5146df959001" +checksum = "7a2d987857b319362043e95f5353c0535c1f58eec5336fdfcf626430af7def58" [[package]] name = "repr_offset" @@ -3068,12 +3019,6 @@ dependencies = [ "windows-sys 0.52.0", ] -[[package]] -name = "rustc-demangle" -version = "0.1.26" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "56f7d92ca342cea22a06f2121d944b4fd82af56988c270852495420f961d4ace" - [[package]] name = "rustc_version" version = "0.4.1" @@ -3162,7 +3107,7 @@ checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79" dependencies = [ "proc-macro2", "quote", - "syn 2.0.106", + "syn 2.0.107", ] [[package]] @@ -3187,7 +3132,7 @@ checksum = "175ee3e80ae9982737ca543e96133087cbd9a485eecc3bc4de9c1a37b47ea59c" dependencies = [ "proc-macro2", "quote", - "syn 2.0.106", + "syn 2.0.107", ] [[package]] @@ -3272,7 +3217,7 @@ checksum = "da5fc6819faabb412da764b99d3b713bb55083c11e7e0c00144d386cd6a1939c" dependencies = [ "proc-macro2", "quote", - "syn 2.0.106", + "syn 2.0.107", ] [[package]] @@ -3310,7 +3255,7 @@ dependencies = [ "proc-macro2", "quote", "rustversion", - "syn 2.0.106", + "syn 2.0.107", ] [[package]] @@ -3332,9 +3277,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.106" +version = "2.0.107" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ede7c438028d4436d71104916910f5bb611972c5cfd7f89b8300a8186e6fada6" +checksum = "2a26dbd934e5451d21ef060c018dae56fc073894c5a7896f882928a76e6d081b" dependencies = [ "proc-macro2", "quote", @@ -3349,7 +3294,7 @@ checksum = "728a70f3dbaf5bab7f0c4b1ac8d7ae5ea60a4b5549c8a5914361c99147a709d2" dependencies = [ "proc-macro2", "quote", - "syn 2.0.106", + "syn 2.0.107", ] [[package]] @@ -3371,7 +3316,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2d31c77bdf42a745371d260a26ca7163f1e0924b64afa0b688e61b5a9fa02f16" dependencies = [ "fastrand", - "getrandom 0.3.3", + "getrandom 0.3.4", "once_cell", "rustix", "windows-sys 0.61.2", @@ -3403,7 +3348,7 @@ checksum = "4fee6c4efc90059e10f81e6d42c60a18f76588c3d74cb83a0b242a2b6c7504c1" dependencies = [ "proc-macro2", "quote", - "syn 2.0.106", + "syn 2.0.107", ] [[package]] @@ -3414,7 +3359,7 @@ checksum = "3ff15c8ecd7de3849db632e14d18d2571fa09dfc5ed93479bc4485c7a517c913" dependencies = [ "proc-macro2", "quote", - "syn 2.0.106", + "syn 2.0.107", ] [[package]] @@ -3458,29 +3403,24 @@ dependencies = [ [[package]] name = "tokio" -version = "1.47.1" +version = "1.48.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "89e49afdadebb872d3145a5638b59eb0691ea23e46ca484037cfab3b76b95038" +checksum = "ff360e02eab121e0bc37a2d3b4d4dc622e6eda3a8e5253d5435ecf5bd4c68408" dependencies = [ - "backtrace", "bytes", - "io-uring", - "libc", - "mio", "pin-project-lite", - "slab", "tokio-macros", ] [[package]] name = "tokio-macros" -version = "2.5.0" +version = "2.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6e06d43f1345a3bcd39f6a56dbb7dcab2ba47e68e8ac134855e7e2bdbaf8cab8" +checksum = "af407857209536a95c8e56f8231ef2c2e2aff839b22e07a1ffcbc617e9db9fa5" dependencies = [ "proc-macro2", "quote", - "syn 2.0.106", + "syn 2.0.107", ] [[package]] @@ -3498,18 +3438,18 @@ dependencies = [ [[package]] name = "toml_datetime" -version = "0.7.2" +version = "0.7.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "32f1085dec27c2b6632b04c80b3bb1b4300d6495d1e129693bdda7d91e72eec1" +checksum = "f2cdb639ebbc97961c51720f858597f7f24c4fc295327923af55b74c3c724533" dependencies = [ "serde_core", ] [[package]] name = "toml_edit" -version = "0.23.6" +version = "0.23.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f3effe7c0e86fdff4f69cdd2ccc1b96f933e24811c5441d44904e8683e27184b" +checksum = "6485ef6d0d9b5d0ec17244ff7eb05310113c3f316f2d14200d4de56b3cb98f8d" dependencies = [ "indexmap", "toml_datetime", @@ -3519,9 +3459,9 @@ dependencies = [ [[package]] name = "toml_parser" -version = "1.0.3" +version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4cf893c33be71572e0e9aa6dd15e6677937abd686b066eac3f8cd3531688a627" +checksum = "c0cbe268d35bdb4bb5a56a2de88d0ad0eb70af5384a99d648cd4b3d04039800e" dependencies = [ "winnow", ] @@ -3545,7 +3485,7 @@ checksum = "81383ab64e72a7a8b8e13130c49e3dab29def6d0c7d76a03087b3cf71c5c6903" dependencies = [ "proc-macro2", "quote", - "syn 2.0.106", + "syn 2.0.107", ] [[package]] @@ -3598,9 +3538,9 @@ checksum = "f8c1ae7cc0fdb8b842d65d127cb981574b0d2b249b74d1c7a2986863dc134f71" [[package]] name = "unicode-ident" -version = "1.0.19" +version = "1.0.20" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f63a545481291138910575129486daeaf8ac54aee4387fe7906919f7830c7d9d" +checksum = "462eeb75aeb73aea900253ce739c8e18a67423fadf006037cd3ff27e82748a06" [[package]] name = "unicode-segmentation" @@ -3662,7 +3602,7 @@ version = "1.18.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2f87b8aa10b915a06587d0dec516c282ff295b475d94abf425d62b57710070a2" dependencies = [ - "getrandom 0.3.3", + "getrandom 0.3.4", "js-sys", "wasm-bindgen", ] @@ -3695,15 +3635,6 @@ version = "0.11.1+wasi-snapshot-preview1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ccf3ec651a847eb01de73ccad15eb7d99f80485de043efb2f370cd654f4ea44b" -[[package]] -name = "wasi" -version = "0.14.7+wasi-0.2.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "883478de20367e224c0090af9cf5f9fa85bed63a95c1abf3afc5c083ebc06e8c" -dependencies = [ - "wasip2", -] - [[package]] name = "wasip2" version = "1.0.1+wasi-0.2.4" @@ -3736,7 +3667,7 @@ dependencies = [ "log", "proc-macro2", "quote", - "syn 2.0.106", + "syn 2.0.107", "wasm-bindgen-shared", ] @@ -3771,7 +3702,7 @@ checksum = "9f07d2f20d4da7b26400c9f4a0511e6e0345b040694e8a75bd41d578fa4421d7" dependencies = [ "proc-macro2", "quote", - "syn 2.0.106", + "syn 2.0.107", "wasm-bindgen-backend", "wasm-bindgen-shared", ] @@ -3857,7 +3788,7 @@ checksum = "053e2e040ab57b9dc951b72c264860db7eb3b0200ba345b4e4c3b14f67855ddf" dependencies = [ "proc-macro2", "quote", - "syn 2.0.106", + "syn 2.0.107", ] [[package]] @@ -3868,7 +3799,7 @@ checksum = "3f316c4a2570ba26bbec722032c4099d8c8bc095efccdc15688708623367e358" dependencies = [ "proc-macro2", "quote", - "syn 2.0.106", + "syn 2.0.107", ] [[package]] @@ -4061,7 +3992,7 @@ checksum = "38da3c9736e16c5d3c8c597a9aaa5d1fa565d0532ae05e27c24aa62fb32c0ab6" dependencies = [ "proc-macro2", "quote", - "syn 2.0.106", + "syn 2.0.107", "synstructure", ] @@ -4093,6 +4024,7 @@ dependencies = [ "datafusion-ffi", "pyo3", "zarr-datafusion-search", + "zarrs_metadata", ] [[package]] @@ -4113,7 +4045,7 @@ dependencies = [ "derive_more", "flate2", "futures", - "getrandom 0.3.3", + "getrandom 0.3.4", "half", "inventory", "itertools", @@ -4178,9 +4110,8 @@ dependencies = [ [[package]] name = "zarrs_metadata" -version = "0.6.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8cad059cebcb7a23fb72ffb5b138e693932a302b956d64c94c5873b3a3e63316" +version = "0.6.2" +source = "git+https://github.com/zarrs/zarrs?rev=6ea0464cf50e481cbae0e89de267c7991ed65f3f#6ea0464cf50e481cbae0e89de267c7991ed65f3f" dependencies = [ "derive_more", "half", @@ -4273,7 +4204,7 @@ checksum = "88d2b8d9c68ad2b9e4340d7832716a4d21a22a1154777ad56ea55c51a9cf3831" dependencies = [ "proc-macro2", "quote", - "syn 2.0.106", + "syn 2.0.107", ] [[package]] @@ -4293,7 +4224,7 @@ checksum = "d71e5d6e06ab090c67b5e44993ec16b72dcbaabc526db883a360057678b48502" dependencies = [ "proc-macro2", "quote", - "syn 2.0.106", + "syn 2.0.107", "synstructure", ] @@ -4327,7 +4258,7 @@ checksum = "5b96237efa0c878c64bd89c436f661be4e46b2f3eff1ebb976f7ef2321d2f58f" dependencies = [ "proc-macro2", "quote", - "syn 2.0.106", + "syn 2.0.107", ] [[package]] diff --git a/python/Cargo.toml b/python/Cargo.toml index 9f26e65..61dbe9a 100644 --- a/python/Cargo.toml +++ b/python/Cargo.toml @@ -21,3 +21,7 @@ crate-type = ["cdylib"] datafusion-ffi = "50" pyo3 = { version = "0.26", features = ["abi3-py39"] } zarr-datafusion-search = { path = "../" } +zarrs_metadata = "0.6.1" + +[patch.crates-io] +zarrs_metadata = { git = "https://github.com/zarrs/zarrs", rev = "6ea0464cf50e481cbae0e89de267c7991ed65f3f" } diff --git a/python/src/lib.rs b/python/src/lib.rs index 632f434..861ebb2 100644 --- a/python/src/lib.rs +++ b/python/src/lib.rs @@ -1,5 +1,8 @@ #![cfg_attr(not(test), warn(unused_crate_dependencies))] +// Use patched version of zarrs-metadata +use zarrs_metadata as _; + mod table; use pyo3::prelude::*;