Skip to content

Commit 30ca784

Browse files
authored
Merge branch 'main' into 0.9.x
2 parents afbbb4d + 0196cb1 commit 30ca784

42 files changed

Lines changed: 3408 additions & 1996 deletions

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

Cargo.lock

Lines changed: 131 additions & 122 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -110,6 +110,7 @@ rand = "0.9.3"
110110
regex = "1.11.3"
111111
reqwest = { version = "0.12.12", default-features = false, features = ["json"] }
112112
roaring = { version = "0.11" }
113+
rstest = "0.26"
113114
fastnum = { version = "0.7", default-features = false, features = ["std", "serde"] }
114115
serde = { version = "1.0.219", features = ["rc"] }
115116
serde_bytes = "0.11.17"

README.md

Lines changed: 42 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -27,13 +27,18 @@ Rust implementation of [Apache Iceberg™](https://iceberg.apache.org/).
2727

2828
The Apache Iceberg Rust project is composed of the following components:
2929

30-
| Name | Release | Docs |
31-
|--------------------------|-----------------------------------------------------------------|-------------------------------------------------------------------------------------------------------|
32-
| [iceberg] | [![iceberg image]][iceberg link] | [![docs release]][iceberg release docs] [![docs dev]][iceberg dev docs] |
33-
| [iceberg-datafusion] | [![iceberg-datafusion image]][iceberg-datafusion link] | [![docs release]][iceberg-datafusion release docs] [![docs dev]][iceberg-datafusion dev docs] |
34-
| [iceberg-catalog-glue] | [![iceberg-catalog-glue image]][iceberg-catalog-glue link] | [![docs release]][iceberg-catalog-glue release docs] [![docs dev]][iceberg-catalog-glue dev docs] |
35-
| [iceberg-catalog-hms] | [![iceberg-catalog-hms image]][iceberg-catalog-hms link] | [![docs release]][iceberg-catalog-hms release docs] [![docs dev]][iceberg-catalog-hms dev docs] |
36-
| [iceberg-catalog-rest] | [![iceberg-catalog-rest image]][iceberg-catalog-rest link] | [![docs release]][iceberg-catalog-rest release docs] [![docs dev]][iceberg-catalog-rest dev docs] |
30+
| Name | Release | Docs |
31+
|-------------------------------|--------------------------------------------------------------------------|-----------------------------------------------------------------------------------------------------------------|
32+
| [iceberg] | [![iceberg image]][iceberg link] | [![docs release]][iceberg release docs] [![docs dev]][iceberg dev docs] |
33+
| [iceberg-catalog-loader] | [![iceberg-catalog-loader image]][iceberg-catalog-loader link] | [![docs release]][iceberg-catalog-loader release docs] [![docs dev]][iceberg-catalog-loader dev docs] |
34+
| [iceberg-catalog-glue] | [![iceberg-catalog-glue image]][iceberg-catalog-glue link] | [![docs release]][iceberg-catalog-glue release docs] [![docs dev]][iceberg-catalog-glue dev docs] |
35+
| [iceberg-catalog-hms] | [![iceberg-catalog-hms image]][iceberg-catalog-hms link] | [![docs release]][iceberg-catalog-hms release docs] [![docs dev]][iceberg-catalog-hms dev docs] |
36+
| [iceberg-catalog-rest] | [![iceberg-catalog-rest image]][iceberg-catalog-rest link] | [![docs release]][iceberg-catalog-rest release docs] [![docs dev]][iceberg-catalog-rest dev docs] |
37+
| [iceberg-catalog-s3tables] | [![iceberg-catalog-s3tables image]][iceberg-catalog-s3tables link] | [![docs release]][iceberg-catalog-s3tables release docs] [![docs dev]][iceberg-catalog-s3tables dev docs] |
38+
| [iceberg-catalog-sql] | [![iceberg-catalog-sql image]][iceberg-catalog-sql link] | [![docs release]][iceberg-catalog-sql release docs] [![docs dev]][iceberg-catalog-sql dev docs] |
39+
| [iceberg-cache-moka] | [![iceberg-cache-moka image]][iceberg-cache-moka link] | [![docs release]][iceberg-cache-moka release docs] [![docs dev]][iceberg-cache-moka dev docs] |
40+
| [iceberg-datafusion] | [![iceberg-datafusion image]][iceberg-datafusion link] | [![docs release]][iceberg-datafusion release docs] [![docs dev]][iceberg-datafusion dev docs] |
41+
| [iceberg-storage-opendal] | [![iceberg-storage-opendal image]][iceberg-storage-opendal link] | [![docs release]][iceberg-storage-opendal release docs] [![docs dev]][iceberg-storage-opendal dev docs] |
3742

3843
[docs release]: https://img.shields.io/badge/docs-release-blue
3944
[docs dev]: https://img.shields.io/badge/docs-dev-blue
@@ -61,13 +66,42 @@ The Apache Iceberg Rust project is composed of the following components:
6166
[iceberg-catalog-hms release docs]: https://docs.rs/iceberg-catalog-hms
6267
[iceberg-catalog-hms dev docs]: https://rust.iceberg.apache.org/api/iceberg_catalog_hms/
6368

64-
6569
[iceberg-catalog-rest]: crates/catalog/rest/README.md
6670
[iceberg-catalog-rest image]: https://img.shields.io/crates/v/iceberg-catalog-rest.svg
6771
[iceberg-catalog-rest link]: https://crates.io/crates/iceberg-catalog-rest
6872
[iceberg-catalog-rest release docs]: https://docs.rs/iceberg-catalog-rest
6973
[iceberg-catalog-rest dev docs]: https://rust.iceberg.apache.org/api/iceberg_catalog_rest/
7074

75+
[iceberg-catalog-sql]: crates/catalog/sql
76+
[iceberg-catalog-sql image]: https://img.shields.io/crates/v/iceberg-catalog-sql.svg
77+
[iceberg-catalog-sql link]: https://crates.io/crates/iceberg-catalog-sql
78+
[iceberg-catalog-sql release docs]: https://docs.rs/iceberg-catalog-sql
79+
[iceberg-catalog-sql dev docs]: https://rust.iceberg.apache.org/api/iceberg_catalog_sql/
80+
81+
[iceberg-catalog-s3tables]: crates/catalog/s3tables/README.md
82+
[iceberg-catalog-s3tables image]: https://img.shields.io/crates/v/iceberg-catalog-s3tables.svg
83+
[iceberg-catalog-s3tables link]: https://crates.io/crates/iceberg-catalog-s3tables
84+
[iceberg-catalog-s3tables release docs]: https://docs.rs/iceberg-catalog-s3tables
85+
[iceberg-catalog-s3tables dev docs]: https://rust.iceberg.apache.org/api/iceberg_catalog_s3tables/
86+
87+
[iceberg-storage-opendal]: crates/storage/opendal/README.md
88+
[iceberg-storage-opendal image]: https://img.shields.io/crates/v/iceberg-storage-opendal.svg
89+
[iceberg-storage-opendal link]: https://crates.io/crates/iceberg-storage-opendal
90+
[iceberg-storage-opendal release docs]: https://docs.rs/iceberg-storage-opendal
91+
[iceberg-storage-opendal dev docs]: https://rust.iceberg.apache.org/api/iceberg_storage_opendal/
92+
93+
[iceberg-catalog-loader]: crates/catalog/loader
94+
[iceberg-catalog-loader image]: https://img.shields.io/crates/v/iceberg-catalog-loader.svg
95+
[iceberg-catalog-loader link]: https://crates.io/crates/iceberg-catalog-loader
96+
[iceberg-catalog-loader release docs]: https://docs.rs/iceberg-catalog-loader
97+
[iceberg-catalog-loader dev docs]: https://rust.iceberg.apache.org/api/iceberg_catalog_loader/
98+
99+
[iceberg-cache-moka]: crates/integrations/cache-moka
100+
[iceberg-cache-moka image]: https://img.shields.io/crates/v/iceberg-cache-moka.svg
101+
[iceberg-cache-moka link]: https://crates.io/crates/iceberg-cache-moka
102+
[iceberg-cache-moka release docs]: https://docs.rs/iceberg-cache-moka
103+
[iceberg-cache-moka dev docs]: https://rust.iceberg.apache.org/api/iceberg_cache_moka/
104+
71105
## Iceberg Rust Implementation Status
72106

73107
The features that Iceberg Rust currently supports can be found [here](https://iceberg.apache.org/status/).

bindings/python/Cargo.lock

Lines changed: 4 additions & 4 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

bindings/python/Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ crate-type = ["cdylib"]
3333
[dependencies]
3434
arrow = { version = "57.1", features = ["pyarrow", "chrono-tz"] }
3535
iceberg = { path = "../../crates/iceberg" }
36-
iceberg-storage-opendal = { path = "../../crates/storage/opendal", features = ["opendal-s3", "opendal-fs", "opendal-memory"] }
36+
iceberg-storage-opendal = { path = "../../crates/storage/opendal", features = ["opendal-all"] }
3737
pyo3 = { version = "0.26", features = ["extension-module", "abi3-py310"] }
3838
iceberg-datafusion = { path = "../../crates/integrations/datafusion" }
3939
datafusion-ffi = { version = "52.1" }

bindings/python/src/datafusion_table_provider.rs

Lines changed: 3 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -22,40 +22,16 @@ use std::sync::Arc;
2222
use datafusion_ffi::proto::logical_extension_codec::FFI_LogicalExtensionCodec;
2323
use datafusion_ffi::table_provider::FFI_TableProvider;
2424
use iceberg::TableIdent;
25-
use iceberg::io::{FileIOBuilder, StorageFactory};
25+
use iceberg::io::FileIOBuilder;
2626
use iceberg::table::StaticTable;
2727
use iceberg_datafusion::table::IcebergStaticTableProvider;
28-
use iceberg_storage_opendal::OpenDalStorageFactory;
28+
use iceberg_storage_opendal::OpenDalResolvingStorageFactory;
2929
use pyo3::exceptions::{PyRuntimeError, PyValueError};
3030
use pyo3::prelude::{PyAnyMethods, PyCapsuleMethods, *};
3131
use pyo3::types::{PyAny, PyCapsule};
3232

3333
use crate::runtime::runtime;
3434

35-
/// Parse the scheme from a URL and return the appropriate StorageFactory.
36-
fn storage_factory_from_path(path: &str) -> PyResult<Arc<dyn StorageFactory>> {
37-
let scheme = path
38-
.split("://")
39-
.next()
40-
.ok_or_else(|| PyRuntimeError::new_err(format!("Invalid path, missing scheme: {path}")))?;
41-
42-
let factory: Arc<dyn StorageFactory> = match scheme {
43-
"file" | "" => Arc::new(OpenDalStorageFactory::Fs),
44-
"s3" | "s3a" => Arc::new(OpenDalStorageFactory::S3 {
45-
configured_scheme: scheme.to_string(),
46-
customized_credential_load: None,
47-
}),
48-
"memory" => Arc::new(OpenDalStorageFactory::Memory),
49-
_ => {
50-
return Err(PyRuntimeError::new_err(format!(
51-
"Unsupported storage scheme: {scheme}"
52-
)));
53-
}
54-
};
55-
56-
Ok(factory)
57-
}
58-
5935
pub(crate) fn validate_pycapsule(capsule: &Bound<PyCapsule>, name: &str) -> PyResult<()> {
6036
let capsule_name = capsule.name()?;
6137
if capsule_name.is_none() {
@@ -110,7 +86,7 @@ impl PyIcebergDataFusionTable {
11086
let table_ident = TableIdent::from_strs(identifier)
11187
.map_err(|e| PyRuntimeError::new_err(format!("Invalid table identifier: {e}")))?;
11288

113-
let factory = storage_factory_from_path(&metadata_location)?;
89+
let factory = Arc::new(OpenDalResolvingStorageFactory::new());
11490

11591
let mut builder = FileIOBuilder::new(factory);
11692

crates/catalog/glue/Cargo.toml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,6 @@ iceberg = { workspace = true }
3737
iceberg-storage-opendal = { workspace = true, features = ["opendal-s3"] }
3838
serde_json = { workspace = true }
3939
tokio = { workspace = true }
40-
tracing = { workspace = true }
4140

4241
[dev-dependencies]
4342
iceberg_test_utils = { path = "../../test_utils", features = ["tests"] }

crates/catalog/glue/src/catalog.rs

Lines changed: 43 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717

1818
use std::collections::HashMap;
1919
use std::fmt::Debug;
20+
use std::str::FromStr;
2021
use std::sync::Arc;
2122

2223
use anyhow::anyhow;
@@ -338,6 +339,13 @@ impl Catalog for GlueCatalog {
338339
namespace: &NamespaceIdent,
339340
properties: HashMap<String, String>,
340341
) -> Result<Namespace> {
342+
if self.namespace_exists(namespace).await? {
343+
return Err(Error::new(
344+
ErrorKind::NamespaceAlreadyExists,
345+
format!("Namespace {namespace:?} already exists"),
346+
));
347+
}
348+
341349
let db_input = convert_to_database(namespace, &properties)?;
342350

343351
let builder = self.client.0.create_database().database_input(db_input);
@@ -364,15 +372,27 @@ impl Catalog for GlueCatalog {
364372
let builder = self.client.0.get_database().name(&db_name);
365373
let builder = with_catalog_id!(builder, self.config);
366374

367-
let resp = builder.send().await.map_err(from_aws_sdk_error)?;
375+
let resp = builder.send().await.map_err(|err| {
376+
if err
377+
.as_service_error()
378+
.map(|e| e.is_entity_not_found_exception())
379+
== Some(true)
380+
{
381+
return Error::new(
382+
ErrorKind::NamespaceNotFound,
383+
format!("Namespace {namespace:?} does not exist"),
384+
);
385+
}
386+
from_aws_sdk_error(err)
387+
})?;
368388

369389
match resp.database() {
370390
Some(db) => {
371391
let namespace = convert_to_namespace(db);
372392
Ok(namespace)
373393
}
374394
None => Err(Error::new(
375-
ErrorKind::DataInvalid,
395+
ErrorKind::NamespaceNotFound,
376396
format!("Database with name: {db_name} does not exist"),
377397
)),
378398
}
@@ -428,6 +448,13 @@ impl Catalog for GlueCatalog {
428448
namespace: &NamespaceIdent,
429449
properties: HashMap<String, String>,
430450
) -> Result<()> {
451+
if !self.namespace_exists(namespace).await? {
452+
return Err(Error::new(
453+
ErrorKind::NamespaceNotFound,
454+
format!("Namespace {namespace:?} does not exist"),
455+
));
456+
}
457+
431458
let db_name = validate_namespace(namespace)?;
432459
let db_input = convert_to_database(namespace, &properties)?;
433460

@@ -455,6 +482,13 @@ impl Catalog for GlueCatalog {
455482
/// - `Err(...)` signifies failure to drop the namespace due to validation
456483
/// errors, connectivity issues, or Glue Catalog constraints.
457484
async fn drop_namespace(&self, namespace: &NamespaceIdent) -> Result<()> {
485+
if !self.namespace_exists(namespace).await? {
486+
return Err(Error::new(
487+
ErrorKind::NamespaceNotFound,
488+
format!("Namespace {namespace:?} does not exist"),
489+
));
490+
}
491+
458492
let db_name = validate_namespace(namespace)?;
459493
let table_list = self.list_tables(namespace).await?;
460494

@@ -550,14 +584,14 @@ impl Catalog for GlueCatalog {
550584
let metadata = TableMetadataBuilder::from_table_creation(creation)?
551585
.build()?
552586
.metadata;
553-
let metadata_location =
554-
MetadataLocation::new_with_table_location(location.clone()).to_string();
587+
let metadata_location = MetadataLocation::new_with_metadata(location.clone(), &metadata);
555588

556589
metadata.write_to(&self.file_io, &metadata_location).await?;
557590

591+
let metadata_location_str = metadata_location.to_string();
558592
let glue_table = convert_to_glue_table(
559593
&table_name,
560-
metadata_location.clone(),
594+
metadata_location_str.clone(),
561595
&metadata,
562596
metadata.properties(),
563597
None,
@@ -575,7 +609,7 @@ impl Catalog for GlueCatalog {
575609

576610
Table::builder()
577611
.file_io(self.file_io())
578-
.metadata_location(metadata_location)
612+
.metadata_location(metadata_location_str)
579613
.metadata(metadata)
580614
.identifier(TableIdent::new(NamespaceIdent::new(db_name), table_name))
581615
.build()
@@ -813,12 +847,13 @@ impl Catalog for GlueCatalog {
813847
let current_metadata_location = current_table.metadata_location_result()?.to_string();
814848

815849
let staged_table = commit.apply(current_table)?;
816-
let staged_metadata_location = staged_table.metadata_location_result()?;
850+
let staged_metadata_location_str = staged_table.metadata_location_result()?;
851+
let staged_metadata_location = MetadataLocation::from_str(staged_metadata_location_str)?;
817852

818853
// Write new metadata
819854
staged_table
820855
.metadata()
821-
.write_to(staged_table.file_io(), staged_metadata_location)
856+
.write_to(staged_table.file_io(), &staged_metadata_location)
822857
.await?;
823858

824859
// Persist staged table to Glue with optimistic locking

crates/catalog/glue/src/utils.rs

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -306,8 +306,6 @@ mod tests {
306306
fn test_convert_to_glue_table() -> Result<()> {
307307
let table_name = "my_table".to_string();
308308
let location = "s3a://warehouse/hive".to_string();
309-
let metadata_location = MetadataLocation::new_with_table_location(location).to_string();
310-
let properties = HashMap::new();
311309
let schema = Schema::builder()
312310
.with_schema_id(1)
313311
.with_fields(vec![
@@ -316,6 +314,8 @@ mod tests {
316314
.build()?;
317315

318316
let metadata = create_metadata(schema)?;
317+
let metadata_location =
318+
MetadataLocation::new_with_metadata(location, &metadata).to_string();
319319

320320
let parameters = HashMap::from([
321321
(ICEBERG_FIELD_ID.to_string(), "1".to_string()),
@@ -336,8 +336,13 @@ mod tests {
336336
.location(metadata.location())
337337
.build();
338338

339-
let result =
340-
convert_to_glue_table(&table_name, metadata_location, &metadata, &properties, None)?;
339+
let result = convert_to_glue_table(
340+
&table_name,
341+
metadata_location,
342+
&metadata,
343+
metadata.properties(),
344+
None,
345+
)?;
341346

342347
assert_eq!(result.name(), &table_name);
343348
assert_eq!(result.description(), None);

0 commit comments

Comments
 (0)